r:multiple_regression
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| r:multiple_regression [2019/11/08 10:59] – [Prediction] hkimscil | r:multiple_regression [2023/10/19 08:23] (current) – hkimscil | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ====== Multiple Regression ====== | ====== Multiple Regression ====== | ||
| {{: | {{: | ||
| - | < | + | University of New Mexico enrollment data (for 30 years) |
| + | ROLL: # of enrollment | ||
+ | UNEM: unemployment level | ||
| + | HGRAD: # of High school graduates | ||
| + | INC: income level | ||
| + | < | ||
| + | # data import | ||
| + | > datavar <- read.csv(" | ||
| > str(datavar) | > str(datavar) | ||
| ' | ' | ||
| Line 11: | Line 18: | ||
| > | > | ||
| </ | </ | ||
| - | |||
| < | < | ||
| - | onePredictorModel <- lm(ROLL ~ UNEM, data = datavar) | + | two.predictor.model |
| - | twoPredictorModel | + | summary(two.predictor.model) |
| - | threePredictorModel <- lm(ROLL ~ UNEM + HGRAD + INC, data = datavar) | + | two.predictor.model |
| </ | </ | ||
| - | < | + | < |
| - | summary(twoPredictorModel) | + | three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) |
| - | summary(threePredictorModel) | + | summary(three.predictor.model) |
| + | three.predictor.model | ||
| </ | </ | ||
| - | < | + | < |
| - | + | > two.predictor.model <- lm(ROLL ~ UNEM + HGRAD, datavar) | |
| - | Call: | + | > summary(two.predictor.model) |
| - | lm(formula = ROLL ~ UNEM, data = datavar) | + | |
| - | + | ||
| - | Residuals: | + | |
| - | Min 1Q Median | + | |
| - | -7640.0 -1046.5 | + | |
| - | + | ||
| - | Coefficients: | + | |
| - | Estimate Std. Error t value Pr(>|t|) | + | |
| - | (Intercept) | + | |
| - | UNEM 1133.8 | + | |
| - | --- | + | |
| - | Signif. codes: | + | |
| - | + | ||
| - | Residual standard error: 3049 on 27 degrees of freedom | + | |
| - | Multiple R-squared: | + | |
| - | F-statistic: | + | |
| - | </ | + | |
| - | + | ||
| - | < | + | |
| Call: | Call: | ||
| Line 65: | Line 53: | ||
| F-statistic: | F-statistic: | ||
| - | > </ | + | > two.predictor.model |
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
| + | |||
| + | Coefficients: | ||
| + | (Intercept) | ||
| + | | ||
| + | |||
| + | > | ||
| + | </ | ||
| < | < | ||
| - | > summary(threePredictorModel) | + | > three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) |
| + | > summary(three.predictor.model) | ||
| Call: | Call: | ||
| Line 89: | Line 89: | ||
| F-statistic: | F-statistic: | ||
| + | > three.predictor.model | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | |||
| + | Coefficients: | ||
| + | (Intercept) | ||
| + | | ||
| + | |||
| + | > | ||
| </ | </ | ||
| - | < | + | 만약에 |
- | Analysis | + | * unemployment rate (UNEM) = 9%, 10%, 15%
| + | * spring high school graduating class (HGRAD) = 100000, 98000, 78000 | ||
+ | * a per capita income (INC) of \$30000, \$28000, \$19000 | ||
| + | * 일 때, enrollment는 어떻게 predict할 수 있을까? | ||
| - | Model 1: ROLL ~ UNEM | + | 위에서 얻은 prediction model은 아래와 같다. |
- | Model 2: ROLL ~ UNEM + HGRAD | + | $$ \hat{Y} = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} $$
| - | Model 3: ROLL ~ UNEM + HGRAD + INC | + | 여기에 위의 정보를 대입해 보면 된다. |
| - | Res.Df RSS Df Sum of Sq F Pr(> | + | |
| - | 1 27 251084710 | + | <code> |
| - | 2 | + | new.data <- data.frame(UNEM=c(9, |
| - | 3 25 11237313 | + | predict(three.predictor.model, newdata=new.data) |
| - | --- | + | </ |
| - | Signif. codes: | + | |
| + | < | ||
| + | > new.data <- data.frame(UNEM=c(9, | ||
| + | > predict(three.predictor.model, newdata=new.data) | ||
| + | | ||
| + | 163792.0 154879.4 110526.6 | ||
| > | > | ||
| + | </ | ||
| + | \begin{align*} | ||
+ | \hat{Y} & = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} \\
| + | 163792.0 & = -9153.2545 + 450.1245 \cdot (9) + 0.4065 \cdot (100000) + 4.2749 \cdot (30000) \\ | ||
| + | 154879.4 & = -9153.2545 + 450.1245 \cdot (10) + 0.4065 \cdot (98000) + 4.2749 \cdot (28000) \\ | ||
| + | 110526.6 & = -9153.2545 + 450.1245 \cdot (15) + 0.4065 \cdot (78000) + 4.2749 \cdot (19000) \\ | ||
| + | |||
| + | \end{align*} | ||
| + | |||
| + | beta coefficient 살펴보기 | ||
| + | see [[:beta coefficients]] | ||
| + | < | ||
| + | # install.packages(' | ||
| + | # library(lm.beta) | ||
| + | lm.beta(three.predictor.model) | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > # install.packages(' | ||
| + | > # library(lm.beta) | ||
| + | > lm.beta(three.predictor.model) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | |||
| + | Standardized Coefficients:: | ||
| + | (Intercept) | ||
| + | 0.0000000 | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | by hand | ||
| + | < | ||
| + | # coefficient * (sd(x)/ | ||
| + | # | ||
| + | attach(datavar) | ||
| + | sd.roll <- sd(ROLL) | ||
| + | sd.unem <- sd(UNEM) | ||
| + | sd.hgrad <- sd(HGRAD) | ||
| + | sd.inc <- sd(INC) | ||
| + | |||
| + | b.unem <- three.predictor.model$coefficients[2] | ||
| + | b.hgrad <- three.predictor.model$coefficients[3] | ||
| + | b.inc <- three.predictor.model$coefficients[4] | ||
| + | |||
| + | ## or | ||
| + | b.unem <- 4.501e+02 | ||
| + | b.hgrad <- 4.065e-01 | ||
| + | b.inc <- 4.275e+00 | ||
| + | |||
| + | |||
| + | b.unem * (sd.unem / sd.roll) | ||
| + | b.hgrad * (sd.hgrad / sd.roll) | ||
| + | b.inc * (sd.inc / sd.roll) | ||
| + | |||
| + | lm.beta(three.predictor.model) | ||
| + | |||
| + | </ | ||
| + | output of the above | ||
| + | < | ||
| + | > sd.roll <- sd(ROLL) | ||
| + | > sd.unem <- sd(UNEM) | ||
| + | > sd.hgrad <- sd(HGRAD) | ||
| + | > sd.inc <- sd(INC) | ||
| + | > | ||
| + | > b.unem <- three.predictor.model$coefficients[2] | ||
| + | > b.hgrad <- three.predictor.model$coefficients[3] | ||
| + | > b.inc <- three.predictor.model$coefficients[4] | ||
| + | > | ||
| + | > ## or | ||
| + | > b.unem <- 4.501e+02 | ||
| + | > b.hgrad <- 4.065e-01 | ||
| + | > b.inc <- 4.275e+00 | ||
| + | > | ||
| + | > | ||
| + | > b.unem * (sd.unem / sd.roll) | ||
| + | [1] 0.1554 | ||
| + | > b.hgrad * (sd.hgrad / sd.roll) | ||
| + | [1] 0.3656 | ||
| + | > b.inc * (sd.inc / sd.roll) | ||
| + | [1] 0.6062 | ||
| + | > | ||
| + | > lm.beta(three.predictor.model) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | |||
| + | Standardized Coefficients:: | ||
| + | (Intercept) | ||
| + | | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | |||
| + | see also [[: | ||
| + | see also [[: | ||
| + | |||
| + | < | ||
| + | > fit <- three.predictor.model | ||
| + | > step <- stepAIC(fit, | ||
| + | Start: | ||
| + | ROLL ~ UNEM + HGRAD + INC | ||
| + | |||
| + | Df Sum of Sq RSS AIC | ||
| + | < | ||
| + | - UNEM | ||
| + | - HGRAD 1 12852039 24089352 401 | ||
| + | - INC 1 33568255 44805568 419 | ||
| + | > | ||
| + | |||
| </ | </ | ||
| ====== Housing ====== | ====== Housing ====== | ||
| Line 111: | Line 239: | ||
| ====== etc ====== | ====== etc ====== | ||
| + | {{: | ||
| < | < | ||
| + | marketing <- read.csv(" | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | # install.packages(" | ||
| library(tidyverse) | library(tidyverse) | ||
| data(" | data(" | ||
| Line 119: | Line 253: | ||
| * Note that to list all the independent (explanatory) variables, you could use '' | * Note that to list all the independent (explanatory) variables, you could use '' | ||
| * You could also use '' | * You could also use '' | ||
| + | |||
| | | ||
| < | < | ||
| Line 345: | Line 480: | ||
| | interest | | interest | ||
| | unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | | unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | ||
| - | | res | 21 (c) | 104559 (3) | 4979 | | | + | | res | 21 %%(%%c%%)%% | 104559 (3) | 4979 | | |
| | total | 23 | 1021416 (4) | | | total | 23 | 1021416 (4) | | ||
| - | | interst | + | | interest |
| (4) = (1) + (2) + (3) | (4) = (1) + (2) + (3) | ||
| Line 534: | Line 669: | ||
| Signif. codes: | Signif. codes: | ||
| > </ | > </ | ||
| + | ====== e.g. 5 ====== | ||
| + | http:// | ||
| + | |||
| + | < | ||
| + | #packages we will need to conduct to create and graph our data | ||
| + | library(MASS) #create data | ||
| + | library(car) #graph data | ||
| + | py1 =.6 #Cor between X1 (Practice Time) and Memory Errors | ||
| + | py2 =.4 #Cor between X2 (Performance Anxiety) and Memory Errors | ||
| + | p12= .3 #Cor between X1 (Practice Time) and X2 (Performance Anxiety) | ||
| + | Means.X1X2Y< | ||
| + | CovMatrix.X1X2Y <- matrix(c(1, | ||
| + | p12,1,py2, | ||
| + | py1, | ||
| + | #build the correlated variables. Note: empirical=TRUE means make the correlation EXACTLY r. | ||
| + | # if we say empirical=FALSE, | ||
| + | set.seed(42) | ||
| + | CorrDataT< | ||
| + | #Convert them to a " | ||
| + | CorrDataT< | ||
| + | colnames(CorrDataT) <- c(" | ||
| + | #make the scatter plots | ||
| + | scatterplot(Memory~Practice, | ||
| + | scatterplot(Memory~Anxiety, | ||
| + | scatterplot(Anxiety~Practice, | ||
| + | # Pearson Correlations | ||
| + | ry1< | ||
| + | ry2< | ||
| + | r12< | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | ry1 | ||
| + | ry2 | ||
| + | r12 | ||
| + | ry1^2 | ||
| + | ry2^2 | ||
| + | r12^2 | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > ry1 | ||
| + | [1] 0.6 | ||
| + | > ry2 | ||
| + | [1] 0.4 | ||
| + | > r12 | ||
| + | [1] 0.3 | ||
| + | > | ||
| + | > ry1^2 | ||
| + | [1] 0.36 | ||
| + | > ry2^2 | ||
| + | [1] 0.16 | ||
| + | > r12^2 | ||
| + | [1] 0.09 | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > lm.m.pa <- lm(Memory~Practice+Anxiety, | ||
| + | > summary(lm.m.pa) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = Memory ~ Practice + Anxiety, data = CorrDataT) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.99998 -0.54360 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | Practice | ||
| + | Anxiety | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.7739 on 97 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | </ | ||
| + | |||
| + | {{ http:// | ||
| + | |||
| + | a+b+c+e = variance of y = total variance = 100% = 1 이라고 보면 | ||
| + | r< | ||
| + | |||
| + | 이 중에서 우리는 이미 | ||
| + | a + c = 0.6< | ||
| + | b + c = 0.4< | ||
| + | |||
| + | 따라서 Practice를 제어한 Anxiety의 영향력은 | ||
| + | r< | ||
| + | 반대로, Anxiety를 제어한 Practice의 영향력은 | ||
| + | r< | ||
r/multiple_regression.1573178381.txt.gz · Last modified: by hkimscil
