r:multiple_regression
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
r:multiple_regression [2019/11/08 10:59] – [Prediction] hkimscil | r:multiple_regression [2023/10/19 08:23] (current) – hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== Multiple Regression ====== | ====== Multiple Regression ====== | ||
{{: | {{: | ||
- | < | + | University of New Mexico enrollment data (for 30 years) |
+ | ROLL: # of enrollment | ||
+ | UNEM: unemployment level | ||
+ | HGRAD: # of High school graduates | ||
+ | INC: income level | ||
+ | < | ||
+ | # data import | ||
+ | > datavar <- read.csv(" | ||
> str(datavar) | > str(datavar) | ||
' | ' | ||
Line 11: | Line 18: | ||
> | > | ||
</ | </ | ||
- | |||
< | < | ||
- | onePredictorModel <- lm(ROLL ~ UNEM, data = datavar) | + | two.predictor.model |
- | twoPredictorModel | + | summary(two.predictor.model) |
- | threePredictorModel <- lm(ROLL ~ UNEM + HGRAD + INC, data = datavar) | + | two.predictor.model |
</ | </ | ||
- | < | + | < |
- | summary(twoPredictorModel) | + | three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) |
- | summary(threePredictorModel) | + | summary(three.predictor.model) |
+ | three.predictor.model | ||
</ | </ | ||
- | < | + | < |
- | + | > two.predictor.model <- lm(ROLL ~ UNEM + HGRAD, datavar) | |
- | Call: | + | > summary(two.predictor.model) |
- | lm(formula = ROLL ~ UNEM, data = datavar) | + | |
- | + | ||
- | Residuals: | + | |
- | Min 1Q Median | + | |
- | -7640.0 -1046.5 | + | |
- | + | ||
- | Coefficients: | + | |
- | Estimate Std. Error t value Pr(>|t|) | + | |
- | (Intercept) | + | |
- | UNEM 1133.8 | + | |
- | --- | + | |
- | Signif. codes: | + | |
- | + | ||
- | Residual standard error: 3049 on 27 degrees of freedom | + | |
- | Multiple R-squared: | + | |
- | F-statistic: | + | |
- | </ | + | |
- | + | ||
- | < | + | |
Call: | Call: | ||
Line 65: | Line 53: | ||
F-statistic: | F-statistic: | ||
- | > </ | + | > two.predictor.model |
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | |||
+ | Coefficients: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
< | < | ||
- | > summary(threePredictorModel) | + | > three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) |
+ | > summary(three.predictor.model) | ||
Call: | Call: | ||
Line 89: | Line 89: | ||
F-statistic: | F-statistic: | ||
+ | > three.predictor.model | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Coefficients: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
</ | </ | ||
- | < | + | 만약에 |
- | Analysis | + | * unemployment rate (UNEM) = 9%, 12%, 3% |
+ | * spring high school graduating class (HGRAD) = 100000, 98000, 78000 | ||
* a per capita income (INC) of \$30000, \$28000, \$19000
+ | * 일 때, enrollment는 어떻게 predict할 수 있을까? | ||
- | Model 1: ROLL ~ UNEM | + | 위에서 얻은 prediction model은 아래와 같다. |
Model 2: ROLL ~ UNEM + HGRAD | + | $$ \hat{Y} = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} $$ | ||
- | Model 3: ROLL ~ UNEM + HGRAD + INC | + | 여기에 위의 정보를 대입해 보면 된다. |
- | Res.Df RSS Df Sum of Sq F Pr(> | + | |
- | 1 27 251084710 | + | <code> |
- | 2 | + | new.data <- data.frame(UNEM=c(9, |
- | 3 25 11237313 | + | predict(three.predictor.model, newdata=new.data) |
- | --- | + | </ |
- | Signif. codes: | + | |
+ | < | ||
+ | > new.data <- data.frame(UNEM=c(9, | ||
+ | > predict(three.predictor.model, newdata=new.data) | ||
+ | | ||
+ | 163792.0 154879.4 110526.6 | ||
> | > | ||
+ | </ | ||
+ | \begin{align*} | ||
+ | \hat{Y} & = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} \\ | ||
+ | 163792.0 & = -9153.2545 + 450.1245 \cdot (9) + 0.4065 \cdot (100000) + 4.2749 \cdot (30000) \\ | ||
+ | 154879.4 & = -9153.2545 + 450.1245 \cdot (10) + 0.4065 \cdot (98000) + 4.2749 \cdot (28000) \\ | ||
+ | 110526.6 & = -9153.2545 + 450.1245 \cdot (15) + 0.4065 \cdot (78000) + 4.2749 \cdot (19000) \\ | ||
+ | |||
+ | \end{align*} | ||
+ | |||
+ | beta coefficient 살펴보기 | ||
+ | see [[:beta coefficients]] | ||
+ | < | ||
+ | # install.packages(' | ||
+ | # library(lm.beta) | ||
+ | lm.beta(three.predictor.model) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > # install.packages(' | ||
+ | > # library(lm.beta) | ||
+ | > lm.beta(three.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | (Intercept) | ||
+ | 0.0000000 | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | by hand | ||
+ | < | ||
+ | # coefficient * (sd(x)/ | ||
+ | # | ||
+ | attach(datavar) | ||
+ | sd.roll <- sd(ROLL) | ||
+ | sd.unem <- sd(UNEM) | ||
+ | sd.hgrad <- sd(HGRAD) | ||
+ | sd.inc <- sd(INC) | ||
+ | |||
+ | b.unem <- three.predictor.model$coefficients[2] | ||
+ | b.hgrad <- three.predictor.model$coefficients[3] | ||
+ | b.inc <- three.predictor.model$coefficients[4] | ||
+ | |||
+ | ## or | ||
+ | b.unem <- 4.501e+02 | ||
+ | b.hgrad <- 4.065e-01 | ||
+ | b.inc <- 4.275e+00 | ||
+ | |||
+ | |||
+ | b.unem * (sd.unem / sd.roll) | ||
+ | b.hgrad * (sd.hgrad / sd.roll) | ||
+ | b.inc * (sd.inc / sd.roll) | ||
+ | |||
+ | lm.beta(three.predictor.model) | ||
+ | |||
+ | </ | ||
+ | output of the above | ||
+ | < | ||
+ | > sd.roll <- sd(ROLL) | ||
+ | > sd.unem <- sd(UNEM) | ||
+ | > sd.hgrad <- sd(HGRAD) | ||
+ | > sd.inc <- sd(INC) | ||
+ | > | ||
+ | > b.unem <- three.predictor.model$coefficients[2] | ||
+ | > b.hgrad <- three.predictor.model$coefficients[3] | ||
+ | > b.inc <- three.predictor.model$coefficients[4] | ||
+ | > | ||
+ | > ## or | ||
+ | > b.unem <- 4.501e+02 | ||
+ | > b.hgrad <- 4.065e-01 | ||
+ | > b.inc <- 4.275e+00 | ||
+ | > | ||
+ | > | ||
+ | > b.unem * (sd.unem / sd.roll) | ||
+ | [1] 0.1554 | ||
+ | > b.hgrad * (sd.hgrad / sd.roll) | ||
+ | [1] 0.3656 | ||
+ | > b.inc * (sd.inc / sd.roll) | ||
+ | [1] 0.6062 | ||
+ | > | ||
+ | > lm.beta(three.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | see also [[: | ||
+ | see also [[: | ||
+ | |||
+ | < | ||
+ | > fit <- three.predictor.model | ||
+ | > step <- stepAIC(fit, | ||
+ | Start: | ||
+ | ROLL ~ UNEM + HGRAD + INC | ||
+ | |||
+ | Df Sum of Sq RSS AIC | ||
+ | < | ||
+ | - UNEM | ||
+ | - HGRAD 1 12852039 24089352 401 | ||
+ | - INC 1 33568255 44805568 419 | ||
+ | > | ||
+ | |||
</ | </ | ||
====== Housing ====== | ====== Housing ====== | ||
Line 111: | Line 239: | ||
====== etc ====== | ====== etc ====== | ||
+ | {{: | ||
< | < | ||
+ | marketing <- read.csv(" | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # install.packages(" | ||
library(tidyverse) | library(tidyverse) | ||
data(" | data(" | ||
Line 119: | Line 253: | ||
* Note that to list all the independent (explanatory) variables, you could use '' | * Note that to list all the independent (explanatory) variables, you could use '' | ||
* You could also use '' | * You could also use '' | ||
+ | |||
| | ||
< | < | ||
Line 345: | Line 480: | ||
| interest | | interest | ||
| unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | | unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | ||
- | | res | 21 (c) | 104559 (3) | 4979 | | | + | | res | 21 %%(%%c%%)%% | 104559 (3) | 4979 | | |
| total | 23 | 1021416 (4) | | | total | 23 | 1021416 (4) | | ||
- | | interst | + | | interest |
(4) = (1) + (2) + (3) | (4) = (1) + (2) + (3) | ||
Line 534: | Line 669: | ||
Signif. codes: | Signif. codes: | ||
> </ | > </ | ||
+ | ====== e.g. 5 ====== | ||
+ | http:// | ||
+ | |||
+ | < | ||
+ | #packages we will need to conduct to create and graph our data | ||
+ | library(MASS) #create data | ||
+ | library(car) #graph data | ||
+ | py1 =.6 #Cor between X1 (Practice Time) and Memory Errors | ||
+ | py2 =.4 #Cor between X2 (Performance Anxiety) and Memory Errors | ||
+ | p12= .3 #Cor between X1 (Practice Time) and X2 (Performance Anxiety) | ||
+ | Means.X1X2Y< | ||
+ | CovMatrix.X1X2Y <- matrix(c(1, | ||
+ | p12,1,py2, | ||
+ | py1, | ||
+ | #build the correlated variables. Note: empirical=TRUE means make the correlation EXACTLY r. | ||
+ | # if we say empirical=FALSE, | ||
+ | set.seed(42) | ||
+ | CorrDataT< | ||
+ | #Convert them to a " | ||
+ | CorrDataT< | ||
+ | colnames(CorrDataT) <- c(" | ||
+ | #make the scatter plots | ||
+ | scatterplot(Memory~Practice, | ||
+ | scatterplot(Memory~Anxiety, | ||
+ | scatterplot(Anxiety~Practice, | ||
+ | # Pearson Correlations | ||
+ | ry1< | ||
+ | ry2< | ||
+ | r12< | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | ry1 | ||
+ | ry2 | ||
+ | r12 | ||
+ | ry1^2 | ||
+ | ry2^2 | ||
+ | r12^2 | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > ry1 | ||
+ | [1] 0.6 | ||
+ | > ry2 | ||
+ | [1] 0.4 | ||
+ | > r12 | ||
+ | [1] 0.3 | ||
+ | > | ||
+ | > ry1^2 | ||
+ | [1] 0.36 | ||
+ | > ry2^2 | ||
+ | [1] 0.16 | ||
+ | > r12^2 | ||
+ | [1] 0.09 | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > lm.m.pa <- lm(Memory~Practice+Anxiety, | ||
+ | > summary(lm.m.pa) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = Memory ~ Practice + Anxiety, data = CorrDataT) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -1.99998 -0.54360 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | Practice | ||
+ | Anxiety | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 0.7739 on 97 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | </ | ||
+ | |||
+ | {{ http:// | ||
+ | |||
+ | a+b+c+e = variance of y = total variance = 100% = 1 이라고 보면 | ||
+ | r< | ||
+ | |||
+ | 이 중에서 우리는 이미 | ||
+ | a + c = 0.6< | ||
+ | b + c = 0.4< | ||
+ | |||
+ | 따라서 Practice를 제어한 Anxiety의 영향력은 | ||
+ | r< | ||
+ | 반대로, Anxiety를 제어한 Practice의 영향력은 | ||
+ | r< | ||
r/multiple_regression.1573178381.txt.gz · Last modified: 2019/11/08 10:59 by hkimscil