r:multiple_regression
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
r:multiple_regression [2020/08/31 12:57] – [e.g. 5] hkimscil | r:multiple_regression [2023/10/19 08:23] (current) – hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== Multiple Regression ====== | ====== Multiple Regression ====== | ||
{{: | {{: | ||
- | < | + | University of New Mexico enrollment data (29 yearly observations) | ||
+ | ROLL: # of enrollment | ||
+ | UNEM: unemployment level | ||
+ | HGRAD: # of High school graduates | ||
+ | INC: income level | ||
+ | < | ||
+ | # data import | ||
+ | > datavar <- read.csv(" | ||
> str(datavar) | > str(datavar) | ||
' | ' | ||
Line 11: | Line 18: | ||
> | > | ||
</ | </ | ||
- | |||
< | < | ||
- | onePredictorModel <- lm(ROLL ~ UNEM, data = datavar) | + | two.predictor.model |
- | twoPredictorModel | + | summary(two.predictor.model) |
- | threePredictorModel <- lm(ROLL ~ UNEM + HGRAD + INC, data = datavar) | + | two.predictor.model |
</ | </ | ||
- | < | + | < |
- | summary(twoPredictorModel) | + | three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) |
- | summary(threePredictorModel) | + | summary(three.predictor.model) |
+ | three.predictor.model | ||
</ | </ | ||
- | < | + | < |
- | + | > two.predictor.model <- lm(ROLL ~ UNEM + HGRAD, datavar) | |
- | Call: | + | > summary(two.predictor.model) |
- | lm(formula = ROLL ~ UNEM, data = datavar) | + | |
- | + | ||
- | Residuals: | + | |
- | Min 1Q Median | + | |
- | -7640.0 -1046.5 | + | |
- | + | ||
- | Coefficients: | + | |
- | Estimate Std. Error t value Pr(>|t|) | + | |
- | (Intercept) | + | |
- | UNEM 1133.8 | + | |
- | --- | + | |
- | Signif. codes: | + | |
- | + | ||
- | Residual standard error: 3049 on 27 degrees of freedom | + | |
- | Multiple R-squared: | + | |
- | F-statistic: | + | |
- | </ | + | |
- | + | ||
- | < | + | |
Call: | Call: | ||
Line 65: | Line 53: | ||
F-statistic: | F-statistic: | ||
- | > </ | + | > two.predictor.model |
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | |||
+ | Coefficients: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
< | < | ||
- | > summary(threePredictorModel) | + | > three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) |
+ | > summary(three.predictor.model) | ||
Call: | Call: | ||
Line 89: | Line 89: | ||
F-statistic: | F-statistic: | ||
+ | > three.predictor.model | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Coefficients: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
</ | </ | ||
- | < | + | 만약에 |
- | Analysis | + | * unemployment rate (UNEM) = 9%, 10%, 15% | ||
+ | * spring high school graduating class (HGRAD) = 100000, 98000, 78000 | ||
+ | * a per capita income (INC) of \$30000, \$28000, \$19000 | ||
+ | * 일 때, enrollment는 어떻게 predict할 수 있을까? | ||
- | Model 1: ROLL ~ UNEM | + | 위에서 얻은 prediction model은 아래와 같다. |
- | Model 2: ROLL ~ UNEM + HGRAD | + | $$ \hat{Y} = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} $$ | ||
- | Model 3: ROLL ~ UNEM + HGRAD + INC | + | 여기에 위의 정보를 대입해 보면 된다. |
- | Res.Df RSS Df Sum of Sq F Pr(> | + | |
- | 1 27 251084710 | + | <code> |
- | 2 | + | new.data <- data.frame(UNEM=c(9, |
- | 3 25 11237313 | + | predict(three.predictor.model, newdata=new.data) |
- | --- | + | </ |
- | Signif. codes: | + | |
+ | < | ||
+ | > new.data <- data.frame(UNEM=c(9, | ||
+ | > predict(three.predictor.model, newdata=new.data) | ||
+ | | ||
+ | 163792.0 154879.4 110526.6 | ||
> | > | ||
+ | </ | ||
+ | \begin{align*} | ||
+ | \hat{Y} & = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} \\ | ||
+ | 163792.0 & = -9153.2545 + 450.1245 \cdot (9) + 0.4065 \cdot (100000) + 4.2749 \cdot (30000) \\ | ||
+ | 154879.4 & = -9153.2545 + 450.1245 \cdot (10) + 0.4065 \cdot (98000) + 4.2749 \cdot (28000) \\ | ||
+ | 110526.6 & = -9153.2545 + 450.1245 \cdot (15) + 0.4065 \cdot (78000) + 4.2749 \cdot (19000) \\ | ||
+ | |||
+ | \end{align*} | ||
+ | |||
+ | beta coefficient 살펴보기 | ||
+ | see [[:beta coefficients]] | ||
+ | < | ||
+ | # install.packages(' | ||
+ | # library(lm.beta) | ||
+ | lm.beta(three.predictor.model) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > # install.packages(' | ||
+ | > # library(lm.beta) | ||
+ | > lm.beta(three.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | (Intercept) | ||
+ | 0.0000000 | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | by hand | ||
+ | < | ||
+ | # coefficient * (sd(x)/sd(y)) | ||
+ | # | ||
+ | attach(datavar) | ||
+ | sd.roll <- sd(ROLL) | ||
+ | sd.unem <- sd(UNEM) | ||
+ | sd.hgrad <- sd(HGRAD) | ||
+ | sd.inc <- sd(INC) | ||
+ | |||
+ | b.unem <- three.predictor.model$coefficients[2] | ||
+ | b.hgrad <- three.predictor.model$coefficients[3] | ||
+ | b.inc <- three.predictor.model$coefficients[4] | ||
+ | |||
+ | ## or | ||
+ | b.unem <- 4.501e+02 | ||
+ | b.hgrad <- 4.065e-01 | ||
+ | b.inc <- 4.275e+00 | ||
+ | |||
+ | |||
+ | b.unem * (sd.unem / sd.roll) | ||
+ | b.hgrad * (sd.hgrad / sd.roll) | ||
+ | b.inc * (sd.inc / sd.roll) | ||
+ | |||
+ | lm.beta(three.predictor.model) | ||
+ | |||
+ | </ | ||
+ | output of the above | ||
+ | < | ||
+ | > sd.roll <- sd(ROLL) | ||
+ | > sd.unem <- sd(UNEM) | ||
+ | > sd.hgrad <- sd(HGRAD) | ||
+ | > sd.inc <- sd(INC) | ||
+ | > | ||
+ | > b.unem <- three.predictor.model$coefficients[2] | ||
+ | > b.hgrad <- three.predictor.model$coefficients[3] | ||
+ | > b.inc <- three.predictor.model$coefficients[4] | ||
+ | > | ||
+ | > ## or | ||
+ | > b.unem <- 4.501e+02 | ||
+ | > b.hgrad <- 4.065e-01 | ||
+ | > b.inc <- 4.275e+00 | ||
+ | > | ||
+ | > | ||
+ | > b.unem * (sd.unem / sd.roll) | ||
+ | [1] 0.1554 | ||
+ | > b.hgrad * (sd.hgrad / sd.roll) | ||
+ | [1] 0.3656 | ||
+ | > b.inc * (sd.inc / sd.roll) | ||
+ | [1] 0.6062 | ||
+ | > | ||
+ | > lm.beta(three.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | see also [[: | ||
+ | see also [[: | ||
+ | |||
+ | < | ||
+ | > fit <- three.predictor.model | ||
+ | > step <- stepAIC(fit, | ||
+ | Start: | ||
+ | ROLL ~ UNEM + HGRAD + INC | ||
+ | |||
+ | Df Sum of Sq RSS AIC | ||
+ | < | ||
+ | - UNEM | ||
+ | - HGRAD 1 12852039 24089352 401 | ||
+ | - INC 1 33568255 44805568 419 | ||
+ | > | ||
+ | |||
</ | </ | ||
====== Housing ====== | ====== Housing ====== | ||
Line 111: | Line 239: | ||
====== etc ====== | ====== etc ====== | ||
+ | {{: | ||
< | < | ||
+ | marketing <- read.csv(" | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # install.packages(" | ||
library(tidyverse) | library(tidyverse) | ||
data(" | data(" | ||
Line 119: | Line 253: | ||
* Note that to list all the independent (explanatory) variables, you could use '' | * Note that to list all the independent (explanatory) variables, you could use '' | ||
* You could also use '' | * You could also use '' | ||
+ | |||
| | ||
< | < | ||
Line 345: | Line 480: | ||
| interest | | interest | ||
| unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | | unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | ||
- | | res | 21 (c) | 104559 (3) | 4979 | | | + | | res | 21 %%(%%c%%)%% | 104559 (3) | 4979 | | |
| total | 23 | 1021416 (4) | | | total | 23 | 1021416 (4) | | ||
- | | interst | + | | interest |
(4) = (1) + (2) + (3) | (4) = (1) + (2) + (3) |
r/multiple_regression.1598846266.txt.gz · Last modified: 2020/08/31 12:57 by hkimscil