r:multiple_regression
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
r:multiple_regression [2020/12/01 14:16] – [Multiple Regression] hkimscil | r:multiple_regression [2023/10/19 08:23] (current) – hkimscil | ||
---|---|---|---|
Line 1: | Line 1: | ||
====== Multiple Regression ====== | ====== Multiple Regression ====== | ||
{{: | {{: | ||
+ | University of New Mexico enrollment data (29 yearly observations) | ||
+ | ROLL: # of enrollment | ||
+ | UNEM: unemployment level | ||
+ | HGRAD: # of High school graduates | ||
+ | INC: income level | ||
+ | < | ||
+ | # data import | ||
+ | > datavar <- read.csv(" | ||
+ | > str(datavar) | ||
+ | ' | ||
+ | $ YEAR : int 1 2 3 4 5 6 7 8 9 10 ... | ||
+ | $ ROLL : int 5501 5945 6629 7556 8716 9369 9920 10167 11084 12504 ... | ||
+ | $ UNEM : num 8.1 7 7.3 7.5 7 6.4 6.5 6.4 6.3 7.7 ... | ||
+ | $ HGRAD: int 9552 9680 9731 11666 14675 15265 15484 15723 16501 16890 ... | ||
+ | $ INC : int 1923 1961 1979 2030 2112 2192 2235 2351 2411 2475 ... | ||
+ | > | ||
+ | </ | ||
+ | < | ||
+ | two.predictor.model <- lm(ROLL ~ UNEM + HGRAD, datavar) | ||
+ | summary(two.predictor.model) | ||
+ | two.predictor.model | ||
+ | </ | ||
- | [[: | + | < |
+ | three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) | ||
+ | summary(three.predictor.model) | ||
+ | three.predictor.model | ||
+ | </ | ||
+ | < | ||
+ | > two.predictor.model <- lm(ROLL ~ UNEM + HGRAD, datavar) | ||
+ | > summary(two.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | Min 1Q Median | ||
+ | -2102.2 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) -8.256e+03 | ||
+ | UNEM | ||
+ | HGRAD 9.423e-01 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 1313 on 26 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > two.predictor.model | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
+ | |||
+ | Coefficients: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) | ||
+ | > summary(three.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -1148.84 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) -9.153e+03 | ||
+ | UNEM | ||
+ | HGRAD 4.065e-01 | ||
+ | INC 4.275e+00 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 670.4 on 25 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > three.predictor.model | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Coefficients: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | 만약에 | ||
+ | * unemployment rate (UNEM) = 9%, 10%, 15% | ||
+ | * spring high school graduating class (HGRAD) = 100000, 98000, 78000 | ||
+ | * a per capita income (INC) of \$30000, \$28000, \$19000 | ||
+ | * 일 때, enrollment는 어떻게 predict할 수 있을까? | ||
+ | |||
+ | 위에서 얻은 prediction model은 아래와 같다. | ||
+ | $$ \hat{Y} = -9153.2545 + 450.1245 \cdot UNEM + 0.4065 \cdot HGRAD + 4.2749 \cdot INC $$ | ||
+ | 여기에 위의 정보를 대입해 보면 된다. | ||
+ | |||
+ | < | ||
+ | new.data <- data.frame(UNEM=c(9, | ||
+ | predict(three.predictor.model, | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > new.data <- data.frame(UNEM=c(9, | ||
+ | > predict(three.predictor.model, | ||
+ | | ||
+ | 163792.0 154879.4 110526.6 | ||
+ | > | ||
+ | </ | ||
+ | \begin{align*} | ||
+ | \hat{Y} & = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} \\ | ||
+ | 163792.0 & = -9153.2545 + 450.1245 \cdot (9) + 0.4065 \cdot (100000) + 4.2749 \cdot (30000) \\ | ||
+ | 154879.4 & = -9153.2545 + 450.1245 \cdot (10) + 0.4065 \cdot (98000) + 4.2749 \cdot (28000) \\ | ||
+ | 110526.6 & = -9153.2545 + 450.1245 \cdot (15) + 0.4065 \cdot (78000) + 4.2749 \cdot (19000) \\ | ||
+ | |||
+ | \end{align*} | ||
+ | |||
+ | beta coefficient 살펴보기 | ||
+ | see [[:beta coefficients]] | ||
+ | < | ||
+ | # install.packages(' | ||
+ | # library(lm.beta) | ||
+ | lm.beta(three.predictor.model) | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | > # install.packages(' | ||
+ | > # library(lm.beta) | ||
+ | > lm.beta(three.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | (Intercept) | ||
+ | 0.0000000 | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | by hand | ||
+ | < | ||
+ | # coefficient * (sd(x)/sd(y)) | ||
+ | # | ||
+ | attach(datavar) | ||
+ | sd.roll <- sd(ROLL) | ||
+ | sd.unem <- sd(UNEM) | ||
+ | sd.hgrad <- sd(HGRAD) | ||
+ | sd.inc <- sd(INC) | ||
+ | |||
+ | b.unem <- three.predictor.model$coefficients[2] | ||
+ | b.hgrad <- three.predictor.model$coefficients[3] | ||
+ | b.inc <- three.predictor.model$coefficients[4] | ||
+ | |||
+ | ## or | ||
+ | b.unem <- 4.501e+02 | ||
+ | b.hgrad <- 4.065e-01 | ||
+ | b.inc <- 4.275e+00 | ||
+ | |||
+ | |||
+ | b.unem * (sd.unem / sd.roll) | ||
+ | b.hgrad * (sd.hgrad / sd.roll) | ||
+ | b.inc * (sd.inc / sd.roll) | ||
+ | |||
+ | lm.beta(three.predictor.model) | ||
+ | |||
+ | </ | ||
+ | output of the above | ||
+ | < | ||
+ | > sd.roll <- sd(ROLL) | ||
+ | > sd.unem <- sd(UNEM) | ||
+ | > sd.hgrad <- sd(HGRAD) | ||
+ | > sd.inc <- sd(INC) | ||
+ | > | ||
+ | > b.unem <- three.predictor.model$coefficients[2] | ||
+ | > b.hgrad <- three.predictor.model$coefficients[3] | ||
+ | > b.inc <- three.predictor.model$coefficients[4] | ||
+ | > | ||
+ | > ## or | ||
+ | > b.unem <- 4.501e+02 | ||
+ | > b.hgrad <- 4.065e-01 | ||
+ | > b.inc <- 4.275e+00 | ||
+ | > | ||
+ | > | ||
+ | > b.unem * (sd.unem / sd.roll) | ||
+ | [1] 0.1554 | ||
+ | > b.hgrad * (sd.hgrad / sd.roll) | ||
+ | [1] 0.3656 | ||
+ | > b.inc * (sd.inc / sd.roll) | ||
+ | [1] 0.6062 | ||
+ | > | ||
+ | > lm.beta(three.predictor.model) | ||
+ | |||
+ | Call: | ||
+ | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
+ | |||
+ | Standardized Coefficients:: | ||
+ | (Intercept) | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | see also [[: | ||
+ | see also [[: | ||
+ | |||
+ | < | ||
+ | > fit <- three.predictor.model | ||
+ | > step <- stepAIC(fit, | ||
+ | Start: | ||
+ | ROLL ~ UNEM + HGRAD + INC | ||
+ | |||
+ | Df Sum of Sq RSS AIC | ||
+ | < | ||
+ | - UNEM | ||
+ | - HGRAD 1 12852039 24089352 401 | ||
+ | - INC 1 33568255 44805568 419 | ||
+ | > | ||
+ | |||
+ | </ | ||
====== Housing ====== | ====== Housing ====== | ||
{{housing.txt}} | {{housing.txt}} | ||
Line 10: | Line 239: | ||
====== etc ====== | ====== etc ====== | ||
+ | {{: | ||
< | < | ||
+ | marketing <- read.csv(" | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # install.packages(" | ||
library(tidyverse) | library(tidyverse) | ||
data(" | data(" | ||
Line 18: | Line 253: | ||
* Note that to list all the independent (explanatory) variables, you could use '' | * Note that to list all the independent (explanatory) variables, you could use '' | ||
* You could also use '' | * You could also use '' | ||
+ | |||
| | ||
< | < | ||
Line 244: | Line 480: | ||
| interest | | interest | ||
| unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | | unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | ||
- | | res | 21 (c) | 104559 (3) | 4979 | | | + | | res | 21 %%(%%c%%)%% | 104559 (3) | 4979 | | |
| total | 23 | 1021416 (4) | | | total | 23 | 1021416 (4) | | ||
- | | interst | + | | interest |
(4) = (1) + (2) + (3) | (4) = (1) + (2) + (3) |
r/multiple_regression.1606799791.txt.gz · Last modified: 2020/12/01 14:16 by hkimscil