r:multiple_regression
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| r:multiple_regression [2020/12/01 14:21] – hkimscil | r:multiple_regression [2023/10/19 08:23] (current) – hkimscil | ||
|---|---|---|---|
| Line 19: | Line 19: | ||
| </ | </ | ||
| < | < | ||
| - | > mo <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) | + | two.predictor.model <- lm(ROLL ~ UNEM + HGRAD, datavar) |
| - | > summary(mo) | + | summary(two.predictor.model) |
| + | two.predictor.model | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | three.predictor.model <- lm(ROLL ~ UNEM + HGRAD + INC, datavar) | ||
| + | summary(three.predictor.model) | ||
| + | three.predictor.model | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > two.predictor.model <- lm(ROLL ~ UNEM + HGRAD, datavar) | ||
| + | > summary(two.predictor.model) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
| + | |||
| + | Residuals: | ||
| + | Min 1Q Median | ||
| + | -2102.2 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) -8.256e+03 | ||
| + | UNEM | ||
| + | HGRAD 9.423e-01 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1313 on 26 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > two.predictor.model | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD, data = datavar) | ||
| + | |||
| + | Coefficients: | ||
| + | (Intercept) | ||
| + | | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > three.predictor.model | ||
| + | > summary(three.predictor.model) | ||
| Call: | Call: | ||
| Line 41: | Line 88: | ||
| Multiple R-squared: | Multiple R-squared: | ||
| F-statistic: | F-statistic: | ||
| + | |||
| + | > three.predictor.model | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | |||
| + | Coefficients: | ||
| + | (Intercept) | ||
| + | | ||
| > | > | ||
| </ | </ | ||
| - | [[: | + | 만약에 |
| + | * unemployment rate (UNEM) = 9%, 10%, 15% | ||
| + | * spring high school graduating class (HGRAD) = 100000, 98000, 78000 | ||
| + | * a per capita income (INC) of \$30000, \$28000, \$19000 | ||
| + | * 일 때, enrollment는 어떻게 predict할 수 있을까? | ||
| + | 위에서 얻은 prediction model은 아래와 같다. | ||
| + | $$ \hat{Y} = -9153.2545 + 450.1245 \cdot UNEM + 0.4065 \cdot HGRAD + 4.2749 \cdot INC $$ | ||
| + | 여기에 위의 정보를 대입해 보면 된다. | ||
| + | |||
| + | < | ||
| + | new.data <- data.frame(UNEM=c(9, | ||
| + | predict(three.predictor.model, | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > new.data <- data.frame(UNEM=c(9, | ||
| + | > predict(three.predictor.model, | ||
| + | | ||
| + | 163792.0 154879.4 110526.6 | ||
| + | > | ||
| + | </ | ||
| + | \begin{align*} | ||
| + | \hat{Y} & = -9153.2545 + 450.1245 \cdot \text{UNEM} + 0.4065 \cdot \text{HGRAD} + 4.2749 \cdot \text{INC} \\ | ||
| + | 163792.0 & = -9153.2545 + 450.1245 \cdot (9) + 0.4065 \cdot (100000) + 4.2749 \cdot (30000) \\ | ||
| + | 154879.4 & = -9153.2545 + 450.1245 \cdot (10) + 0.4065 \cdot (98000) + 4.2749 \cdot (28000) \\ | ||
| + | 110526.6 & = -9153.2545 + 450.1245 \cdot (15) + 0.4065 \cdot (78000) + 4.2749 \cdot (19000) \\ | ||
| + | |||
| + | \end{align*} | ||
| + | |||
| + | beta coefficient 살펴보기 | ||
| + | see [[:beta coefficients]] | ||
| + | < | ||
| + | # install.packages('lm.beta') | ||
| + | # library(lm.beta) | ||
| + | lm.beta(three.predictor.model) | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > # install.packages(' | ||
| + | > # library(lm.beta) | ||
| + | > lm.beta(three.predictor.model) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | |||
| + | Standardized Coefficients:: | ||
| + | (Intercept) | ||
| + | 0.0000000 | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | by hand | ||
| + | < | ||
| + | # coefficient * (sd(x)/sd(y)) | ||
| + | # | ||
| + | attach(datavar) | ||
| + | sd.roll <- sd(ROLL) | ||
| + | sd.unem <- sd(UNEM) | ||
| + | sd.hgrad <- sd(HGRAD) | ||
| + | sd.inc <- sd(INC) | ||
| + | |||
| + | b.unem <- three.predictor.model$coefficients[2] | ||
| + | b.hgrad <- three.predictor.model$coefficients[3] | ||
| + | b.inc <- three.predictor.model$coefficients[4] | ||
| + | |||
| + | ## or | ||
| + | b.unem <- 4.501e+02 | ||
| + | b.hgrad <- 4.065e-01 | ||
| + | b.inc <- 4.275e+00 | ||
| + | |||
| + | |||
| + | b.unem * (sd.unem / sd.roll) | ||
| + | b.hgrad * (sd.hgrad / sd.roll) | ||
| + | b.inc * (sd.inc / sd.roll) | ||
| + | |||
| + | lm.beta(three.predictor.model) | ||
| + | |||
| + | </ | ||
| + | output of the above | ||
| + | < | ||
| + | > sd.roll <- sd(ROLL) | ||
| + | > sd.unem <- sd(UNEM) | ||
| + | > sd.hgrad <- sd(HGRAD) | ||
| + | > sd.inc <- sd(INC) | ||
| + | > | ||
| + | > b.unem <- three.predictor.model$coefficients[2] | ||
| + | > b.hgrad <- three.predictor.model$coefficients[3] | ||
| + | > b.inc <- three.predictor.model$coefficients[4] | ||
| + | > | ||
| + | > ## or | ||
| + | > b.unem <- 4.501e+02 | ||
| + | > b.hgrad <- 4.065e-01 | ||
| + | > b.inc <- 4.275e+00 | ||
| + | > | ||
| + | > | ||
| + | > b.unem * (sd.unem / sd.roll) | ||
| + | [1] 0.1554 | ||
| + | > b.hgrad * (sd.hgrad / sd.roll) | ||
| + | [1] 0.3656 | ||
| + | > b.inc * (sd.inc / sd.roll) | ||
| + | [1] 0.6062 | ||
| + | > | ||
| + | > lm.beta(three.predictor.model) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = ROLL ~ UNEM + HGRAD + INC, data = datavar) | ||
| + | |||
| + | Standardized Coefficients:: | ||
| + | (Intercept) | ||
| + | | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | |||
| + | see also [[: | ||
| + | see also [[: | ||
| + | |||
| + | < | ||
| + | > fit <- three.predictor.model | ||
| + | > step <- stepAIC(fit, | ||
| + | Start: | ||
| + | ROLL ~ UNEM + HGRAD + INC | ||
| + | |||
| + | Df Sum of Sq RSS AIC | ||
| + | < | ||
| + | - UNEM | ||
| + | - HGRAD 1 12852039 24089352 401 | ||
| + | - INC 1 33568255 44805568 419 | ||
| + | > | ||
| + | |||
| + | </ | ||
| ====== Housing ====== | ====== Housing ====== | ||
| {{housing.txt}} | {{housing.txt}} | ||
| Line 53: | Line 239: | ||
| ====== etc ====== | ====== etc ====== | ||
| + | {{: | ||
| < | < | ||
| + | marketing <- read.csv(" | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | # install.packages(" | ||
| library(tidyverse) | library(tidyverse) | ||
| data(" | data(" | ||
| Line 61: | Line 253: | ||
| * Note that to list all the independent (explanatory) variables, you could use '' | * Note that to list all the independent (explanatory) variables, you could use '' | ||
| * You could also use '' | * You could also use '' | ||
| + | |||
| | | ||
| < | < | ||
| Line 287: | Line 480: | ||
| | interest | | interest | ||
| | unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | | unemp | 1 (b) | 22394 (2) | 22394 | 4.497690299 | ||
| - | | res | 21 (c) | 104559 (3) | 4979 | | | + | | res | 21 %%(%%c%%)%% | 104559 (3) | 4979 | | |
| | total | 23 | 1021416 (4) | | | total | 23 | 1021416 (4) | | ||
| - | | interst | + | | interest |
| (4) = (1) + (2) + (3) | (4) = (1) + (2) + (3) | ||
r/multiple_regression.1606800117.txt.gz · Last modified: by hkimscil
