MR
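For reference, the quantities the script below computes by hand correspond to the standard multiple-regression decomposition, written here in the script's own notation (k = number of parameters a, b1, b2; n = number of cases):

$$ \hat{y} = a + b_1 x_1 + b_2 x_2 $$
$$ SS_{tot} = SS_{reg} + SS_{res}, \qquad R^2 = \frac{SS_{reg}}{SS_{tot}} $$
$$ F = \frac{MS_{reg}}{MS_{res}} = \frac{SS_{reg}/df_{reg}}{SS_{res}/df_{res}}, \qquad df_{reg} = k-1, \; df_{res} = n-k $$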
# multiple regression: a simple e.g.
#
# rm(list=ls())
d <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
d
colnames(d) <- c("y", "x1", "x2")
d
attach(d) # attach d so that y, x1, x2 can be referenced directly below
# y = number of bank accounts
# x1 = income
# x2 = number of dependents

lm.y.x1 <- lm(y ~ x1, data=d)
summary(lm.y.x1)
anova(lm.y.x1)

lm.y.x2 <- lm(y ~ x2, data=d)
summary(lm.y.x2)
anova(lm.y.x2)

lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
summary(lm.y.x1x2)
anova(lm.y.x1x2)

lm.y.x1x2$coefficient
# y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
a <- lm.y.x1x2$coefficient[1]
b1 <- lm.y.x1x2$coefficient[2]
b2 <- lm.y.x1x2$coefficient[3]
a
b1
b2

y.pred <- a + (b1 * x1) + (b2 * x2)
y.pred
# or
y.pred2 <- predict(lm.y.x1x2)
head(y.pred == y.pred2)

y.real <- y
y.real
y.mean <- mean(y)
y.mean

res <- y.real - y.pred
reg <- y.pred - y.mean
y.mean

# remember: y is the sum of res + reg + y.mean
y2 <- res + reg + y.mean
y == y2

ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
ss.tot <- var(y) * (length(y)-1)
ss.tot
ss.res
ss.reg
ss.res + ss.reg

k <- 3 # number of parameters: a, b1, b2
df.tot <- length(y) - 1
df.reg <- k - 1
df.res <- df.tot - df.reg

ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
ms.reg
ms.res
f.val <- ms.reg / ms.res
f.val
p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
p.val

# double check
summary(lm.y.x1x2)
anova(lm.y.x1x2)

# note on the two sets of tests (summary vs. anova)
# Note that the tests for x1 and x2 in anova() and the t-tests
# for x1 and x2 in summary(lm) differ (the Pr values for x1 and
# x2 are not the same). The reason is that the t-tests test the
# __pr__ (partial) contribution of each predictor (not the spr
# or the zero-order r), whereas anova() tests x1 with its full
# (sequential) contribution and tests x2 only with what remains
# after x1.

# beta coefficient (standardized b)
# beta <- b * (sd(x)/sd(y))
beta1 <- b1 * (sd(x1)/sd(y))
beta2 <- b2 * (sd(x2)/sd(y))
beta1
beta2

# install.packages("lm.beta")
library(lm.beta)
lm.beta(lm.y.x1x2)

#######################################################
# partial correlation coefficient and pr2
# x2's explanation?
# understand with diagrams first
# then calculate with r
lm.tmp.1 <- lm(x2 ~ x1, data=d)
res.x2.x1 <- lm.tmp.1$residuals

lm.tmp.2 <- lm(y ~ x1, data=d)
res.y.x1 <- lm.tmp.2$residuals

lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
summary(lm.tmp.3)

# install.packages("ppcor")
library(ppcor)
partial.r <- pcor.test(y, x2, x1)
partial.r
str(partial.r)

summary(lm.tmp.3)
summary(lm.tmp.3)$r.square
partial.r$estimate^2

# x1's own explanation?
lm.tmp.4 <- lm(x1 ~ x2, data=d)
res.x1.x2 <- lm.tmp.4$residuals

lm.tmp.5 <- lm(y ~ x2, data=d)
res.y.x2 <- lm.tmp.5$residuals

lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
summary(lm.tmp.6)

partial.r <- pcor.test(y, x1, x2)
str(partial.r)
partial.r$estimate # this is the partial correlation, not pr^2
# in order to get pr2, you should square it
partial.r$estimate^2

#######################################################
#
# semipartial correlation coefficient and spr2
#
spr.1 <- spcor.test(y, x2, x1)
spr.2 <- spcor.test(y, x1, x2)
spr.1
spr.2
spr.1$estimate^2
spr.2$estimate^2

lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
summary(lm.tmp.7)
spr.1$estimate^2

lm.tmp.8 <- lm(y ~ res.x1.x2, data = d)
summary(lm.tmp.8)
spr.2$estimate^2
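# A quick cross-check (a sketch; it assumes the objects created above,
# e.g. lm.y.x1, lm.y.x2, lm.y.x1x2, spr.1, spr.2, are still in the
# workspace): a predictor's squared semipartial correlation equals the
# increase in R-squared when that predictor is added to the model.
summary(lm.y.x1x2)$r.squared - summary(lm.y.x1)$r.squared # should match spr.1$estimate^2 (x2's unique part)
summary(lm.y.x1x2)$r.squared - summary(lm.y.x2)$r.squared # should match spr.2$estimate^2 (x1's unique part)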
#######################################################
# get the common area that explains the y variable
# 1.
summary(lm.y.x2)
all.x2 <- summary(lm.y.x2)$r.squared
sp.x2 <- spr.1$estimate^2
all.x2
sp.x2
cma.1 <- all.x2 - sp.x2
cma.1

# 2.
summary(lm.y.x1)
all.x1 <- summary(lm.y.x1)$r.squared
sp.x1 <- spr.2$estimate^2
all.x1
sp.x1
cma.2 <- all.x1 - sp.x1
cma.2

# OR 3.
summary(lm.y.x1x2)
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
sp.x1
sp.x2
cma.3 <- r2.y.x1x2 - (sp.x1 + sp.x2)
cma.3

cma.1
cma.2
cma.3

# OR: from the start, we could have obtained the common
# (shared) explained variance with just the R2 values from
# the simple and the multiple regressions.
r2.x1 <- summary(lm.y.x1)$r.square
r2.x2 <- summary(lm.y.x2)$r.square
r2.x1x2 <- summary(lm.y.x1x2)$r.square
r2.x1 + r2.x2 - r2.x1x2
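The three cma values above should agree with one another (up to rounding), since each is just r2.x1 + r2.x2 - r2.x1x2 written in a different way. As a further sketch, assuming the r2.x1, r2.x2, and r2.x1x2 objects from the last block are still in the workspace, the squared partial correlations can likewise be recovered from the same R-squared values:

(r2.x1x2 - r2.x1) / (1 - r2.x1) # compare with pcor.test(y, x2, x1)$estimate^2
(r2.x1x2 - r2.x2) / (1 - r2.x2) # compare with pcor.test(y, x1, x2)$estimate^2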