====== MR ======
<code>
# multiple regression: a simple e.g.
#
#
rm(list=ls())
df <- read.csv("...")
df

colnames(df) <- c("y", "x1", "x2")
df
# y  = number of bank accounts
# x1 = income
# x2 = number of dependents
lm.y.x1 <- lm(y ~ x1, data=df)
summary(lm.y.x1)
anova(lm.y.x1)
cor(df$x1, df$y)^2
summary(lm.y.x1)$r.squared


lm.y.x2 <- lm(y ~ x2, data=df)
summary(lm.y.x2)
anova(lm.y.x2)
cor(df$x2, df$y)^2
summary(lm.y.x2)$r.squared


lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
summary(lm.y.x1x2)
anova(lm.y.x1x2)
bcd <- summary(lm.y.x1x2)$r.squared
bcd
bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
bc.cd
bc.cd - bcd # note that this is the amount shared by the two IVs


lm.y.x1x2$coefficient
# y.hat = 6.399103 + (0.01184145)*x1 + (-0.54473)*x2
a <- lm.y.x1x2$coefficient[1]
b1 <- lm.y.x1x2$coefficient[2]
b2 <- lm.y.x1x2$coefficient[3]
a
b1
b2

y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
y.pred
# or
y.pred2 <- predict(lm.y.x1x2)
head(y.pred == y.pred2)
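# (added note) exact comparison with == can fail for floating-point
# arithmetic; a tolerance-based check such as all.equal() is safer
all.equal(y.pred, unname(y.pred2))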
| + | |||
| + | y.real <- df$y | ||
| + | y.real | ||
| + | y.mean <- mean(df$y) | ||
| + | y.mean | ||
| + | |||
| + | deviation.score <- df$y - y.mean | ||
| + | ds <- deviation.score | ||
| + | res <- y.real - y.pred | ||
| + | reg <- y.pred - y.mean | ||
| + | y.mean | ||
| + | # remember y is sum of res + reg + y.mean | ||
| + | y2 <- res + reg + y.mean | ||
| + | df$y==y2 | ||
| + | |||
| + | ss.tot <- sum(ds^2) | ||
| + | ss.res <- sum(res^2) | ||
| + | ss.reg <- sum(reg^2) | ||
| + | |||
| + | ss.tot2 <- var(df$y) * (length(df$y)-1) | ||
| + | ss.tot | ||
| + | ss.tot2 | ||
| + | ss.res | ||
| + | ss.reg | ||
| + | ss.res+ss.reg | ||
| + | |||
| + | k <- 3 # # of parameters a, b1, b2 | ||
| + | df.tot <- length(df$y)-1 | ||
| + | df.reg <- k - 1 | ||
| + | df.res <- df.tot - df.reg | ||
| + | |||
| + | ms.reg <- ss.reg/ | ||
| + | ms.res <- ss.res/ | ||
| + | ms.reg | ||
| + | ms.res | ||
| + | f.val <- ms.reg/ | ||
| + | f.val | ||
| + | p.val <- pf(f.val, df.reg, df.res, lower.tail = F) | ||
| + | p.val | ||
| + | |||
| + | # double check | ||
| + | summary(lm.y.x1x2) | ||
| + | anova(lm.y.x1x2) | ||
| + | |||
| + | summary(lm(y~x2+x1, | ||
| + | anova(lm(y~x2+x1, | ||
| + | |||
| + | # note on 2 t-tests in summary | ||
| + | # anova에서의 x1, x2에 대한 테스트와 | ||
| + | # lm에서의 x1, x2에 대한 테스트 (t-test) 간에 | ||
| + | # 차이가 있음에 주의 (x1, x2에 대한 Pr 값이 | ||
| + | # 다름). 그 이유는 | ||
| + | # t-tests는 __pr__ 테스트로 테스트를 | ||
| + | # (spr, zero_order_r 테스트가 아님) 하고 | ||
| + | # anova test는 x1 전체에 대한 테스트 하고 | ||
| + | # x2는 x1에 대한 테스트 외에 나머지를 가지고 | ||
| + | # 테스트하기 때문에 그러함 | ||
| + | |||
| + | # 또한 anova test에서 두번째 IV의 F값은 | ||
| + | # summary(lm)에서 두번 째 IV의 t값의 제곱값 | ||
| + | # 임을 이해. 이는 두번 째 IV의 설명력을 나 | ||
| + | # 타내는 부분이 lm과 anova 모두 같기 때문 | ||
| + | # 반면에 첫번째 IV의 경우에는 lm 분석 때에는 | ||
| + | # 고유의 설명력만을 가지고 (semi-partial cor^2) | ||
| + | # 판단을 하는 반면에, anova는 x2와 공유하는 | ||
| + | # 설명력도 포함해서 분석하기 때문에 t값의 | ||
| + | # 제곱이 F값이 되지 못함 | ||
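# (added sketch) the claim above can be checked directly: for the
# last-entered IV (x2) the anova F equals the squared t from
# summary(lm), while for the first-entered IV (x1) the two differ
summary(lm.y.x1x2)$coefficients["x2", "t value"]^2
anova(lm.y.x1x2)["x2", "F value"]
summary(lm.y.x1x2)$coefficients["x1", "t value"]^2
anova(lm.y.x1x2)["x1", "F value"]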
| + | |||
| + | # beta에 대한 설명 | ||
| + | # beta coefficient (standardized b) | ||
| + | # beta <- b * (sd(x)/ | ||
| + | beta1 <- b1 * (sd(df$x1)/ | ||
| + | beta2 <- b2 * (sd(df$x2)/ | ||
| + | beta1 | ||
| + | beta2 | ||
| + | |||
| + | # install.packages(" | ||
| + | library(lm.beta) | ||
| + | lm.beta(lm.y.x1x2) | ||
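# (added sketch) equivalently, the betas are the slopes obtained
# by regressing the standardized (z-scored) variables directly
lm(scale(y) ~ scale(x1) + scale(x2), data=df)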
| + | |||
| + | ####################################################### | ||
| + | # partial correlation coefficient and pr2 | ||
| + | # x2's explanation? | ||
| + | # understand with diagrams first | ||
| + | # then calculate with r | ||
| + | lm.tmp.1 <- lm(x2~x1, data=df) | ||
| + | res.x2.x1 <- lm.tmp.1$residuals | ||
| + | |||
| + | lm.tmp.2 <- lm(y~x1, data=df) | ||
| + | res.y.x1 <- lm.tmp.2$residuals | ||
| + | |||
| + | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) | ||
| + | summary(lm.tmp.3) | ||
| + | summary(lm.tmp.3)$r.squared | ||
| + | sqrt(summary(lm.tmp.3)$r.squared) | ||
| + | # install.packages(" | ||
| + | library(ppcor) | ||
| + | partial.r <- pcor.test(df$y, | ||
| + | str(partial.r) | ||
| + | partial.r$estimate | ||
| + | summary(lm.tmp.3) | ||
| + | summary(lm.tmp.3)$r.square | ||
| + | partial.r$estimate^2 | ||
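# (added note) sqrt(r.squared) recovers only the magnitude of the
# partial correlation; its sign comes from the slope of lm.tmp.3
sign(coef(lm.tmp.3)[2]) * sqrt(summary(lm.tmp.3)$r.squared)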
| + | |||
| + | |||
| + | # x1's own explanation? | ||
| + | lm.tmp.4 <- lm(x1~x2, data=df) | ||
| + | res.x1.x2 <- lm.tmp.4$residuals | ||
| + | |||
| + | lm.tmp.5 <- lm(y~x2, data=df) | ||
| + | res.y.x2 <- lm.tmp.5$residuals | ||
| + | |||
| + | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) | ||
| + | summary(lm.tmp.6) | ||
| + | |||
| + | partial.r <- pcor.test(df$y, | ||
| + | str(partial.r) | ||
| + | partial.r$estimate # this is partial correlation, | ||
| + | # in order to get pr2, you should ^2 | ||
| + | partial.r$estimate^2 | ||
| + | summary(lm.tmp.6)$r.squared | ||
| + | |||
| + | ####################################################### | ||
| + | # semipartial correlation coefficient and spr2 | ||
| + | # | ||
| + | spr.y.x2.x1 <- spcor.test(df$y, | ||
| + | spr.y.x1.x2 <- spcor.test(df$y, | ||
| + | spr.y.x2.x1 | ||
| + | spr.y.x1.x2 | ||
| + | spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | ||
| + | spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | ||
| + | spr2.y.x2.x1 | ||
| + | spr2.y.x1.x2 | ||
| + | |||
| + | lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) | ||
| + | summary(lm.tmp.7) | ||
| + | spr2.y.x2.x1 | ||
| + | |||
| + | lm.tmp.8 <- lm(y~res.x1.x2, | ||
| + | summary(lm.tmp.8) | ||
| + | spr2.y.x1.x2 | ||
| + | |||
| + | bcd # remember bcd in the above? | ||
| + | bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
| + | bd | ||
| + | bcd - bd | ||
| + | |||
| + | ####################################################### | ||
| + | # get the common area that explain the y variable | ||
| + | # 1. | ||
| + | summary(lm.y.x2) | ||
| + | all.x2 <- summary(lm.y.x2)$r.squared | ||
| + | all.x2 | ||
| + | spr2.y.x2.x1 | ||
| + | cma.1 <- all.x2 - spr2.y.x2.x1 | ||
| + | cma.1 | ||
| + | |||
| + | # 2. | ||
| + | summary(lm.y.x1) | ||
| + | all.x1 <- summary(lm.y.x1)$r.squared | ||
| + | all.x1 | ||
| + | spr2.y.x1.x2 | ||
| + | cma.2 <- all.x1 - spr2.y.x1.x2 | ||
| + | cma.2 | ||
| + | |||
| + | # OR 3. | ||
| + | summary(lm.y.x1x2) | ||
| + | r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| + | r2.y.x1x2 | ||
| + | spr2.y.x1.x2 | ||
| + | spr2.y.x2.x1 | ||
| + | cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | ||
| + | bcd - bd | ||
| + | cma.3 | ||
| + | |||
| + | cma.1 | ||
| + | cma.2 | ||
| + | cma.3 | ||
| + | |||
| + | # OR 애초에 simple regression과 multiple | ||
| + | # regression에서 얻은 R2을 가지고 | ||
| + | # 공통설명력을 알아볼 수도 있었다. | ||
| + | r2.y.x1 <- summary(lm.y.x1)$r.square | ||
| + | r2.y.x2 <- summary(lm.y.x2)$r.square | ||
| + | r2.y.x1 | ||
| + | r2.y.x2 | ||
| + | r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| + | r2.y.x1x2 | ||
| + | cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | ||
| + | cma.4 | ||
| + | |||
| + | # Note that sorting out unique and common | ||
| + | # explanation area is only possible with | ||
| + | # semi-partial correlation determinant | ||
| + | # NOT partial correlation determinant | ||
| + | # because only semi-partial correlation | ||
| + | # shares the same denominator (as total | ||
| + | # y). | ||
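# (added sketch) a quick numerical check of that decomposition:
# unique x1 + unique x2 + common area reproduces the multiple R2
spr2.y.x1.x2 + spr2.y.x2.x1 + cma.4
r2.y.x1x2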
| + | ############################################# | ||
| + | |||
| + | |||
| </ | </ | ||
| ====== output ====== | ====== output ====== | ||
| < | < | ||
> # multiple regression: a simple e.g.
> #
> #
> rm(list=ls())
> df <- read.csv("...")
> df
     ...
1    6 220  5
2    5 190  6
3    7 260  3
4    7 200  4
5    8 330  2
6   10 490  4
7    8 210  3
8   11 380  2
9    9 320  1
10   9 270  3
> 
> colnames(df) <- c("y", "x1", "x2")
> df
    y  x1 x2
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> # y  = number of bank accounts
> # x1 = income
> # x2 = number of dependents
> lm.y.x1 <- lm(y ~ x1, data=df)
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.519 -0.897 -0.130    ...    ...

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)      ...
x1               ...
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.18 on 8 degrees of freedom
Multiple R-squared:  0.6311,    Adjusted R-squared:  0.585
F-statistic: 13.69 on 1 and 8 DF,  p-value: ...

> anova(lm.y.x1)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1    ...
Residuals  8    ...
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> cor(df$x1, df$y)^2
[1] 0.6311
> summary(lm.y.x1)$r.squared
[1] 0.6311
| + | > | ||
| + | > | ||
| + | > lm.y.x2 <- lm(y ~ x2, data=df) | ||
| + | > summary(lm.y.x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.254 -0.888 -0.485 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x2 -0.846 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.4 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > anova(lm.y.x2) | ||
| + | Analysis of Variance Table | ||
| + | |||
| + | Response: y | ||
| + | Df Sum Sq Mean Sq F value Pr(> | ||
| + | x2 | ||
| + | Residuals | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | > cor(df$x2, df$y)^2 | ||
| + | [1] 0.4793 | ||
| + | > summary(lm.y.x2)$r.squared | ||
| + | [1] 0.4793 | ||
| + | > | ||
| + | > | ||
> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151    ...    ...

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910
x1           0.01184
x2          -0.54473
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,    Adjusted R-squared:  0.7404
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> anova(lm.y.x1x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1    ...
x2         1    ...
Residuals  7  6.056  0.8652
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> bcd <- summary(lm.y.x1x2)$r.squared
> bcd
[1] 0.7981
> bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
> bc.cd
[1] 1.11
> bc.cd - bcd # note that this is the amount shared by the two IVs
[1] 0.3123
| + | > | ||
| + | > | ||
| + | > lm.y.x1x2$coefficient | ||
| + | (Intercept) | ||
| + | 6.39910 | ||
| + | > # y.hat = 6.399103 + (0.01184145)*x1 + (? | ||
| + | > a <- lm.y.x1x2$coefficient[1] | ||
| + | > b1 <- lm.y.x1x2$coefficient[2] | ||
| + | > b2 <- lm.y.x1x2$coefficient[3] | ||
| + | > a | ||
| + | (Intercept) | ||
| + | 6.399 | ||
| + | > b1 | ||
| + | | ||
| + | 0.01184 | ||
| + | > b2 | ||
| + | | ||
| + | -0.5447 | ||
| + | > | ||
| + | > y.pred <- a + (b1 * df$x1) + (b2 * df$x2) | ||
| + | > y.pred | ||
| + | | ||
| + | > # or | ||
| + | > y.pred2 <- predict(lm.y.x1x2) | ||
| + | > head(y.pred == y.pred2) | ||
| + | | ||
| + | TRUE TRUE TRUE TRUE TRUE TRUE | ||
| + | > | ||
| + | > y.real <- df$y | ||
| + | > y.real | ||
| + | | ||
| + | > y.mean <- mean(df$y) | ||
| + | > y.mean | ||
| + | [1] 8 | ||
| + | > | ||
| + | > deviation.score <- df$y - y.mean | ||
| + | > ds <- deviation.score | ||
| + | > res <- y.real - y.pred | ||
| + | > reg <- y.pred - y.mean | ||
| + | > y.mean | ||
| + | [1] 8 | ||
| + | > # remember y is sum of res + reg + y.mean | ||
| + | > y2 <- res + reg + y.mean | ||
| + | > df$y==y2 | ||
| + | [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | ||
| + | > | ||
| + | > ss.tot <- sum(ds^2) | ||
| + | > ss.res <- sum(res^2) | ||
| + | > ss.reg <- sum(reg^2) | ||
| + | > | ||
| + | > ss.tot2 <- var(df$y) * (length(df$y)-1) | ||
| + | > ss.tot | ||
| + | [1] 30 | ||
| + | > ss.tot2 | ||
| + | [1] 30 | ||
| + | > ss.res | ||
| + | [1] 6.056 | ||
| + | > ss.reg | ||
| + | [1] 23.94 | ||
| + | > ss.res+ss.reg | ||
| + | [1] 30 | ||
| + | > | ||
| + | > k <- 3 # # of parameters a, b1, b2 | ||
| + | > df.tot <- length(df$y)-1 | ||
| + | > df.reg <- k - 1 | ||
| + | > df.res <- df.tot - df.reg | ||
| + | > | ||
| + | > ms.reg <- ss.reg/ | ||
| + | > ms.res <- ss.res/ | ||
| + | > ms.reg | ||
| + | [1] 11.97 | ||
| + | > ms.res | ||
| + | [1] 0.8652 | ||
| + | > f.val <- ms.reg/ | ||
| + | > f.val | ||
| + | [1] 13.84 | ||
| + | > p.val <- pf(f.val, df.reg, df.res, lower.tail = F) | ||
| + | > p.val | ||
| + | [1] 0.003696 | ||
| + | > | ||
| + | > # double check | ||
| + | > summary(lm.y.x1x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1 + x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.217 -0.578 -0.151 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x1 | ||
| + | x2 -0.54473 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.93 on 7 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > anova(lm.y.x1x2) | ||
| + | Analysis of Variance Table | ||
| + | |||
| + | Response: y | ||
| + | Df Sum Sq Mean Sq F value Pr(> | ||
| + | x1 | ||
| + | x2 | ||
| + | Residuals | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | > | ||
| + | > summary(lm(y~x2+x1, | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x2 + x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.217 -0.578 -0.151 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x2 -0.54473 | ||
| + | x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.93 on 7 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > anova(lm(y~x2+x1, | ||
| + | Analysis of Variance Table | ||
| + | |||
| + | Response: y | ||
| + | Df Sum Sq Mean Sq F value Pr(> | ||
| + | x2 | ||
| + | x1 | ||
| + | Residuals | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | > | ||
| + | > # note on 2 t-tests in summary | ||
| + | > # anova에서의 x1, x2에 대한 테스트와 | ||
| + | > # lm에서의 x1, x2에 대한 테스트 (t-test) 간에 | ||
| + | > # 차이가 있음에 주의 (x1, x2에 대한 Pr 값이 | ||
| + | > # 다름). 그 이유는 | ||
| + | > # t-tests는 __pr__ 테스트로 테스트를 | ||
| + | > # (spr, zero_order_r 테스트가 아님) 하고 | ||
| + | > # anova test는 x1 전체에 대한 테스트 하고 | ||
| + | > # x2는 x1에 대한 테스트 외에 나머지를 가지고 | ||
| + | > # 테스트하기 때문에 그러함 | ||
| + | > | ||
| + | > # 또한 anova test에서 두번째 IV의 F값은 | ||
| + | > # summary(lm)에서 두번 째 IV의 t값의 제곱값 | ||
| + | > # 임을 이해. 이는 두번 째 IV의 설명력을 나 | ||
| + | > # 타내는 부분이 lm과 anova 모두 같기 때문 | ||
| + | > # 반면에 첫번째 IV의 경우에는 lm 분석 때에는 | ||
| + | > # 고유의 설명력만을 가지고 (semi-partial cor^2) | ||
| + | > # 판단을 하는 반면에, anova는 x2와 공유하는 | ||
| + | > # 설명력도 포함해서 분석하기 때문에 t값의 | ||
| + | > # 제곱이 F값이 되지 못함 | ||
| + | > | ||
| + | > # beta에 대한 설명 | ||
| + | > # beta coefficient (standardized b) | ||
| + | > # beta <- b * (sd(x)/ | ||
| + | > beta1 <- b1 * (sd(df$x1)/ | ||
| + | > beta2 <- b2 * (sd(df$x2)/ | ||
| + | > beta1 | ||
| + | x1 | ||
| + | 0.6161 | ||
| + | > beta2 | ||
| + | | ||
| + | -0.4459 | ||
| + | > | ||
| + | > # install.packages(" | ||
| + | > library(lm.beta) | ||
| + | > lm.beta(lm.y.x1x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1 + x2, data = df) | ||
| + | |||
| + | Standardized Coefficients:: | ||
| + | (Intercept) | ||
| + | | ||
| + | |||
| + | > | ||
| + | > ####################################################### | ||
| + | > # partial correlation coefficient and pr2 | ||
| + | > # x2's explanation? | ||
| + | > # understand with diagrams first | ||
| + | > # then calculate with r | ||
| + | > lm.tmp.1 <- lm(x2~x1, data=df) | ||
| + | > res.x2.x1 <- lm.tmp.1$residuals | ||
| + | > | ||
| + | > lm.tmp.2 <- lm(y~x1, data=df) | ||
| + | > res.y.x1 <- lm.tmp.2$residuals | ||
| + | > | ||
| + | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) | ||
| + | > summary(lm.tmp.3) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.217 -0.578 -0.151 | ||
| + | |||
| + | Coefficients: | ||
| + | | ||
| + | (Intercept) | ||
| + | res.x2.x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.87 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > summary(lm.tmp.3)$r.squared | ||
| + | [1] 0.4527 | ||
| + | > sqrt(summary(lm.tmp.3)$r.squared) | ||
| + | [1] 0.6729 | ||
> # install.packages("ppcor")
> library(ppcor)
> partial.r <- pcor.test(df$y, df$x2, df$x1)
> str(partial.r)
'data.frame':	1 obs. of  6 variables:
 $ estimate : num -0.673
 $ p.value  : num ...
 $ statistic: num -2.41
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r$estimate
[1] -0.6729
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151    ...    ...

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)      ...
res.x2.x1        ...
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.4527,    Adjusted R-squared:  0.3843
F-statistic: 6.618 on 1 and 8 DF,  p-value: ...

> summary(lm.tmp.3)$r.square
[1] 0.4527
> partial.r$estimate^2
[1] 0.4527
> 
| + | > | ||
| + | > # x1's own explanation? | ||
| + | > lm.tmp.4 <- lm(x1~x2, data=df) | ||
| + | > res.x1.x2 <- lm.tmp.4$residuals | ||
| + | > | ||
| + | > lm.tmp.5 <- lm(y~x2, data=df) | ||
| + | > res.y.x2 <- lm.tmp.5$residuals | ||
| + | > | ||
| + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) | ||
| + | > summary(lm.tmp.6) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = res.y.x2 ~ res.x1.x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.217 -0.578 -0.151 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) 1.33e-17 | ||
| + | res.x1.x2 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.87 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > | ||
| + | > partial.r <- pcor.test(df$y, | ||
| + | > str(partial.r) | ||
| + | ' | ||
| + | $ estimate : num 0.783 | ||
| + | $ p.value | ||
| + | $ statistic: num 3.33 | ||
| + | $ n : int 10 | ||
| + | $ gp : num 1 | ||
| + | $ Method | ||
| + | > partial.r$estimate # this is partial correlation, | ||
| + | [1] 0.7825 | ||
| + | > # in order to get pr2, you should ^2 | ||
| + | > partial.r$estimate^2 | ||
| + | [1] 0.6123 | ||
| + | > summary(lm.tmp.6)$r.squared | ||
| + | [1] 0.6123 | ||
| + | > | ||
| + | > ####################################################### | ||
| + | > # semipartial correlation coefficient and spr2 | ||
| + | > # | ||
| + | > spr.y.x2.x1 <- spcor.test(df$y, | ||
| + | > spr.y.x1.x2 <- spcor.test(df$y, | ||
| + | > spr.y.x2.x1 | ||
| + | estimate p.value statistic | ||
| + | 1 -0.4087 | ||
| + | > spr.y.x1.x2 | ||
| + | estimate p.value statistic | ||
| + | 1 | ||
| + | > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | ||
| + | > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > | ||
| + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) | ||
| + | > summary(lm.tmp.7) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ res.x2.x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.862 -1.171 -0.494 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | res.x2.x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.77 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > | ||
| + | > lm.tmp.8 <- lm(y~res.x1.x2, | ||
| + | > summary(lm.tmp.8) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ res.x1.x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -2.664 -0.608 -0.149 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | res.x1.x2 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.6 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > | ||
| + | > bcd # remember bcd in the above? | ||
| + | [1] 0.7981 | ||
| + | > bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
| + | > bd | ||
| + | [1] 0.4859 | ||
| + | > bcd - bd | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > ####################################################### | ||
| + | > # get the common area that explain the y variable | ||
| + | > # 1. | ||
| + | > summary(lm.y.x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.254 -0.888 -0.485 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x2 -0.846 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.4 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > all.x2 <- summary(lm.y.x2)$r.squared | ||
| + | > all.x2 | ||
| + | [1] 0.4793 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > cma.1 <- all.x2 - spr2.y.x2.x1 | ||
| + | > cma.1 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # 2. | ||
| + | > summary(lm.y.x1) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.519 -0.897 -0.130 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.18 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > all.x1 <- summary(lm.y.x1)$r.squared | ||
| + | > all.x1 | ||
| + | [1] 0.6311 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > cma.2 <- all.x1 - spr2.y.x1.x2 | ||
| + | > cma.2 | ||
| + | [1] 0.3123 | ||
| + | > | ||
> # OR 3.
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151    ...    ...

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910
x1           0.01184
x2          -0.54473
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,    Adjusted R-squared:  0.7404
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
> r2.y.x1x2
[1] 0.7981
> spr2.y.x1.x2
[1] 0.3189
> spr2.y.x2.x1
[1] 0.167
> cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
> bcd - bd
[1] 0.3123
> cma.3
[1] 0.3123
> 
> cma.1
[1] 0.3123
> cma.2
[1] 0.3123
> cma.3
[1] 0.3123
| + | > | ||
| + | > # OR 애초에 simple regression과 multiple | ||
| + | > # regression에서 얻은 R2을 가지고 | ||
| + | > # 공통설명력을 알아볼 수도 있었다. | ||
| + | > r2.y.x1 <- summary(lm.y.x1)$r.square | ||
| + | > r2.y.x2 <- summary(lm.y.x2)$r.square | ||
| + | > r2.y.x1 | ||
| + | [1] 0.6311 | ||
| + | > r2.y.x2 | ||
| + | [1] 0.4793 | ||
| + | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| + | > r2.y.x1x2 | ||
| + | [1] 0.7981 | ||
| + | > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | ||
| + | > cma.4 | ||
| + | [1] 0.3123 | ||
| + | > | ||
> # Note that sorting out the unique and common
> # explanation areas is only possible with the
> # squared semipartial correlation,
> # NOT the squared partial correlation,
> # because only the semipartial correlation
> # shares the same denominator (the total
> # variance of y).
> #############################################
> 
> 
</code>
====== explanation. added ======
{{:...}}
| + | < | ||
| + | # ex. | ||
| + | # resid(lm(y~x1, | ||
| + | # resid(lm(y~x2, | ||
| + | # resid(lm(y~x1+x2, | ||
| + | # b / delta.y = ? | ||
| + | # ce / delta.x2 = ? | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | # exp.added | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | summary(lm(y~x1+x2, | ||
| + | |||
| + | b <- spcor.test(df$y, | ||
| + | d <- spcor.test(df$y, | ||
| + | bcd <- summary(lm(y~x1+x2, | ||
| + | |||
| + | summary(lm(df$y~df$x1+df$x2, | ||
| + | (spcor.test(df$y, | ||
| + | | ||
| + | bcd - (b + d) | ||
| + | |||
| + | </ | ||
| + | ====== Another one ====== | ||
| + | [[:partial and semipartial correlation]] | ||