| # | # | ||
| rm(list=ls()) | rm(list=ls()) | ||
| - | d <- read.csv(" | + | df <- read.csv(" |
| - | d | + | df |
| - | colnames(d) <- c(" | + | colnames(df) <- c(" |
| - | d | + | df |
| # y = 통장갯수 | # y = 통장갯수 | ||
| # x1 = 인컴 | # x1 = 인컴 | ||
| # x2 = 부양가족수 | # x2 = 부양가족수 | ||
| - | lm.y.x1 <- lm(y ~ x1, data=d) | + | lm.y.x1 <- lm(y ~ x1, data=df) |
| summary(lm.y.x1) | summary(lm.y.x1) | ||
| anova(lm.y.x1) | anova(lm.y.x1) | ||
| + | cor(df$x1, df$y)^2 | ||
| + | summary(lm.y.x1)$r.squared | ||

lm.y.x2 <- lm(y ~ x2, data=df)
summary(lm.y.x2)
anova(lm.y.x2)
cor(df$x2, df$y)^2
summary(lm.y.x2)$r.squared

lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
summary(lm.y.x1x2)
anova(lm.y.x1x2)
bcd <- summary(lm.y.x1x2)$r.squared
bcd
bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
bc.cd
bc.cd - bcd # note that this is the amount shared by the two IVs

lm.y.x1x2$coefficient
# y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
a <- lm.y.x1x2$coefficient[1]
b1 <- lm.y.x1x2$coefficient[2]
b2 <- lm.y.x1x2$coefficient[3]
a
b1
b2

y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
y.pred
# or
y.pred2 <- predict(lm.y.x1x2)
head(y.pred == y.pred2)

y.real <- df$y
y.real
y.mean <- mean(df$y)
y.mean

deviation.score <- df$y - y.mean
ds <- deviation.score
res <- y.real - y.pred
reg <- y.pred - y.mean
y.mean
# remember y is the sum of res + reg + y.mean
y2 <- res + reg + y.mean
df$y==y2

ss.tot <- sum(ds^2)
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)

ss.tot2 <- var(df$y) * (length(df$y)-1)
ss.tot
ss.tot2
ss.res
ss.reg
ss.res+ss.reg
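# (quick check, not in the original script) R2 follows
# directly from these sums: R2 = ss.reg/ss.tot
ss.reg/ss.tot          # should equal summary(lm.y.x1x2)$r.squared
1 - (ss.res/ss.tot)    # same thing, via the residual sum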

k <- 3 # # of parameters a, b1, b2
df.tot <- length(df$y)-1
df.reg <- k - 1
df.res <- df.tot - df.reg

ms.reg <- ss.reg/df.reg
ms.res <- ss.res/df.res
ms.reg
ms.res
f.val <- ms.reg/ms.res
f.val
p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
p.val

# double check
summary(lm.y.x1x2)
anova(lm.y.x1x2)

summary(lm(y~x2+x1, data=df))
anova(lm(y~x2+x1, data=df))

# note on the 2 t-tests in summary:
# The tests for x1 and x2 in anova() differ from the
# t-tests for x1 and x2 in summary(lm) (the Pr values
# for x1 and x2 are not the same). The reason is that
# the t-tests test the partial correlation (pr)
# (not the spr or the zero-order r), while the anova
# test assesses x1 as a whole and then tests x2 only
# with what remains after x1.

# Also understand that, in the anova test, the F value
# of the second IV is the square of that IV's t value
# in summary(lm). That is because the part representing
# the second IV's explanatory power is the same in both
# lm and anova. For the first IV, in contrast, lm judges
# it by its unique explanatory power only (semi-partial
# cor^2), whereas anova also includes the explanatory
# power shared with x2, so the squared t value does not
# equal the F value.
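# a quick numeric check of the claim above: for the
# last-entered IV, t^2 in summary() equals F in anova()
t.x2 <- summary(lm.y.x1x2)$coefficients["x2", "t value"]
t.x2^2
anova(lm.y.x1x2)["x2", "F value"]   # should match t.x2^2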
| + | |||
| + | # beta에 대한 설명 | ||
| # beta coefficient (standardized b) | # beta coefficient (standardized b) | ||
| # beta <- b * (sd(x)/ | # beta <- b * (sd(x)/ | ||
| - | beta1 <- b1 * (sd(x1)/ | + | beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
| - | beta2 <- b2 * (sd(x2)/ | + | beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
| beta1 | beta1 | ||
| beta2 | beta2 | ||
| Line 105: | Line 136: | ||
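# (added sketch) the same betas come out of refitting the
# model on z-scores; scale() standardizes each variable
lm(scale(y) ~ scale(x1) + scale(x2), data=df)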

#######################################################
# partial correlation coefficient and pr2
# x2's explanation?
# understand with diagrams first
# then calculate with r
lm.tmp.1 <- lm(x2~x1, data=df)
res.x2.x1 <- lm.tmp.1$residuals

lm.tmp.2 <- lm(y~x1, data=df)
res.y.x1 <- lm.tmp.2$residuals

lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
summary(lm.tmp.3)
summary(lm.tmp.3)$r.squared
sqrt(summary(lm.tmp.3)$r.squared)

# install.packages("ppcor")
library(ppcor)
partial.r <- pcor.test(df$y, df$x2, df$x1)
str(partial.r)
partial.r$estimate
summary(lm.tmp.3)
summary(lm.tmp.3)$r.square
partial.r$estimate^2

# x1's own explanation?
lm.tmp.4 <- lm(x1~x2, data=df)
res.x1.x2 <- lm.tmp.4$residuals

lm.tmp.5 <- lm(y~x2, data=df)
res.y.x2 <- lm.tmp.5$residuals

lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
summary(lm.tmp.6)

partial.r <- pcor.test(df$y, df$x1, df$x2)
str(partial.r)
partial.r$estimate # this is the partial correlation, not pr2
# in order to get pr2, you should ^2 it
partial.r$estimate^2
summary(lm.tmp.6)$r.squared
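# (added cross-check; a standard identity, not in the
# original script) pr2 = t^2 / (t^2 + df.res), using the
# IV's t value from the full model
t.x1 <- summary(lm.y.x1x2)$coefficients["x1", "t value"]
t.x1^2 / (t.x1^2 + lm.y.x1x2$df.residual)   # = 0.6123, as above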
| ####################################################### | ####################################################### | ||
| - | # | ||
| # semipartial correlation coefficient and spr2 | # semipartial correlation coefficient and spr2 | ||
| # | # | ||
| - | spr.1 <- spcor.test(y, | + | spr.y.x2.x1 |
| - | spr.2 <- spcor.test(y, | + | spr.y.x1.x2 |
| - | spr.1 | + | spr.y.x2.x1 |
| - | spr.2 | + | spr.y.x1.x2 |
| - | spr.1$estimate^2 | + | spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 |
| - | spr.2$estimate^2 | + | spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 |
| + | spr2.y.x2.x1 | ||
| + | spr2.y.x1.x2 | ||

lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
summary(lm.tmp.7)
spr2.y.x2.x1

lm.tmp.8 <- lm(y~res.x1.x2, data=df)
summary(lm.tmp.8)
spr2.y.x1.x2

bcd # remember bcd in the above?
bd <- spr2.y.x2.x1 + spr2.y.x1.x2
bd
bcd - bd
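# (added check) spr2 is also the R2 gain when an IV
# enters the model last
summary(lm.y.x1x2)$r.squared - summary(lm.y.x1)$r.squared # = spr2.y.x2.x1
summary(lm.y.x1x2)$r.squared - summary(lm.y.x2)$r.squared # = spr2.y.x1.x2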
| ####################################################### | ####################################################### | ||
| Line 164: | Line 203: | ||
| summary(lm.y.x2) | summary(lm.y.x2) | ||
| all.x2 <- summary(lm.y.x2)$r.squared | all.x2 <- summary(lm.y.x2)$r.squared | ||
| - | sp.x2 <- spr.1$estimate^2 | ||
| all.x2 | all.x2 | ||
| - | sp.x2 | + | spr2.y.x2.x1 |
| - | cma.1 <- all.x2 - sp.x2 | + | cma.1 <- all.x2 - spr2.y.x2.x1 |
| cma.1 | cma.1 | ||

# 2.
summary(lm.y.x1)
all.x1 <- summary(lm.y.x1)$r.squared
all.x1
spr2.y.x1.x2
cma.2 <- all.x1 - spr2.y.x1.x2
cma.2

# OR 3.
summary(lm.y.x1x2)
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
spr2.y.x1.x2
spr2.y.x2.x1
cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
bcd - bd
cma.3

cma.1
cma.2
cma.3

# OR: the common explanatory power could have been
# obtained from the start with the R2s from the
# simple and multiple regressions.
r2.y.x1 <- summary(lm.y.x1)$r.square
r2.y.x2 <- summary(lm.y.x2)$r.square
r2.y.x1
r2.y.x2
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2
cma.4
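# (added check) all four routes land on the same common area
c(cma.1, cma.2, cma.3, cma.4)   # each 0.3123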

# Note that sorting out unique and common
# explanation areas is only possible with the
# semi-partial correlation determinant,
# NOT the partial correlation determinant,
# because only the semi-partial correlation
# shares the same denominator (the total
# variance of y).
#############################################
| - | ############################################# | ||
| - | # scratch | ||
| - | ############################################# | ||
| - | lm.x1x2 <- lm(x1~x2, data = d) | ||
| - | lm.x2x1 <- lm(x2~x1, data = d) | ||
| - | summary(lm.y.x1) | + | </ |
| - | summary(lm.y.x2) | + | ====== output ====== |
| - | summary(lm.y.x1x2) | + | < |
> # multiple regression: a simple e.g.
> #
> #
> rm(list=ls())
> df <- read.csv("...")
> df
   ...
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> 
> colnames(df) <- c("y", "x1", "x2")
> df
    y  x1 x2
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> # y = number of bank accounts
> # x1 = income
> # x2 = number of dependents
> lm.y.x1 <- lm(y ~ x1, data=df)
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.519 -0.897 -0.130  1.006  1.580 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)      ...
x1           0.01527        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.18 on 8 degrees of freedom
Multiple R-squared:  0.6311,  Adjusted R-squared:  ...
F-statistic: ...

> anova(lm.y.x1)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1  18.93     ...
Residuals  8  11.07     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x1, df$y)^2
[1] 0.6311
> summary(lm.y.x1)$r.squared
[1] 0.6311
> 
> 
> lm.y.x2 <- lm(y ~ x2, data=df)
> summary(lm.y.x2)

Call:
lm(formula = y ~ x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.254 -0.888 -0.485  0.496  2.592 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   10.791        ...
x2            -0.846        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.4 on 8 degrees of freedom
Multiple R-squared:  0.4793,  Adjusted R-squared:  ...
F-statistic: ...

> anova(lm.y.x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x2         1  14.38     ...
Residuals  8  15.62     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x2, df$y)^2
[1] 0.4793
> summary(lm.y.x2)$r.squared
[1] 0.4793
> 
> 
> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910        ...
x1           0.01184        ...
x2          -0.54473        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,  Adjusted R-squared:  ...
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> anova(lm.y.x1x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1  18.93     ...
x2         1   5.01     ...
Residuals  7   6.06     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> bcd <- summary(lm.y.x1x2)$r.squared
> bcd
[1] 0.7981
> bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
> bc.cd
[1] 1.11
> bc.cd - bcd # note that this is the amount shared by the two IVs
[1] 0.3123
> 
> 
> lm.y.x1x2$coefficient
(Intercept)          x1          x2 
    6.39910     0.01184    -0.54473 
> # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
> a <- lm.y.x1x2$coefficient[1]
> b1 <- lm.y.x1x2$coefficient[2]
> b2 <- lm.y.x1x2$coefficient[3]
> a
(Intercept) 
      6.399 
> b1
     x1 
0.01184 
> b2
     x2 
-0.5447 
> 
> y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
> y.pred
 [1] ...
> # or
> y.pred2 <- predict(lm.y.x1x2)
> head(y.pred == y.pred2)
   1    2    3    4    5    6 
TRUE TRUE TRUE TRUE TRUE TRUE 
> 
> y.real <- df$y
> y.real
 [1]  6  5  7  7  8 10  8 11  9  9
> y.mean <- mean(df$y)
> y.mean
[1] 8
> 
> deviation.score <- df$y - y.mean
> ds <- deviation.score
> res <- y.real - y.pred
> reg <- y.pred - y.mean
> y.mean
[1] 8
> # remember y is the sum of res + reg + y.mean
> y2 <- res + reg + y.mean
> df$y==y2
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
> 
> ss.tot <- sum(ds^2)
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)
> 
> ss.tot2 <- var(df$y) * (length(df$y)-1)
> ss.tot
[1] 30
> ss.tot2
[1] 30
> ss.res
[1] 6.056
> ss.reg
[1] 23.94
> ss.res+ss.reg
[1] 30
> 
> k <- 3 # # of parameters a, b1, b2
> df.tot <- length(df$y)-1
> df.reg <- k - 1
> df.res <- df.tot - df.reg
> 
> ms.reg <- ss.reg/df.reg
> ms.res <- ss.res/df.res
> ms.reg
[1] 11.97
> ms.res
[1] 0.8652
> f.val <- ms.reg/ms.res
> f.val
[1] 13.84
> p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
> p.val
[1] 0.003696
> 
> # double check
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910        ...
x1           0.01184        ...
x2          -0.54473        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,  Adjusted R-squared:  ...
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> anova(lm.y.x1x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1  18.93     ...
x2         1   5.01     ...
Residuals  7   6.06     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> 
> summary(lm(y~x2+x1, data=df))

Call:
lm(formula = y ~ x2 + x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910        ...
x2          -0.54473        ...
x1           0.01184        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,  Adjusted R-squared:  ...
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696
> anova(lm(y~x2+x1, data=df))
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x2         1  14.38     ...
x1         1   9.57    9.57    11.1 0.0127 *
Residuals  7   6.06     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> 
> # note on the 2 t-tests in summary:
> # The tests for x1 and x2 in anova() differ from the
> # t-tests for x1 and x2 in summary(lm) (the Pr values
> # for x1 and x2 are not the same). The reason is that
> # the t-tests test the partial correlation (pr)
> # (not the spr or the zero-order r), while the anova
> # test assesses x1 as a whole and then tests x2 only
> # with what remains after x1.
> 
> # Also understand that, in the anova test, the F value
> # of the second IV is the square of that IV's t value
> # in summary(lm). That is because the part representing
> # the second IV's explanatory power is the same in both
> # lm and anova. For the first IV, in contrast, lm judges
> # it by its unique explanatory power only (semi-partial
> # cor^2), whereas anova also includes the explanatory
> # power shared with x2, so the squared t value does not
> # equal the F value.
> 
> # explanation of beta
> # beta coefficient (standardized b)
> # beta <- b * (sd(x)/sd(y))
> beta1 <- b1 * (sd(df$x1)/sd(df$y))
> beta2 <- b2 * (sd(df$x2)/sd(df$y))
> beta1
    x1 
0.6161 
> beta2
     x2 
-0.4459 
> 
> # install.packages("lm.beta")
> library(lm.beta)
> lm.beta(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Standardized Coefficients::
(Intercept)          x1          x2 
         NA      0.6161     -0.4459 

> 
> #######################################################
> # partial correlation coefficient and pr2
> # x2's explanation?
> # understand with diagrams first
> # then calculate with r
> lm.tmp.1 <- lm(x2~x1, data=df)
> res.x2.x1 <- lm.tmp.1$residuals
> 
> lm.tmp.2 <- lm(y~x1, data=df)
> res.y.x1 <- lm.tmp.2$residuals
> 
> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)       ...
res.x2.x1   -5.45e-01        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.4527,  Adjusted R-squared:  ...
F-statistic: ...
> summary(lm.tmp.3)$r.squared
[1] 0.4527
> sqrt(summary(lm.tmp.3)$r.squared)
[1] 0.6729
> # install.packages("ppcor")
> library(ppcor)
> partial.r <- pcor.test(df$y, df$x2, df$x1)
> str(partial.r)
'data.frame':	1 obs. of  6 variables:
 $ estimate : num -0.673
 $ p.value  : num ...
 $ statistic: num -2.41
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r$estimate
[1] -0.6729
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)       ...
res.x2.x1         ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.4527,  Adjusted R-squared:  ...
F-statistic: ...
> summary(lm.tmp.3)$r.square
[1] 0.4527
> partial.r$estimate^2
[1] 0.4527
| + | > | ||
| + | > | ||
| + | > # x1's own explanation? | ||
| + | > lm.tmp.4 <- lm(x1~x2, data=df) | ||
| + | > res.x1.x2 <- lm.tmp.4$residuals | ||
| + | > | ||
| + | > lm.tmp.5 <- lm(y~x2, data=df) | ||
| + | > res.y.x2 <- lm.tmp.5$residuals | ||
| + | > | ||
| + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) | ||
| + | > summary(lm.tmp.6) | ||
| - | summary(lm(happiness~stress, | + | Call: |
| - | summary(lm(happiness~reg.bmi, data = d.yyk)) | + | lm(formula |
| - | summary(lm(happiness~bmi, | + | Residuals: |
| - | summary(lm(happiness~reg.stress, data=d.yyk)) | + | Min 1Q Median |
| + | -1.217 -0.578 -0.151 | ||
| - | plot(tmp.a, tmp.b) | + | Coefficients: |
| - | plot(tmp.b, tmp.a) | + | Estimate Std. Error t value Pr(>|t|) |
| + | (Intercept) 1.33e-17 | ||
| + | res.x1.x2 | ||
| + | --- | ||
| + | Signif. codes: | ||
| - | head(d.yyk) | + | Residual standard error: 0.87 on 8 degrees of freedom |
| - | str(d.yyk) | + | Multiple R-squared: |
| - | d.yyk | + | F-statistic: |
| - | ######################### | + | |
| + | > | ||
| + | > partial.r <- pcor.test(df$y, | ||
| + | > str(partial.r) | ||
| + | 'data.frame': | ||
| + | $ estimate : num 0.783 | ||
| + | $ p.value | ||
| + | $ statistic: num 3.33 | ||
| + | $ n : int 10 | ||
| + | $ gp : num 1 | ||
| + | $ Method | ||
| + | > partial.r$estimate | ||
| + | [1] 0.7825 | ||
| + | > # in order to get pr2, you should ^2 | ||
| + | > partial.r$estimate^2 | ||
| + | [1] 0.6123 | ||
| + | > summary(lm.tmp.6)$r.squared | ||
| + | [1] 0.6123 | ||
| + | > | ||
| + | > ####################### | ||
| + | > # semipartial correlation coefficient and spr2 | ||
| + | > # | ||
| + | > spr.y.x2.x1 <- spcor.test(df$y, | ||
| + | > spr.y.x1.x2 <- spcor.test(df$y, | ||
| + | > spr.y.x2.x1 | ||
| + | estimate p.value statistic | ||
| + | 1 -0.4087 | ||
| + | > spr.y.x1.x2 | ||
| + | estimate p.value statistic | ||
| + | 1 | ||
| + | > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | ||
| + | > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > | ||
| + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) | ||
| + | > summary(lm.tmp.7) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ res.x2.x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.862 -1.171 -0.494 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | res.x2.x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.77 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > | ||
| + | > lm.tmp.8 <- lm(y~res.x1.x2, | ||
| + | > summary(lm.tmp.8) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ res.x1.x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -2.664 -0.608 -0.149 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | res.x1.x2 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.6 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > | ||
| + | > bcd # remember bcd in the above? | ||
| + | [1] 0.7981 | ||
| + | > bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
| + | > bd | ||
| + | [1] 0.4859 | ||
| + | > bcd - bd | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > ####################################################### | ||
| + | > # get the common area that explain the y variable | ||
| + | > # 1. | ||
| + | > summary(lm.y.x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.254 -0.888 -0.485 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x2 -0.846 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.4 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > all.x2 <- summary(lm.y.x2)$r.squared | ||
| + | > all.x2 | ||
| + | [1] 0.4793 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > cma.1 <- all.x2 - spr2.y.x2.x1 | ||
| + | > cma.1 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # 2. | ||
| + | > summary(lm.y.x1) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.519 -0.897 -0.130 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.18 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > all.x1 <- summary(lm.y.x1)$r.squared | ||
| + | > all.x1 | ||
| + | [1] 0.6311 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > cma.2 <- all.x1 - spr2.y.x1.x2 | ||
| + | > cma.2 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # OR 3. | ||
| + | > summary(lm.y.x1x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1 + x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.217 -0.578 -0.151 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x1 | ||
| + | x2 -0.54473 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.93 on 7 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| + | > r2.y.x1x2 | ||
| + | [1] 0.7981 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | ||
| + | > bcd - bd | ||
| + | [1] 0.3123 | ||
| + | > cma.3 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > cma.1 | ||
| + | [1] 0.3123 | ||
| + | > cma.2 | ||
| + | [1] 0.3123 | ||
| + | > cma.3 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # OR 애초에 simple regression과 multiple | ||
| + | > # regression에서 얻은 R2을 가지고 | ||
| + | > # 공통설명력을 알아볼 수도 있었다. | ||
| + | > r2.y.x1 <- summary(lm.y.x1)$r.square | ||
| + | > r2.y.x2 <- summary(lm.y.x2)$r.square | ||
| + | > r2.y.x1 | ||
| + | [1] 0.6311 | ||
| + | > r2.y.x2 | ||
| + | [1] 0.4793 | ||
| + | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| + | > r2.y.x1x2 | ||
| + | [1] 0.7981 | ||
| + | > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | ||
| + | > cma.4 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # Note that sorting out unique and common | ||
| + | > # explanation area is only possible with | ||
| + | > # semi-partial correlation determinant | ||
| + | > # NOT partial correlation determinant | ||
| + | > # because only semi-partial correlation | ||
| + | > # shares the same denominator (as total | ||
| + | > # y). | ||
| + | > ############################################# | ||
| + | > | ||
| + | > | ||
| </ | </ | ||
| - | ====== | + | |
| + | ====== | ||
| + | {{: | ||
| < | < | ||
| + | # ex. | ||
| + | # resid(lm(y~x1, | ||
| + | # resid(lm(y~x2, | ||
| + | # resid(lm(y~x1+x2, | ||
| + | # b / delta.y = ? | ||
| + | # ce / delta.x2 = ? | ||
| </ | </ | ||
| + | < | ||
| + | # exp.added | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | summary(lm(y~x1+x2, | ||
| + | |||
| + | b <- spcor.test(df$y, | ||
| + | d <- spcor.test(df$y, | ||
| + | bcd <- summary(lm(y~x1+x2, | ||
| + | |||
| + | summary(lm(df$y~df$x1+df$x2, | ||
| + | (spcor.test(df$y, | ||
| + | | ||
| + | bcd - (b + d) | ||
| + | |||
| + | </ | ||
| + | ====== Another one ====== | ||
| + | [[:partial and semipartial correlation]] | ||