c:ms:2025:schedule:w13.lecture.note
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
c:ms:2025:schedule:w13.lecture.note [2025/06/04 07:50] – hkimscil | c:ms:2025:schedule:w13.lecture.note [2025/06/09 08:51] (current) – [output] hkimscil | ||
---|---|---|---|
Line 5: | Line 5: | ||
# | # | ||
rm(list=ls()) | rm(list=ls()) | ||
- | d <- read.csv(" | + | df <- read.csv(" |
- | d | + | df |
- | colnames(d) <- c(" | + | colnames(df) <- c(" |
- | d | + | df |
# y = 통장갯수 | # y = 통장갯수 | ||
# x1 = 인컴 | # x1 = 인컴 | ||
# x2 = 부양가족수 | # x2 = 부양가족수 | ||
- | lm.y.x1 <- lm(y ~ x1, data=d) | + | lm.y.x1 <- lm(y ~ x1, data=df) |
summary(lm.y.x1) | summary(lm.y.x1) | ||
anova(lm.y.x1) | anova(lm.y.x1) | ||
- | r.sq.y.x1 <- cor(x1, | + | cor(df$x1, df$y)^2 |
- | rsq.y.x1< | + | summary(lm.y.x1)$r.squared |
- | lm.y.x2 <- lm(y ~ x2, data=d) | + | lm.y.x2 <- lm(y ~ x2, data=df) |
summary(lm.y.x2) | summary(lm.y.x2) | ||
anova(lm.y.x2) | anova(lm.y.x2) | ||
- | rsq.y.x2 <- cor(x2, | + | cor(df$x2, df$y)^2 |
- | rsq.y.x2 <- summary(lm.y.x2)$r.squared | + | summary(lm.y.x2)$r.squared |
- | lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
summary(lm.y.x1x2) | summary(lm.y.x1x2) | ||
anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
- | rsq.y.x1x2<- summary(lm.y.x1x2)$r.squared | + | bcd <- summary(lm.y.x1x2)$r.squared |
- | rsq.y.x1 + rsq.y.x2 - rsq.y.x1x2 | + | bcd |
+ | bc.cd <- summary(lm.y.x1)$r.squared | ||
+ | bc.cd | ||
+ | bc.cd - bcd # note that this is the amount that shared by the two IVs | ||
lm.y.x1x2$coefficient | lm.y.x1x2$coefficient | ||
- | # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | # y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2 |
a <- lm.y.x1x2$coefficient[1] | a <- lm.y.x1x2$coefficient[1] | ||
b1 <- lm.y.x1x2$coefficient[2] | b1 <- lm.y.x1x2$coefficient[2] | ||
Line 42: | Line 46: | ||
b2 | b2 | ||
- | y.pred <- a + (b1 * x1) + (b2 * x2) | + | y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
y.pred | y.pred | ||
# or | # or | ||
Line 48: | Line 52: | ||
head(y.pred == y.pred2) | head(y.pred == y.pred2) | ||
- | y.real <- y | + | y.real <- df$y |
y.real | y.real | ||
- | y.mean <- mean(y) | + | y.mean <- mean(df$y) |
y.mean | y.mean | ||
+ | deviation.score <- df$y - y.mean | ||
+ | ds <- deviation.score | ||
res <- y.real - y.pred | res <- y.real - y.pred | ||
reg <- y.pred - y.mean | reg <- y.pred - y.mean | ||
Line 58: | Line 64: | ||
# remember y is sum of res + reg + y.mean | # remember y is sum of res + reg + y.mean | ||
y2 <- res + reg + y.mean | y2 <- res + reg + y.mean | ||
- | y==y2 | + | df$y==y2 |
+ | ss.tot <- sum(ds^2) | ||
ss.res <- sum(res^2) | ss.res <- sum(res^2) | ||
ss.reg <- sum(reg^2) | ss.reg <- sum(reg^2) | ||
- | ss.tot <- var(y) * (length(y)-1) | + | ss.tot2 <- var(df$y) * (length(df$y)-1) |
ss.tot | ss.tot | ||
+ | ss.tot2 | ||
ss.res | ss.res | ||
ss.reg | ss.reg | ||
Line 70: | Line 78: | ||
k <- 3 # # of parameters a, b1, b2 | k <- 3 # # of parameters a, b1, b2 | ||
- | df.tot <- length(y)-1 | + | df.tot <- length(df$y)-1 |
df.reg <- k - 1 | df.reg <- k - 1 | ||
df.res <- df.tot - df.reg | df.res <- df.tot - df.reg | ||
Line 86: | Line 94: | ||
summary(lm.y.x1x2) | summary(lm.y.x1x2) | ||
anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
+ | |||
+ | summary(lm(y~x2+x1, | ||
+ | anova(lm(y~x2+x1, | ||
+ | |||
# note on 2 t-tests in summary | # note on 2 t-tests in summary | ||
# anova에서의 x1, x2에 대한 테스트와 | # anova에서의 x1, x2에 대한 테스트와 | ||
Line 97: | Line 109: | ||
# 테스트하기 때문에 그러함 | # 테스트하기 때문에 그러함 | ||
+ | # 또한 anova test에서 두번째 IV의 F값은 | ||
+ | # summary(lm)에서 두번 째 IV의 t값의 제곱값 | ||
+ | # 임을 이해. 이는 두번 째 IV의 설명력을 나 | ||
+ | # 타내는 부분이 lm과 anova 모두 같기 때문 | ||
+ | # 반면에 첫번째 IV의 경우에는 lm 분석 때에는 | ||
+ | # 고유의 설명력만을 가지고 (semi-partial cor^2) | ||
+ | # 판단을 하는 반면에, anova는 x2와 공유하는 | ||
+ | # 설명력도 포함해서 분석하기 때문에 t값의 | ||
+ | # 제곱이 F값이 되지 못함 | ||
+ | |||
+ | # beta에 대한 설명 | ||
# beta coefficient (standardized b) | # beta coefficient (standardized b) | ||
# beta <- b * (sd(x)/ | # beta <- b * (sd(x)/ | ||
- | beta1 <- b1 * (sd(x1)/ | + | beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
- | beta2 <- b2 * (sd(x2)/ | + | beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
beta1 | beta1 | ||
beta2 | beta2 | ||
Line 113: | Line 136: | ||
# understand with diagrams first | # understand with diagrams first | ||
# then calculate with r | # then calculate with r | ||
- | lm.tmp.1 <- lm(x2~x1, data=d) | + | lm.tmp.1 <- lm(x2~x1, data=df) |
res.x2.x1 <- lm.tmp.1$residuals | res.x2.x1 <- lm.tmp.1$residuals | ||
- | lm.tmp.2 <- lm(y~x1, data=d) | + | lm.tmp.2 <- lm(y~x1, data=df) |
res.y.x1 <- lm.tmp.2$residuals | res.y.x1 <- lm.tmp.2$residuals | ||
- | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
summary(lm.tmp.3) | summary(lm.tmp.3) | ||
+ | summary(lm.tmp.3)$r.squared | ||
+ | sqrt(summary(lm.tmp.3)$r.squared) | ||
# install.packages(" | # install.packages(" | ||
library(ppcor) | library(ppcor) | ||
- | partial.r <- pcor.test(y, | + | partial.r <- pcor.test(df$y, df$x2, df$x1) |
- | partial.r | + | |
str(partial.r) | str(partial.r) | ||
+ | partial.r$estimate | ||
summary(lm.tmp.3) | summary(lm.tmp.3) | ||
summary(lm.tmp.3)$r.square | summary(lm.tmp.3)$r.square | ||
Line 133: | Line 157: | ||
# x1's own explanation? | # x1's own explanation? | ||
- | lm.tmp.4 <- lm(x1~x2, data=d) | + | lm.tmp.4 <- lm(x1~x2, data=df) |
res.x1.x2 <- lm.tmp.4$residuals | res.x1.x2 <- lm.tmp.4$residuals | ||
- | lm.tmp.5 <- lm(y~x2, data=d) | + | lm.tmp.5 <- lm(y~x2, data=df) |
res.y.x2 <- lm.tmp.5$residuals | res.y.x2 <- lm.tmp.5$residuals | ||
- | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
summary(lm.tmp.6) | summary(lm.tmp.6) | ||
- | partial.r <- pcor.test(y, | + | partial.r <- pcor.test(df$y, df$x1, df$x2) |
str(partial.r) | str(partial.r) | ||
partial.r$estimate # this is partial correlation, | partial.r$estimate # this is partial correlation, | ||
# in order to get pr2, you should ^2 | # in order to get pr2, you should ^2 | ||
partial.r$estimate^2 | partial.r$estimate^2 | ||
+ | summary(lm.tmp.6)$r.squared | ||
####################################################### | ####################################################### | ||
- | # | ||
# semipartial correlation coefficient and spr2 | # semipartial correlation coefficient and spr2 | ||
# | # | ||
- | spr.y.x2.x1 <- spcor.test(y, | + | spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
- | spr.y.x1.x2 <- spcor.test(y, | + | spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
spr.y.x2.x1 | spr.y.x2.x1 | ||
spr.y.x1.x2 | spr.y.x1.x2 | ||
Line 161: | Line 185: | ||
spr2.y.x1.x2 | spr2.y.x1.x2 | ||
- | lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
summary(lm.tmp.7) | summary(lm.tmp.7) | ||
spr2.y.x2.x1 | spr2.y.x2.x1 | ||
- | lm.tmp.8 <- lm(y~res.x1.x2, | + | lm.tmp.8 <- lm(y~res.x1.x2, |
summary(lm.tmp.8) | summary(lm.tmp.8) | ||
spr2.y.x1.x2 | spr2.y.x1.x2 | ||
+ | bcd # remember bcd in the above? | ||
+ | bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
+ | bd | ||
+ | bcd - bd | ||
####################################################### | ####################################################### | ||
Line 195: | Line 223: | ||
spr2.y.x2.x1 | spr2.y.x2.x1 | ||
cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | ||
+ | bcd - bd | ||
cma.3 | cma.3 | ||
Line 230: | Line 259: | ||
> # | > # | ||
> rm(list=ls()) | > rm(list=ls()) | ||
- | > d <- read.csv(" | + | > df <- read.csv(" |
- | > d | + | > df |
| | ||
1 6 220 5 | 1 6 220 5 | ||
Line 244: | Line 273: | ||
10 | 10 | ||
> | > | ||
- | > colnames(d) <- c(" | + | > colnames(df) <- c(" |
- | > d | + | > df |
y x1 x2 | y x1 x2 | ||
1 6 220 5 | 1 6 220 5 | ||
Line 260: | Line 289: | ||
> # x1 = 인컴 | > # x1 = 인컴 | ||
> # x2 = 부양가족수 | > # x2 = 부양가족수 | ||
- | > lm.y.x1 <- lm(y ~ x1, data=d) | + | > lm.y.x1 <- lm(y ~ x1, data=df) |
> summary(lm.y.x1) | > summary(lm.y.x1) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.5189 -0.8969 -0.1297 1.0058 1.5800 | + | -1.519 -0.897 -0.130 1.006 1.580 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) 3.617781 | + | (Intercept) |
- | x1 0.015269 | + | x1 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 1.176 on 8 degrees of freedom | + | Residual standard error: 1.18 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> anova(lm.y.x1) | > anova(lm.y.x1) | ||
Line 286: | Line 314: | ||
Response: y | Response: y | ||
- | Df Sum Sq Mean Sq F value | + | Df Sum Sq Mean Sq F value Pr(> |
- | x1 1 18.934 18.9336 | + | x1 |
- | Residuals | + | Residuals |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > cor(df$x1, df$y)^2 |
- | > r.sq.y.x1 <- cor(x1, | + | [1] 0.6311 |
- | > rsq.y.x1< | + | > summary(lm.y.x1)$r.squared |
+ | [1] 0.6311 | ||
> | > | ||
> | > | ||
- | > lm.y.x2 <- lm(y ~ x2, data=d) | + | > lm.y.x2 <- lm(y ~ x2, data=df) |
> summary(lm.y.x2) | > summary(lm.y.x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2537 -0.8881 -0.4851 0.4963 2.5920 | + | -1.254 -0.888 -0.485 0.496 2.592 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) | + | (Intercept) |
- | x2 | + | x2 -0.846 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 1.397 on 8 degrees of freedom | + | Residual standard error: 1.4 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> anova(lm.y.x2) | > anova(lm.y.x2) | ||
Line 322: | Line 350: | ||
Response: y | Response: y | ||
- | Df Sum Sq Mean Sq F value Pr(> | + | Df Sum Sq Mean Sq F value Pr(> |
- | x2 1 14.378 14.3781 | + | x2 |
- | Residuals | + | Residuals |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > cor(df$x2, df$y)^2 |
- | > rsq.y.x2 <- cor(x2, | + | [1] 0.4793 |
- | > rsq.y.x2 <- summary(lm.y.x2)$r.squared | + | > summary(lm.y.x2)$r.squared |
+ | [1] 0.4793 | ||
> | > | ||
> | > | ||
- | > lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | > lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
> summary(lm.y.x1x2) | > summary(lm.y.x1x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
Coefficients: | Coefficients: | ||
- | Estimate Std. Error t value Pr(> | + | |
- | (Intercept) | + | (Intercept) |
- | x1 0.011841 | + | x1 0.01184 |
- | x2 -0.544727 | + | x2 -0.54473 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 0.9301 on 7 degrees of freedom | + | Residual standard error: 0.93 on 7 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> anova(lm.y.x1x2) | > anova(lm.y.x1x2) | ||
Line 359: | Line 387: | ||
Response: y | Response: y | ||
- | Df Sum Sq Mean Sq F value | + | Df Sum Sq Mean Sq F value Pr(> |
- | x1 1 18.9336 18.9336 21.8841 0.002266 | + | x1 |
- | x2 | + | x2 |
- | Residuals | + | Residuals |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > bcd <- summary(lm.y.x1x2)$r.squared |
- | > rsq.y.x1x2<- summary(lm.y.x1x2)$r.squared | + | > bcd |
- | > rsq.y.x1 + rsq.y.x2 - rsq.y.x1x2 | + | [1] 0.7981 |
- | [1] 0.3122658 | + | > bc.cd <- summary(lm.y.x1)$r.squared |
+ | > bc.cd | ||
+ | [1] 1.11 | ||
+ | > bc.cd - bcd # note that this is the amount that shared by the two IVs | ||
+ | [1] 0.3123 | ||
+ | > | ||
> | > | ||
> lm.y.x1x2$coefficient | > lm.y.x1x2$coefficient | ||
(Intercept) | (Intercept) | ||
- | 6.39910298 | + | |
- | > # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | > # y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2 |
> a <- lm.y.x1x2$coefficient[1] | > a <- lm.y.x1x2$coefficient[1] | ||
> b1 <- lm.y.x1x2$coefficient[2] | > b1 <- lm.y.x1x2$coefficient[2] | ||
Line 379: | Line 412: | ||
> a | > a | ||
(Intercept) | (Intercept) | ||
- | 6.399103 | + | |
> b1 | > b1 | ||
- | | + | x1 |
- | 0.01184145 | + | 0.01184 |
> b2 | > b2 | ||
- | | + | x2 |
- | -0.5447272 | + | -0.5447 |
> | > | ||
- | > y.pred <- a + (b1 * x1) + (b2 * x2) | + | > y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
> y.pred | > y.pred | ||
- | | + | |
- | | + | |
> # or | > # or | ||
> y.pred2 <- predict(lm.y.x1x2) | > y.pred2 <- predict(lm.y.x1x2) | ||
Line 397: | Line 429: | ||
TRUE TRUE TRUE TRUE TRUE TRUE | TRUE TRUE TRUE TRUE TRUE TRUE | ||
> | > | ||
- | > y.real <- y | + | > y.real <- df$y |
> y.real | > y.real | ||
| | ||
- | > y.mean <- mean(y) | + | > y.mean <- mean(df$y) |
> y.mean | > y.mean | ||
[1] 8 | [1] 8 | ||
> | > | ||
+ | > deviation.score <- df$y - y.mean | ||
+ | > ds <- deviation.score | ||
> res <- y.real - y.pred | > res <- y.real - y.pred | ||
> reg <- y.pred - y.mean | > reg <- y.pred - y.mean | ||
Line 410: | Line 444: | ||
> # remember y is sum of res + reg + y.mean | > # remember y is sum of res + reg + y.mean | ||
> y2 <- res + reg + y.mean | > y2 <- res + reg + y.mean | ||
- | > y==y2 | + | > df$y==y2 |
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | ||
> | > | ||
+ | > ss.tot <- sum(ds^2) | ||
> ss.res <- sum(res^2) | > ss.res <- sum(res^2) | ||
> ss.reg <- sum(reg^2) | > ss.reg <- sum(reg^2) | ||
> | > | ||
- | > ss.tot <- var(y) * (length(y)-1) | + | > ss.tot2 <- var(df$y) * (length(df$y)-1) |
> ss.tot | > ss.tot | ||
+ | [1] 30 | ||
+ | > ss.tot2 | ||
[1] 30 | [1] 30 | ||
> ss.res | > ss.res | ||
- | [1] 6.056235 | + | [1] 6.056 |
> ss.reg | > ss.reg | ||
- | [1] 23.94376 | + | [1] 23.94 |
> ss.res+ss.reg | > ss.res+ss.reg | ||
[1] 30 | [1] 30 | ||
> | > | ||
> k <- 3 # # of parameters a, b1, b2 | > k <- 3 # # of parameters a, b1, b2 | ||
- | > df.tot <- length(y)-1 | + | > df.tot <- length(df$y)-1 |
> df.reg <- k - 1 | > df.reg <- k - 1 | ||
> df.res <- df.tot - df.reg | > df.res <- df.tot - df.reg | ||
Line 434: | Line 471: | ||
> ms.res <- ss.res/ | > ms.res <- ss.res/ | ||
> ms.reg | > ms.reg | ||
- | [1] 11.97188 | + | [1] 11.97 |
> ms.res | > ms.res | ||
- | [1] 0.8651765 | + | [1] 0.8652 |
> f.val <- ms.reg/ | > f.val <- ms.reg/ | ||
> f.val | > f.val | ||
- | [1] 13.8375 | + | [1] 13.84 |
> p.val <- pf(f.val, df.reg, df.res, lower.tail = F) | > p.val <- pf(f.val, df.reg, df.res, lower.tail = F) | ||
> p.val | > p.val | ||
- | [1] 0.003696453 | + | [1] 0.003696 |
> | > | ||
> # double check | > # double check | ||
Line 448: | Line 485: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
Coefficients: | Coefficients: | ||
- | Estimate Std. Error t value Pr(> | + | |
- | (Intercept) | + | (Intercept) |
- | x1 0.011841 | + | x1 0.01184 |
- | x2 -0.544727 | + | x2 -0.54473 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 0.9301 on 7 degrees of freedom | + | Residual standard error: 0.93 on 7 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> anova(lm.y.x1x2) | > anova(lm.y.x1x2) | ||
Line 471: | Line 507: | ||
Response: y | Response: y | ||
- | Df Sum Sq Mean Sq F value | + | Df Sum Sq Mean Sq F value Pr(> |
- | x1 1 18.9336 18.9336 21.8841 0.002266 | + | x1 |
- | x2 | + | x2 |
- | Residuals | + | Residuals |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > |
+ | > summary(lm(y~x2+x1, | ||
+ | |||
+ | Call: | ||
+ | lm(formula = y ~ x2 + x1, data = df) | ||
+ | |||
+ | Residuals: | ||
+ | | ||
+ | -1.217 -0.578 -0.151 | ||
+ | |||
+ | Coefficients: | ||
+ | Estimate Std. Error t value Pr(> | ||
+ | (Intercept) | ||
+ | x2 -0.54473 | ||
+ | x1 | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | |||
+ | Residual standard error: 0.93 on 7 degrees of freedom | ||
+ | Multiple R-squared: | ||
+ | F-statistic: | ||
+ | |||
+ | > anova(lm(y~x2+x1, | ||
+ | Analysis of Variance Table | ||
+ | |||
+ | Response: y | ||
+ | Df Sum Sq Mean Sq F value Pr(> | ||
+ | x2 | ||
+ | x1 | ||
+ | Residuals | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | > | ||
> # note on 2 t-tests in summary | > # note on 2 t-tests in summary | ||
> # anova에서의 x1, x2에 대한 테스트와 | > # anova에서의 x1, x2에 대한 테스트와 | ||
Line 489: | Line 557: | ||
> # 테스트하기 때문에 그러함 | > # 테스트하기 때문에 그러함 | ||
> | > | ||
+ | > # 또한 anova test에서 두번째 IV의 F값은 | ||
+ | > # summary(lm)에서 두번 째 IV의 t값의 제곱값 | ||
+ | > # 임을 이해. 이는 두번 째 IV의 설명력을 나 | ||
+ | > # 타내는 부분이 lm과 anova 모두 같기 때문 | ||
+ | > # 반면에 첫번째 IV의 경우에는 lm 분석 때에는 | ||
+ | > # 고유의 설명력만을 가지고 (semi-partial cor^2) | ||
+ | > # 판단을 하는 반면에, anova는 x2와 공유하는 | ||
+ | > # 설명력도 포함해서 분석하기 때문에 t값의 | ||
+ | > # 제곱이 F값이 되지 못함 | ||
+ | > | ||
+ | > # beta에 대한 설명 | ||
> # beta coefficient (standardized b) | > # beta coefficient (standardized b) | ||
> # beta <- b * (sd(x)/ | > # beta <- b * (sd(x)/ | ||
- | > beta1 <- b1 * (sd(x1)/ | + | > beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
- | > beta2 <- b2 * (sd(x2)/ | + | > beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
> beta1 | > beta1 | ||
- | | + | |
- | 0.616097 | + | 0.6161 |
> beta2 | > beta2 | ||
- | | + | x2 |
- | -0.4458785 | + | -0.4459 |
> | > | ||
> # install.packages(" | > # install.packages(" | ||
Line 505: | Line 584: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Standardized Coefficients:: | Standardized Coefficients:: | ||
(Intercept) | (Intercept) | ||
- | | + | |
> | > | ||
Line 517: | Line 596: | ||
> # understand with diagrams first | > # understand with diagrams first | ||
> # then calculate with r | > # then calculate with r | ||
- | > lm.tmp.1 <- lm(x2~x1, data=d) | + | > lm.tmp.1 <- lm(x2~x1, data=df) |
> res.x2.x1 <- lm.tmp.1$residuals | > res.x2.x1 <- lm.tmp.1$residuals | ||
> | > | ||
- | > lm.tmp.2 <- lm(y~x1, data=d) | + | > lm.tmp.2 <- lm(y~x1, data=df) |
> res.y.x1 <- lm.tmp.2$residuals | > res.y.x1 <- lm.tmp.2$residuals | ||
> | > | ||
- | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
> summary(lm.tmp.3) | > summary(lm.tmp.3) | ||
Call: | Call: | ||
- | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
Coefficients: | Coefficients: | ||
- | | + | Estimate Std. Error t value Pr(> |
- | (Intercept) | + | (Intercept) |
- | res.x2.x1 | + | res.x2.x1 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 0.8701 on 8 degrees of freedom | + | Residual standard error: 0.87 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
- | > | + | > summary(lm.tmp.3)$r.squared |
+ | [1] 0.4527 | ||
+ | > sqrt(summary(lm.tmp.3)$r.squared) | ||
+ | [1] 0.6729 | ||
> # install.packages(" | > # install.packages(" | ||
> library(ppcor) | > library(ppcor) | ||
- | > partial.r <- pcor.test(y, | + | > partial.r <- pcor.test(df$y, df$x2, df$x1) |
- | > partial.r | + | |
- | | + | |
- | 1 -0.672856 0.04702022 -2.406425 10 1 pearson | + | |
> str(partial.r) | > str(partial.r) | ||
' | ' | ||
Line 560: | Line 638: | ||
$ gp : num 1 | $ gp : num 1 | ||
$ Method | $ Method | ||
+ | > partial.r$estimate | ||
+ | [1] -0.6729 | ||
> summary(lm.tmp.3) | > summary(lm.tmp.3) | ||
Call: | Call: | ||
- | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
Coefficients: | Coefficients: | ||
- | | + | Estimate Std. Error t value Pr(> |
- | (Intercept) | + | (Intercept) |
- | res.x2.x1 | + | res.x2.x1 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 0.8701 on 8 degrees of freedom | + | Residual standard error: 0.87 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> summary(lm.tmp.3)$r.square | > summary(lm.tmp.3)$r.square | ||
- | [1] 0.4527352 | + | [1] 0.4527 |
> partial.r$estimate^2 | > partial.r$estimate^2 | ||
- | [1] 0.4527352 | + | [1] 0.4527 |
> | > | ||
> | > | ||
> # x1's own explanation? | > # x1's own explanation? | ||
- | > lm.tmp.4 <- lm(x1~x2, data=d) | + | > lm.tmp.4 <- lm(x1~x2, data=df) |
> res.x1.x2 <- lm.tmp.4$residuals | > res.x1.x2 <- lm.tmp.4$residuals | ||
> | > | ||
- | > lm.tmp.5 <- lm(y~x2, data=d) | + | > lm.tmp.5 <- lm(y~x2, data=df) |
> res.y.x2 <- lm.tmp.5$residuals | > res.y.x2 <- lm.tmp.5$residuals | ||
> | > | ||
- | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
> summary(lm.tmp.6) | > summary(lm.tmp.6) | ||
Call: | Call: | ||
- | lm(formula = res.y.x2 ~ res.x1.x2, data = d) | + | lm(formula = res.y.x2 ~ res.x1.x2, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
Coefficients: | Coefficients: | ||
- | Estimate Std. Error t value Pr(> | + | |
- | (Intercept) 1.330e-17 2.751e-01 | + | (Intercept) 1.33e-17 |
- | res.x1.x2 | + | res.x1.x2 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 0.8701 on 8 degrees of freedom | + | Residual standard error: 0.87 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> | > | ||
- | > partial.r <- pcor.test(y, | + | > partial.r <- pcor.test(df$y, df$x1, df$x2) |
> str(partial.r) | > str(partial.r) | ||
' | ' | ||
Line 627: | Line 705: | ||
$ Method | $ Method | ||
> partial.r$estimate # this is partial correlation, | > partial.r$estimate # this is partial correlation, | ||
- | [1] 0.7825112 | + | [1] 0.7825 |
> # in order to get pr2, you should ^2 | > # in order to get pr2, you should ^2 | ||
> partial.r$estimate^2 | > partial.r$estimate^2 | ||
- | [1] 0.6123238 | + | [1] 0.6123 |
+ | > summary(lm.tmp.6)$r.squared | ||
+ | [1] 0.6123 | ||
> | > | ||
> ####################################################### | > ####################################################### | ||
- | > # | ||
> # semipartial correlation coefficient and spr2 | > # semipartial correlation coefficient and spr2 | ||
> # | > # | ||
- | > spr.y.x2.x1 <- spcor.test(y, | + | > spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
- | > spr.y.x1.x2 <- spcor.test(y, | + | > spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
> spr.y.x2.x1 | > spr.y.x2.x1 | ||
- | estimate | + | |
- | 1 -0.4086619 | + | 1 -0.4087 |
> spr.y.x1.x2 | > spr.y.x1.x2 | ||
- | estimate | + | |
- | 1 0.5646726 | + | 1 |
> spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | ||
> spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | ||
> spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
- | [1] 0.1670045 | + | [1] 0.167 |
> spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
- | [1] 0.3188552 | + | [1] 0.3189 |
> | > | ||
- | > lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
> summary(lm.tmp.7) | > summary(lm.tmp.7) | ||
Call: | Call: | ||
- | lm(formula = y ~ res.x2.x1, data = d) | + | lm(formula = y ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.8617 -1.1712 -0.4940 0.5488 3.0771 | + | -1.862 -1.171 -0.494 0.549 3.077 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) | + | (Intercept) |
- | res.x2.x1 | + | res.x2.x1 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 1.767 on 8 degrees of freedom | + | Residual standard error: 1.77 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
- | [1] 0.1670045 | + | [1] 0.167 |
> | > | ||
- | > lm.tmp.8 <- lm(y~res.x1.x2, | + | > lm.tmp.8 <- lm(y~res.x1.x2, |
> summary(lm.tmp.8) | > summary(lm.tmp.8) | ||
Call: | Call: | ||
- | lm(formula = y ~ res.x1.x2, data = d) | + | lm(formula = y ~ res.x1.x2, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -2.6642 -0.6084 -0.1492 1.2192 2.2901 | + | -2.664 -0.608 -0.149 1.219 2.290 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) 8.000000 | + | (Intercept) |
- | res.x1.x2 | + | res.x1.x2 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 1.598 on 8 degrees of freedom | + | Residual standard error: 1.6 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
- | [1] 0.3188552 | + | [1] 0.3189 |
> | > | ||
+ | > bcd # remember bcd in the above? | ||
+ | [1] 0.7981 | ||
+ | > bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
+ | > bd | ||
+ | [1] 0.4859 | ||
+ | > bcd - bd | ||
+ | [1] 0.3123 | ||
> | > | ||
> ####################################################### | > ####################################################### | ||
Line 708: | Line 792: | ||
Call: | Call: | ||
- | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2537 -0.8881 -0.4851 0.4963 2.5920 | + | -1.254 -0.888 -0.485 0.496 2.592 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) | + | (Intercept) |
- | x2 | + | x2 -0.846 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 1.397 on 8 degrees of freedom | + | Residual standard error: 1.4 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> all.x2 <- summary(lm.y.x2)$r.squared | > all.x2 <- summary(lm.y.x2)$r.squared | ||
> all.x2 | > all.x2 | ||
- | [1] 0.4792703 | + | [1] 0.4793 |
> spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
- | [1] 0.1670045 | + | [1] 0.167 |
> cma.1 <- all.x2 - spr2.y.x2.x1 | > cma.1 <- all.x2 - spr2.y.x2.x1 | ||
> cma.1 | > cma.1 | ||
- | [1] 0.3122658 | + | [1] 0.3123 |
> | > | ||
> # 2. | > # 2. | ||
Line 739: | Line 822: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.5189 -0.8969 -0.1297 1.0058 1.5800 | + | -1.519 -0.897 -0.130 1.006 1.580 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) 3.617781 | + | (Intercept) |
- | x1 0.015269 | + | x1 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 1.176 on 8 degrees of freedom | + | Residual standard error: 1.18 on 8 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> all.x1 <- summary(lm.y.x1)$r.squared | > all.x1 <- summary(lm.y.x1)$r.squared | ||
> all.x1 | > all.x1 | ||
- | [1] 0.631121 | + | [1] 0.6311 |
> spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
- | [1] 0.3188552 | + | [1] 0.3189 |
> cma.2 <- all.x1 - spr2.y.x1.x2 | > cma.2 <- all.x1 - spr2.y.x1.x2 | ||
> cma.2 | > cma.2 | ||
- | [1] 0.3122658 | + | [1] 0.3123 |
> | > | ||
> # OR 3. | > # OR 3. | ||
Line 770: | Line 852: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
- | | + | |
- | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
Coefficients: | Coefficients: | ||
- | Estimate Std. Error t value Pr(> | + | |
- | (Intercept) | + | (Intercept) |
- | x1 0.011841 | + | x1 0.01184 |
- | x2 -0.544727 | + | x2 -0.54473 |
--- | --- | ||
- | Signif. codes: | + | Signif. codes: |
- | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
- | Residual standard error: 0.9301 on 7 degrees of freedom | + | Residual standard error: 0.93 on 7 degrees of freedom |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
> r2.y.x1x2 | > r2.y.x1x2 | ||
- | [1] 0.7981255 | + | [1] 0.7981 |
> spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
- | [1] 0.3188552 | + | [1] 0.3189 |
> spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
- | [1] 0.1670045 | + | [1] 0.167 |
> cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | > cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | ||
+ | > bcd - bd | ||
+ | [1] 0.3123 | ||
> cma.3 | > cma.3 | ||
- | [1] 0.3122658 | + | [1] 0.3123 |
> | > | ||
> cma.1 | > cma.1 | ||
- | [1] 0.3122658 | + | [1] 0.3123 |
> cma.2 | > cma.2 | ||
- | [1] 0.3122658 | + | [1] 0.3123 |
> cma.3 | > cma.3 | ||
- | [1] 0.3122658 | + | [1] 0.3123 |
> | > | ||
> # OR 애초에 simple regression과 multiple | > # OR 애초에 simple regression과 multiple | ||
Line 813: | Line 896: | ||
> r2.y.x2 <- summary(lm.y.x2)$r.square | > r2.y.x2 <- summary(lm.y.x2)$r.square | ||
> r2.y.x1 | > r2.y.x1 | ||
- | [1] 0.631121 | + | [1] 0.6311 |
> r2.y.x2 | > r2.y.x2 | ||
- | [1] 0.4792703 | + | [1] 0.4793 |
> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
> r2.y.x1x2 | > r2.y.x1x2 | ||
- | [1] 0.7981255 | + | [1] 0.7981 |
> cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | ||
> cma.4 | > cma.4 | ||
- | [1] 0.3122658 | + | [1] 0.3123 |
> | > | ||
> # Note that sorting out unique and common | > # Note that sorting out unique and common | ||
Line 835: | Line 918: | ||
</ | </ | ||
+ | ====== explanation. added ====== | ||
+ | {{: | ||
+ | < | ||
+ | # ex. | ||
+ | # resid(lm(y~x1, | ||
+ | # resid(lm(y~x2, | ||
+ | # resid(lm(y~x1+x2, | ||
+ | # b / delta.y = ? | ||
+ | # ce / delta.x2 = ? | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # exp.added | ||
+ | spcor.test(df$y, | ||
+ | spcor.test(df$y, | ||
+ | spcor.test(df$y, | ||
+ | spcor.test(df$y, | ||
+ | summary(lm(y~x1+x2, | ||
+ | |||
+ | b <- spcor.test(df$y, | ||
+ | d <- spcor.test(df$y, | ||
+ | bcd <- summary(lm(y~x1+x2, | ||
+ | |||
+ | summary(lm(df$y~df$x1+df$x2, | ||
+ | (spcor.test(df$y, | ||
+ | | ||
+ | bcd - (b + d) | ||
+ | |||
+ | </ | ||
+ | ====== Another one ====== | ||
+ | [[:partial and semipartial correlation]] | ||
c/ms/2025/schedule/w13.lecture.note.1748991003.txt.gz · Last modified: 2025/06/04 07:50 by hkimscil