c:ms:2025:schedule:w13.lecture.note
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| c:ms:2025:schedule:w13.lecture.note [2025/06/03 23:59] – [MR] hkimscil | c:ms:2025:schedule:w13.lecture.note [2025/06/08 23:51] (current) – [output] hkimscil | ||
|---|---|---|---|
| Line 5: | Line 5: | ||
| # | # | ||
| rm(list=ls()) | rm(list=ls()) | ||
| - | d <- read.csv(" | + | df <- read.csv(" |
| - | d | + | df |
| - | colnames(d) <- c(" | + | colnames(df) <- c(" |
| - | d | + | df |
| # y = 통장갯수 | # y = 통장갯수 | ||
| # x1 = 인컴 | # x1 = 인컴 | ||
| # x2 = 부양가족수 | # x2 = 부양가족수 | ||
| - | lm.y.x1 <- lm(y ~ x1, data=d) | + | lm.y.x1 <- lm(y ~ x1, data=df) |
| summary(lm.y.x1) | summary(lm.y.x1) | ||
| anova(lm.y.x1) | anova(lm.y.x1) | ||
| - | r.sq.y.x1 <- cor(x1, | + | cor(df$x1, df$y)^2 |
| - | rsq.y.x1< | + | summary(lm.y.x1)$r.squared |
| - | lm.y.x2 <- lm(y ~ x2, data=d) | + | lm.y.x2 <- lm(y ~ x2, data=df) |
| summary(lm.y.x2) | summary(lm.y.x2) | ||
| anova(lm.y.x2) | anova(lm.y.x2) | ||
| - | rsq.y.x2 <- cor(x2, | + | cor(df$x2, df$y)^2 |
| - | rsq.y.x2 <- summary(lm.y.x2)$r.squared | + | summary(lm.y.x2)$r.squared |
| - | lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
| summary(lm.y.x1x2) | summary(lm.y.x1x2) | ||
| anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
| - | rsq.y.x1x2<- summary(lm.y.x1x2)$r.squared | + | bcd <- summary(lm.y.x1x2)$r.squared |
| - | rsq.y.x1 + rsq.y.x2 - rsq.y.x1x2 | + | bcd |
| + | bc.cd <- summary(lm.y.x1)$r.squared | ||
| + | bc.cd | ||
+ | bc.cd - bcd # note that this is the amount shared by the two IVs | ||
| lm.y.x1x2$coefficient | lm.y.x1x2$coefficient | ||
- | # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 |
| a <- lm.y.x1x2$coefficient[1] | a <- lm.y.x1x2$coefficient[1] | ||
| b1 <- lm.y.x1x2$coefficient[2] | b1 <- lm.y.x1x2$coefficient[2] | ||
| Line 42: | Line 46: | ||
| b2 | b2 | ||
| - | y.pred <- a + (b1 * x1) + (b2 * x2) | + | y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
| y.pred | y.pred | ||
| # or | # or | ||
| Line 48: | Line 52: | ||
| head(y.pred == y.pred2) | head(y.pred == y.pred2) | ||
| - | y.real <- y | + | y.real <- df$y |
| y.real | y.real | ||
| - | y.mean <- mean(y) | + | y.mean <- mean(df$y) |
| y.mean | y.mean | ||
| + | deviation.score <- df$y - y.mean | ||
| + | ds <- deviation.score | ||
| res <- y.real - y.pred | res <- y.real - y.pred | ||
| reg <- y.pred - y.mean | reg <- y.pred - y.mean | ||
| Line 58: | Line 64: | ||
| # remember y is sum of res + reg + y.mean | # remember y is sum of res + reg + y.mean | ||
| y2 <- res + reg + y.mean | y2 <- res + reg + y.mean | ||
| - | y==y2 | + | df$y==y2 |
| + | ss.tot <- sum(ds^2) | ||
| ss.res <- sum(res^2) | ss.res <- sum(res^2) | ||
| ss.reg <- sum(reg^2) | ss.reg <- sum(reg^2) | ||
| - | ss.tot <- var(y) * (length(y)-1) | + | ss.tot2 <- var(df$y) * (length(df$y)-1) |
| ss.tot | ss.tot | ||
| + | ss.tot2 | ||
| ss.res | ss.res | ||
| ss.reg | ss.reg | ||
| Line 70: | Line 78: | ||
| k <- 3 # # of parameters a, b1, b2 | k <- 3 # # of parameters a, b1, b2 | ||
| - | df.tot <- length(y)-1 | + | df.tot <- length(df$y)-1 |
| df.reg <- k - 1 | df.reg <- k - 1 | ||
| df.res <- df.tot - df.reg | df.res <- df.tot - df.reg | ||
| Line 86: | Line 94: | ||
| summary(lm.y.x1x2) | summary(lm.y.x1x2) | ||
| anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
| + | |||
| + | summary(lm(y~x2+x1, | ||
| + | anova(lm(y~x2+x1, | ||
| + | |||
| # note on 2 t-tests in summary | # note on 2 t-tests in summary | ||
| # anova에서의 x1, x2에 대한 테스트와 | # anova에서의 x1, x2에 대한 테스트와 | ||
| Line 97: | Line 109: | ||
| # 테스트하기 때문에 그러함 | # 테스트하기 때문에 그러함 | ||
| + | # 또한 anova test에서 두번째 IV의 F값은 | ||
| + | # summary(lm)에서 두번 째 IV의 t값의 제곱값 | ||
| + | # 임을 이해. 이는 두번 째 IV의 설명력을 나 | ||
| + | # 타내는 부분이 lm과 anova 모두 같기 때문 | ||
| + | # 반면에 첫번째 IV의 경우에는 lm 분석 때에는 | ||
| + | # 고유의 설명력만을 가지고 (semi-partial cor^2) | ||
| + | # 판단을 하는 반면에, anova는 x2와 공유하는 | ||
| + | # 설명력도 포함해서 분석하기 때문에 t값의 | ||
| + | # 제곱이 F값이 되지 못함 | ||
| + | |||
| + | # beta에 대한 설명 | ||
| # beta coefficient (standardized b) | # beta coefficient (standardized b) | ||
| # beta <- b * (sd(x)/ | # beta <- b * (sd(x)/ | ||
| - | beta1 <- b1 * (sd(x1)/ | + | beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
| - | beta2 <- b2 * (sd(x2)/ | + | beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
| beta1 | beta1 | ||
| beta2 | beta2 | ||
| Line 113: | Line 136: | ||
| # understand with diagrams first | # understand with diagrams first | ||
| # then calculate with r | # then calculate with r | ||
| - | lm.tmp.1 <- lm(x2~x1, data=d) | + | lm.tmp.1 <- lm(x2~x1, data=df) |
| res.x2.x1 <- lm.tmp.1$residuals | res.x2.x1 <- lm.tmp.1$residuals | ||
| - | lm.tmp.2 <- lm(y~x1, data=d) | + | lm.tmp.2 <- lm(y~x1, data=df) |
| res.y.x1 <- lm.tmp.2$residuals | res.y.x1 <- lm.tmp.2$residuals | ||
| - | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
| summary(lm.tmp.3) | summary(lm.tmp.3) | ||
| + | summary(lm.tmp.3)$r.squared | ||
| + | sqrt(summary(lm.tmp.3)$r.squared) | ||
| # install.packages(" | # install.packages(" | ||
| library(ppcor) | library(ppcor) | ||
| - | partial.r <- pcor.test(y, | + | partial.r <- pcor.test(df$y, df$x2, df$x1) |
| - | partial.r | + | |
| str(partial.r) | str(partial.r) | ||
| + | partial.r$estimate | ||
| summary(lm.tmp.3) | summary(lm.tmp.3) | ||
| summary(lm.tmp.3)$r.square | summary(lm.tmp.3)$r.square | ||
| Line 133: | Line 157: | ||
| # x1's own explanation? | # x1's own explanation? | ||
| - | lm.tmp.4 <- lm(x1~x2, data=d) | + | lm.tmp.4 <- lm(x1~x2, data=df) |
| res.x1.x2 <- lm.tmp.4$residuals | res.x1.x2 <- lm.tmp.4$residuals | ||
| - | lm.tmp.5 <- lm(y~x2, data=d) | + | lm.tmp.5 <- lm(y~x2, data=df) |
| res.y.x2 <- lm.tmp.5$residuals | res.y.x2 <- lm.tmp.5$residuals | ||
| - | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
| summary(lm.tmp.6) | summary(lm.tmp.6) | ||
| - | partial.r <- pcor.test(y, | + | partial.r <- pcor.test(df$y, df$x1, df$x2) |
| str(partial.r) | str(partial.r) | ||
| partial.r$estimate # this is partial correlation, | partial.r$estimate # this is partial correlation, | ||
| # in order to get pr2, you should ^2 | # in order to get pr2, you should ^2 | ||
| partial.r$estimate^2 | partial.r$estimate^2 | ||
| + | summary(lm.tmp.6)$r.squared | ||
| ####################################################### | ####################################################### | ||
| - | # | ||
| # semipartial correlation coefficient and spr2 | # semipartial correlation coefficient and spr2 | ||
| # | # | ||
| - | spr.y.x2.x1 <- spcor.test(y, | + | spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
| - | spr.y.x1.x2 <- spcor.test(y, | + | spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
| spr.y.x2.x1 | spr.y.x2.x1 | ||
| spr.y.x1.x2 | spr.y.x1.x2 | ||
| Line 161: | Line 185: | ||
| spr2.y.x1.x2 | spr2.y.x1.x2 | ||
| - | lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
| summary(lm.tmp.7) | summary(lm.tmp.7) | ||
| spr2.y.x2.x1 | spr2.y.x2.x1 | ||
| - | lm.tmp.8 <- lm(y~res.x1.x2, | + | lm.tmp.8 <- lm(y~res.x1.x2, |
| summary(lm.tmp.8) | summary(lm.tmp.8) | ||
| spr2.y.x1.x2 | spr2.y.x1.x2 | ||
| + | bcd # remember bcd in the above? | ||
| + | bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
| + | bd | ||
| + | bcd - bd | ||
| ####################################################### | ####################################################### | ||
| # get the common area that explain the y variable | # get the common area that explain the y variable | ||
| - | |||
| # 1. | # 1. | ||
| - | # lm.tmp.9 | ||
| summary(lm.y.x2) | summary(lm.y.x2) | ||
| all.x2 <- summary(lm.y.x2)$r.squared | all.x2 <- summary(lm.y.x2)$r.squared | ||
| Line 183: | Line 209: | ||
| # 2. | # 2. | ||
| - | # lm.tmp.10 | ||
| summary(lm.y.x1) | summary(lm.y.x1) | ||
| all.x1 <- summary(lm.y.x1)$r.squared | all.x1 <- summary(lm.y.x1)$r.squared | ||
| Line 192: | Line 217: | ||
| # OR 3. | # OR 3. | ||
| - | # lm.tmp.11 | ||
| summary(lm.y.x1x2) | summary(lm.y.x1x2) | ||
| r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| Line 199: | Line 223: | ||
| spr2.y.x2.x1 | spr2.y.x2.x1 | ||
| cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | ||
| + | bcd - bd | ||
| cma.3 | cma.3 | ||
| Line 234: | Line 259: | ||
| > # | > # | ||
| > rm(list=ls()) | > rm(list=ls()) | ||
| - | > d <- read.csv(" | + | > df <- read.csv(" |
| - | > d | + | > df |
| | | ||
| 1 6 220 5 | 1 6 220 5 | ||
| Line 248: | Line 273: | ||
| 10 | 10 | ||
| > | > | ||
| - | > colnames(d) <- c(" | + | > colnames(df) <- c(" |
| - | > d | + | > df |
| y x1 x2 | y x1 x2 | ||
| 1 6 220 5 | 1 6 220 5 | ||
| Line 264: | Line 289: | ||
| > # x1 = 인컴 | > # x1 = 인컴 | ||
| > # x2 = 부양가족수 | > # x2 = 부양가족수 | ||
| - | > lm.y.x1 <- lm(y ~ x1, data=d) | + | > lm.y.x1 <- lm(y ~ x1, data=df) |
| > summary(lm.y.x1) | > summary(lm.y.x1) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.5189 -0.8969 -0.1297 1.0058 1.5800 | + | -1.519 -0.897 -0.130 1.006 1.580 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) 3.617781 | + | (Intercept) |
| - | x1 0.015269 | + | x1 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 1.176 on 8 degrees of freedom | + | Residual standard error: 1.18 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > anova(lm.y.x1) | > anova(lm.y.x1) | ||
| Line 290: | Line 314: | ||
| Response: y | Response: y | ||
| - | Df Sum Sq Mean Sq F value | + | Df Sum Sq Mean Sq F value Pr(> |
| - | x1 1 18.934 18.9336 | + | x1 |
| - | Residuals | + | Residuals |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > cor(df$x1, df$y)^2 |
| - | > r.sq.y.x1 <- cor(x1, | + | [1] 0.6311 |
| - | > rsq.y.x1< | + | > summary(lm.y.x1)$r.squared |
| + | [1] 0.6311 | ||
| > | > | ||
| > | > | ||
| - | > lm.y.x2 <- lm(y ~ x2, data=d) | + | > lm.y.x2 <- lm(y ~ x2, data=df) |
| > summary(lm.y.x2) | > summary(lm.y.x2) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2537 -0.8881 -0.4851 0.4963 2.5920 | + | -1.254 -0.888 -0.485 0.496 2.592 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) | + | (Intercept) |
| - | x2 | + | x2 -0.846 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 1.397 on 8 degrees of freedom | + | Residual standard error: 1.4 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > anova(lm.y.x2) | > anova(lm.y.x2) | ||
| Line 326: | Line 350: | ||
| Response: y | Response: y | ||
| - | Df Sum Sq Mean Sq F value Pr(> | + | Df Sum Sq Mean Sq F value Pr(> |
| - | x2 1 14.378 14.3781 | + | x2 |
| - | Residuals | + | Residuals |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > cor(df$x2, df$y)^2 |
| - | > rsq.y.x2 <- cor(x2, | + | [1] 0.4793 |
| - | > rsq.y.x2 <- summary(lm.y.x2)$r.squared | + | > summary(lm.y.x2)$r.squared |
| + | [1] 0.4793 | ||
| > | > | ||
| > | > | ||
| - | > lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | > lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
| > summary(lm.y.x1x2) | > summary(lm.y.x1x2) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
| Coefficients: | Coefficients: | ||
| - | Estimate Std. Error t value Pr(> | + | |
| - | (Intercept) | + | (Intercept) |
| - | x1 0.011841 | + | x1 0.01184 |
| - | x2 -0.544727 | + | x2 -0.54473 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 0.9301 on 7 degrees of freedom | + | Residual standard error: 0.93 on 7 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > anova(lm.y.x1x2) | > anova(lm.y.x1x2) | ||
| Line 363: | Line 387: | ||
| Response: y | Response: y | ||
| - | Df Sum Sq Mean Sq F value | + | Df Sum Sq Mean Sq F value Pr(> |
| - | x1 1 18.9336 18.9336 21.8841 0.002266 | + | x1 |
| - | x2 | + | x2 |
| - | Residuals | + | Residuals |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > bcd <- summary(lm.y.x1x2)$r.squared |
| - | > rsq.y.x1x2<- summary(lm.y.x1x2)$r.squared | + | > bcd |
| - | > rsq.y.x1 + rsq.y.x2 - rsq.y.x1x2 | + | [1] 0.7981 |
| - | [1] 0.3122658 | + | > bc.cd <- summary(lm.y.x1)$r.squared |
| + | > bc.cd | ||
| + | [1] 1.11 | ||
+ | > bc.cd - bcd # note that this is the amount shared by the two IVs | ||
| + | [1] 0.3123 | ||
| + | > | ||
| > | > | ||
| > lm.y.x1x2$coefficient | > lm.y.x1x2$coefficient | ||
| (Intercept) | (Intercept) | ||
| - | 6.39910298 | + | |
- | > # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | > # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 |
| > a <- lm.y.x1x2$coefficient[1] | > a <- lm.y.x1x2$coefficient[1] | ||
| > b1 <- lm.y.x1x2$coefficient[2] | > b1 <- lm.y.x1x2$coefficient[2] | ||
| Line 383: | Line 412: | ||
| > a | > a | ||
| (Intercept) | (Intercept) | ||
| - | 6.399103 | + | |
| > b1 | > b1 | ||
| - | | + | x1 |
| - | 0.01184145 | + | 0.01184 |
| > b2 | > b2 | ||
| - | | + | x2 |
| - | -0.5447272 | + | -0.5447 |
| > | > | ||
| - | > y.pred <- a + (b1 * x1) + (b2 * x2) | + | > y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
| > y.pred | > y.pred | ||
| - | | + | |
| - | | + | |
| > # or | > # or | ||
| > y.pred2 <- predict(lm.y.x1x2) | > y.pred2 <- predict(lm.y.x1x2) | ||
| Line 401: | Line 429: | ||
| TRUE TRUE TRUE TRUE TRUE TRUE | TRUE TRUE TRUE TRUE TRUE TRUE | ||
| > | > | ||
| - | > y.real <- y | + | > y.real <- df$y |
| > y.real | > y.real | ||
| | | ||
| - | > y.mean <- mean(y) | + | > y.mean <- mean(df$y) |
| > y.mean | > y.mean | ||
| [1] 8 | [1] 8 | ||
| > | > | ||
| + | > deviation.score <- df$y - y.mean | ||
| + | > ds <- deviation.score | ||
| > res <- y.real - y.pred | > res <- y.real - y.pred | ||
| > reg <- y.pred - y.mean | > reg <- y.pred - y.mean | ||
| Line 414: | Line 444: | ||
| > # remember y is sum of res + reg + y.mean | > # remember y is sum of res + reg + y.mean | ||
| > y2 <- res + reg + y.mean | > y2 <- res + reg + y.mean | ||
| - | > y==y2 | + | > df$y==y2 |
| [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | ||
| > | > | ||
| + | > ss.tot <- sum(ds^2) | ||
| > ss.res <- sum(res^2) | > ss.res <- sum(res^2) | ||
| > ss.reg <- sum(reg^2) | > ss.reg <- sum(reg^2) | ||
| > | > | ||
| - | > ss.tot <- var(y) * (length(y)-1) | + | > ss.tot2 <- var(df$y) * (length(df$y)-1) |
| > ss.tot | > ss.tot | ||
| + | [1] 30 | ||
| + | > ss.tot2 | ||
| [1] 30 | [1] 30 | ||
| > ss.res | > ss.res | ||
| - | [1] 6.056235 | + | [1] 6.056 |
| > ss.reg | > ss.reg | ||
| - | [1] 23.94376 | + | [1] 23.94 |
| > ss.res+ss.reg | > ss.res+ss.reg | ||
| [1] 30 | [1] 30 | ||
| > | > | ||
| > k <- 3 # # of parameters a, b1, b2 | > k <- 3 # # of parameters a, b1, b2 | ||
| - | > df.tot <- length(y)-1 | + | > df.tot <- length(df$y)-1 |
| > df.reg <- k - 1 | > df.reg <- k - 1 | ||
| > df.res <- df.tot - df.reg | > df.res <- df.tot - df.reg | ||
| Line 438: | Line 471: | ||
| > ms.res <- ss.res/ | > ms.res <- ss.res/ | ||
| > ms.reg | > ms.reg | ||
| - | [1] 11.97188 | + | [1] 11.97 |
| > ms.res | > ms.res | ||
| - | [1] 0.8651765 | + | [1] 0.8652 |
| > f.val <- ms.reg/ | > f.val <- ms.reg/ | ||
| > f.val | > f.val | ||
| - | [1] 13.8375 | + | [1] 13.84 |
| > p.val <- pf(f.val, df.reg, df.res, lower.tail = F) | > p.val <- pf(f.val, df.reg, df.res, lower.tail = F) | ||
| > p.val | > p.val | ||
| - | [1] 0.003696453 | + | [1] 0.003696 |
| > | > | ||
| > # double check | > # double check | ||
| Line 452: | Line 485: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
| Coefficients: | Coefficients: | ||
| - | Estimate Std. Error t value Pr(> | + | |
| - | (Intercept) | + | (Intercept) |
| - | x1 0.011841 | + | x1 0.01184 |
| - | x2 -0.544727 | + | x2 -0.54473 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 0.9301 on 7 degrees of freedom | + | Residual standard error: 0.93 on 7 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > anova(lm.y.x1x2) | > anova(lm.y.x1x2) | ||
| Line 475: | Line 507: | ||
| Response: y | Response: y | ||
| - | Df Sum Sq Mean Sq F value | + | Df Sum Sq Mean Sq F value Pr(> |
| - | x1 1 18.9336 18.9336 21.8841 0.002266 | + | x1 |
| - | x2 | + | x2 |
| - | Residuals | + | Residuals |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | > |
| + | > summary(lm(y~x2+x1, | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x2 + x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.217 -0.578 -0.151 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x2 -0.54473 | ||
| + | x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.93 on 7 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > anova(lm(y~x2+x1, | ||
| + | Analysis of Variance Table | ||
| + | |||
| + | Response: y | ||
| + | Df Sum Sq Mean Sq F value Pr(> | ||
| + | x2 | ||
| + | x1 | ||
| + | Residuals | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | > | ||
| > # note on 2 t-tests in summary | > # note on 2 t-tests in summary | ||
| > # anova에서의 x1, x2에 대한 테스트와 | > # anova에서의 x1, x2에 대한 테스트와 | ||
| Line 493: | Line 557: | ||
| > # 테스트하기 때문에 그러함 | > # 테스트하기 때문에 그러함 | ||
| > | > | ||
| + | > # 또한 anova test에서 두번째 IV의 F값은 | ||
| + | > # summary(lm)에서 두번 째 IV의 t값의 제곱값 | ||
| + | > # 임을 이해. 이는 두번 째 IV의 설명력을 나 | ||
| + | > # 타내는 부분이 lm과 anova 모두 같기 때문 | ||
| + | > # 반면에 첫번째 IV의 경우에는 lm 분석 때에는 | ||
| + | > # 고유의 설명력만을 가지고 (semi-partial cor^2) | ||
| + | > # 판단을 하는 반면에, anova는 x2와 공유하는 | ||
| + | > # 설명력도 포함해서 분석하기 때문에 t값의 | ||
| + | > # 제곱이 F값이 되지 못함 | ||
| + | > | ||
| + | > # beta에 대한 설명 | ||
| > # beta coefficient (standardized b) | > # beta coefficient (standardized b) | ||
| > # beta <- b * (sd(x)/ | > # beta <- b * (sd(x)/ | ||
| - | > beta1 <- b1 * (sd(x1)/ | + | > beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
| - | > beta2 <- b2 * (sd(x2)/ | + | > beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
| > beta1 | > beta1 | ||
| - | | + | |
| - | 0.616097 | + | 0.6161 |
| > beta2 | > beta2 | ||
| - | | + | x2 |
| - | -0.4458785 | + | -0.4459 |
| > | > | ||
| > # install.packages(" | > # install.packages(" | ||
| Line 509: | Line 584: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Standardized Coefficients:: | Standardized Coefficients:: | ||
| (Intercept) | (Intercept) | ||
| - | | + | |
| > | > | ||
| Line 521: | Line 596: | ||
| > # understand with diagrams first | > # understand with diagrams first | ||
| > # then calculate with r | > # then calculate with r | ||
| - | > lm.tmp.1 <- lm(x2~x1, data=d) | + | > lm.tmp.1 <- lm(x2~x1, data=df) |
| > res.x2.x1 <- lm.tmp.1$residuals | > res.x2.x1 <- lm.tmp.1$residuals | ||
| > | > | ||
| - | > lm.tmp.2 <- lm(y~x1, data=d) | + | > lm.tmp.2 <- lm(y~x1, data=df) |
| > res.y.x1 <- lm.tmp.2$residuals | > res.y.x1 <- lm.tmp.2$residuals | ||
| > | > | ||
| - | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
| > summary(lm.tmp.3) | > summary(lm.tmp.3) | ||
| Call: | Call: | ||
| - | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
| Coefficients: | Coefficients: | ||
| - | | + | Estimate Std. Error t value Pr(> |
| - | (Intercept) | + | (Intercept) |
| - | res.x2.x1 | + | res.x2.x1 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 0.8701 on 8 degrees of freedom | + | Residual standard error: 0.87 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| - | > | + | > summary(lm.tmp.3)$r.squared |
| + | [1] 0.4527 | ||
| + | > sqrt(summary(lm.tmp.3)$r.squared) | ||
| + | [1] 0.6729 | ||
| > # install.packages(" | > # install.packages(" | ||
| > library(ppcor) | > library(ppcor) | ||
| - | > partial.r <- pcor.test(y, | + | > partial.r <- pcor.test(df$y, df$x2, df$x1) |
| - | > partial.r | + | |
| - | | + | |
| - | 1 -0.672856 0.04702022 -2.406425 10 1 pearson | + | |
| > str(partial.r) | > str(partial.r) | ||
| ' | ' | ||
| Line 564: | Line 638: | ||
| $ gp : num 1 | $ gp : num 1 | ||
| $ Method | $ Method | ||
| + | > partial.r$estimate | ||
| + | [1] -0.6729 | ||
| > summary(lm.tmp.3) | > summary(lm.tmp.3) | ||
| Call: | Call: | ||
| - | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
| Coefficients: | Coefficients: | ||
| - | | + | Estimate Std. Error t value Pr(> |
| - | (Intercept) | + | (Intercept) |
| - | res.x2.x1 | + | res.x2.x1 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 0.8701 on 8 degrees of freedom | + | Residual standard error: 0.87 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > summary(lm.tmp.3)$r.square | > summary(lm.tmp.3)$r.square | ||
| - | [1] 0.4527352 | + | [1] 0.4527 |
| > partial.r$estimate^2 | > partial.r$estimate^2 | ||
| - | [1] 0.4527352 | + | [1] 0.4527 |
| > | > | ||
| > | > | ||
| > # x1's own explanation? | > # x1's own explanation? | ||
| - | > lm.tmp.4 <- lm(x1~x2, data=d) | + | > lm.tmp.4 <- lm(x1~x2, data=df) |
| > res.x1.x2 <- lm.tmp.4$residuals | > res.x1.x2 <- lm.tmp.4$residuals | ||
| > | > | ||
| - | > lm.tmp.5 <- lm(y~x2, data=d) | + | > lm.tmp.5 <- lm(y~x2, data=df) |
| > res.y.x2 <- lm.tmp.5$residuals | > res.y.x2 <- lm.tmp.5$residuals | ||
| > | > | ||
| - | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
| > summary(lm.tmp.6) | > summary(lm.tmp.6) | ||
| Call: | Call: | ||
| - | lm(formula = res.y.x2 ~ res.x1.x2, data = d) | + | lm(formula = res.y.x2 ~ res.x1.x2, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
| Coefficients: | Coefficients: | ||
| - | Estimate Std. Error t value Pr(> | + | |
| - | (Intercept) 1.330e-17 2.751e-01 | + | (Intercept) 1.33e-17 |
| - | res.x1.x2 | + | res.x1.x2 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 0.8701 on 8 degrees of freedom | + | Residual standard error: 0.87 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > | > | ||
| - | > partial.r <- pcor.test(y, | + | > partial.r <- pcor.test(df$y, df$x1, df$x2) |
| > str(partial.r) | > str(partial.r) | ||
| ' | ' | ||
| Line 631: | Line 705: | ||
| $ Method | $ Method | ||
| > partial.r$estimate # this is partial correlation, | > partial.r$estimate # this is partial correlation, | ||
| - | [1] 0.7825112 | + | [1] 0.7825 |
| > # in order to get pr2, you should ^2 | > # in order to get pr2, you should ^2 | ||
| > partial.r$estimate^2 | > partial.r$estimate^2 | ||
| - | [1] 0.6123238 | + | [1] 0.6123 |
| + | > summary(lm.tmp.6)$r.squared | ||
| + | [1] 0.6123 | ||
| > | > | ||
| > ####################################################### | > ####################################################### | ||
| - | > # | ||
| > # semipartial correlation coefficient and spr2 | > # semipartial correlation coefficient and spr2 | ||
| > # | > # | ||
| - | > spr.y.x2.x1 <- spcor.test(y, | + | > spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
| - | > spr.y.x1.x2 <- spcor.test(y, | + | > spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
| > spr.y.x2.x1 | > spr.y.x2.x1 | ||
| - | estimate | + | |
| - | 1 -0.4086619 | + | 1 -0.4087 |
| > spr.y.x1.x2 | > spr.y.x1.x2 | ||
| - | estimate | + | |
| - | 1 0.5646726 | + | 1 |
| > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | ||
| > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | ||
| > spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
| - | [1] 0.1670045 | + | [1] 0.167 |
| > spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
| - | [1] 0.3188552 | + | [1] 0.3189 |
| > | > | ||
| - | > lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
| > summary(lm.tmp.7) | > summary(lm.tmp.7) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ res.x2.x1, data = d) | + | lm(formula = y ~ res.x2.x1, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.8617 -1.1712 -0.4940 0.5488 3.0771 | + | -1.862 -1.171 -0.494 0.549 3.077 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) | + | (Intercept) |
| - | res.x2.x1 | + | res.x2.x1 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 1.767 on 8 degrees of freedom | + | Residual standard error: 1.77 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
| - | [1] 0.1670045 | + | [1] 0.167 |
| > | > | ||
| - | > lm.tmp.8 <- lm(y~res.x1.x2, | + | > lm.tmp.8 <- lm(y~res.x1.x2, |
| > summary(lm.tmp.8) | > summary(lm.tmp.8) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ res.x1.x2, data = d) | + | lm(formula = y ~ res.x1.x2, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -2.6642 -0.6084 -0.1492 1.2192 2.2901 | + | -2.664 -0.608 -0.149 1.219 2.290 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) 8.000000 | + | (Intercept) |
| - | res.x1.x2 | + | res.x1.x2 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 1.598 on 8 degrees of freedom | + | Residual standard error: 1.6 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
| - | [1] 0.3188552 | + | [1] 0.3189 |
| > | > | ||
| + | > bcd # remember bcd in the above? | ||
| + | [1] 0.7981 | ||
| + | > bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
| + | > bd | ||
| + | [1] 0.4859 | ||
| + | > bcd - bd | ||
| + | [1] 0.3123 | ||
| > | > | ||
| > ####################################################### | > ####################################################### | ||
| Line 712: | Line 792: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2537 -0.8881 -0.4851 0.4963 2.5920 | + | -1.254 -0.888 -0.485 0.496 2.592 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) | + | (Intercept) |
| - | x2 | + | x2 -0.846 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 1.397 on 8 degrees of freedom | + | Residual standard error: 1.4 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > all.x2 <- summary(lm.y.x2)$r.squared | > all.x2 <- summary(lm.y.x2)$r.squared | ||
| > all.x2 | > all.x2 | ||
| - | [1] 0.4792703 | + | [1] 0.4793 |
| > spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
| - | [1] 0.1670045 | + | [1] 0.167 |
| > cma.1 <- all.x2 - spr2.y.x2.x1 | > cma.1 <- all.x2 - spr2.y.x2.x1 | ||
| > cma.1 | > cma.1 | ||
| - | [1] 0.3122658 | + | [1] 0.3123 |
| > | > | ||
| > # 2. | > # 2. | ||
| Line 743: | Line 822: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.5189 -0.8969 -0.1297 1.0058 1.5800 | + | -1.519 -0.897 -0.130 1.006 1.580 |
| Coefficients: | Coefficients: | ||
| Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
| - | (Intercept) 3.617781 | + | (Intercept) |
| - | x1 0.015269 | + | x1 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 1.176 on 8 degrees of freedom | + | Residual standard error: 1.18 on 8 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > all.x1 <- summary(lm.y.x1)$r.squared | > all.x1 <- summary(lm.y.x1)$r.squared | ||
| > all.x1 | > all.x1 | ||
| - | [1] 0.631121 | + | [1] 0.6311 |
| > spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
| - | [1] 0.3188552 | + | [1] 0.3189 |
| > cma.2 <- all.x1 - spr2.y.x1.x2 | > cma.2 <- all.x1 - spr2.y.x1.x2 | ||
| > cma.2 | > cma.2 | ||
| - | [1] 0.3122658 | + | [1] 0.3123 |
| > | > | ||
| > # OR 3. | > # OR 3. | ||
| Line 774: | Line 852: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Residuals: | Residuals: | ||
| - | | + | |
| - | -1.2173 -0.5779 -0.1515 0.6642 1.1906 | + | -1.217 -0.578 -0.151 0.664 1.191 |
| Coefficients: | Coefficients: | ||
| - | Estimate Std. Error t value Pr(> | + | |
| - | (Intercept) | + | (Intercept) |
| - | x1 0.011841 | + | x1 0.01184 |
| - | x2 -0.544727 | + | x2 -0.54473 |
| --- | --- | ||
| - | Signif. codes: | + | Signif. codes: |
| - | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | + | |
| - | Residual standard error: 0.9301 on 7 degrees of freedom | + | Residual standard error: 0.93 on 7 degrees of freedom |
| - | Multiple R-squared: | + | Multiple R-squared: |
| - | F-statistic: | + | F-statistic: |
| > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| > r2.y.x1x2 | > r2.y.x1x2 | ||
| - | [1] 0.7981255 | + | [1] 0.7981 |
| > spr2.y.x1.x2 | > spr2.y.x1.x2 | ||
| - | [1] 0.3188552 | + | [1] 0.3189 |
| > spr2.y.x2.x1 | > spr2.y.x2.x1 | ||
| - | [1] 0.1670045 | + | [1] 0.167 |
| > cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | > cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | ||
| + | > bcd - bd | ||
| + | [1] 0.3123 | ||
| > cma.3 | > cma.3 | ||
| - | [1] 0.3122658 | + | [1] 0.3123 |
| > | > | ||
| > cma.1 | > cma.1 | ||
| - | [1] 0.3122658 | + | [1] 0.3123 |
| > cma.2 | > cma.2 | ||
| - | [1] 0.3122658 | + | [1] 0.3123 |
| > cma.3 | > cma.3 | ||
| - | [1] 0.3122658 | + | [1] 0.3123 |
| > | > | ||
| > # OR 애초에 simple regression과 multiple | > # OR 애초에 simple regression과 multiple | ||
| Line 817: | Line 896: | ||
| > r2.y.x2 <- summary(lm.y.x2)$r.square | > r2.y.x2 <- summary(lm.y.x2)$r.square | ||
| > r2.y.x1 | > r2.y.x1 | ||
| - | [1] 0.631121 | + | [1] 0.6311 |
| > r2.y.x2 | > r2.y.x2 | ||
| - | [1] 0.4792703 | + | [1] 0.4793 |
| > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| > r2.y.x1x2 | > r2.y.x1x2 | ||
| - | [1] 0.7981255 | + | [1] 0.7981 |
| > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | ||
| > cma.4 | > cma.4 | ||
| - | [1] 0.3122658 | + | [1] 0.3123 |
| > | > | ||
| > # Note that sorting out unique and common | > # Note that sorting out unique and common | ||
| Line 837: | Line 916: | ||
| > | > | ||
| > | > | ||
| + | </ | ||
| + | |||
| + | ====== explanation. added ====== | ||
| + | {{: | ||
| + | < | ||
| + | # ex. | ||
| + | # resid(lm(y~x1, | ||
| + | # resid(lm(y~x2, | ||
| + | # resid(lm(y~x1+x2, | ||
| + | # b / delta.y = ? | ||
| + | # ce / delta.x2 = ? | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | # exp.added | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | summary(lm(y~x1+x2, | ||
| + | |||
| + | b <- spcor.test(df$y, | ||
| + | d <- spcor.test(df$y, | ||
| + | bcd <- summary(lm(y~x1+x2, | ||
| + | |||
| + | summary(lm(df$y~df$x1+df$x2, | ||
| + | (spcor.test(df$y, | ||
| + | | ||
| + | bcd - (b + d) | ||
| + | |||
| </ | </ | ||
c/ms/2025/schedule/w13.lecture.note.1748995157.txt.gz · Last modified: by hkimscil
