| # | # | ||
| rm(list=ls()) | rm(list=ls()) | ||
| - | d <- read.csv(" | + | df <- read.csv(" |
| - | d | + | df |
| - | colnames(d) <- c(" | + | colnames(df) <- c(" |
| - | d | + | df |
| # y = 통장갯수 | # y = 통장갯수 | ||
| # x1 = 인컴 | # x1 = 인컴 | ||
| # x2 = 부양가족수 | # x2 = 부양가족수 | ||
| - | lm.y.x1 <- lm(y ~ x1, data=d) | + | lm.y.x1 <- lm(y ~ x1, data=df) |
| summary(lm.y.x1) | summary(lm.y.x1) | ||
| anova(lm.y.x1) | anova(lm.y.x1) | ||
| + | cor(df$x1, df$y)^2 | ||
| + | summary(lm.y.x1)$r.squared | ||

lm.y.x2 <- lm(y ~ x2, data=df)
summary(lm.y.x2)
anova(lm.y.x2)
cor(df$x2, df$y)^2
summary(lm.y.x2)$r.squared

lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
summary(lm.y.x1x2)
anova(lm.y.x1x2)
bcd <- summary(lm.y.x1x2)$r.squared
bcd
bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
bc.cd
bc.cd - bcd # note that this is the amount shared by the two IVs

lm.y.x1x2$coefficient
# y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
a <- lm.y.x1x2$coefficient[1]
b1 <- lm.y.x1x2$coefficient[2]
b2 <- lm.y.x1x2$coefficient[3]
a
b1
b2

y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
y.pred
# or
y.pred2 <- predict(lm.y.x1x2)
head(y.pred == y.pred2)

y.real <- df$y
y.real
y.mean <- mean(df$y)
y.mean

deviation.score <- df$y - y.mean
ds <- deviation.score
res <- y.real - y.pred
reg <- y.pred - y.mean
y.mean
# remember y is the sum of res + reg + y.mean
y2 <- res + reg + y.mean
df$y==y2

ss.tot <- sum(ds^2)
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)

ss.tot2 <- var(df$y) * (length(df$y)-1)
ss.tot
ss.tot2
ss.res
ss.reg
ss.res+ss.reg
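# (quick check, not in the original script) R2 follows
# directly from these sums: R2 = ss.reg/ss.tot
ss.reg/ss.tot          # should equal summary(lm.y.x1x2)$r.squared
1 - (ss.res/ss.tot)    # same thing, via the residual sum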

k <- 3 # # of parameters a, b1, b2
df.tot <- length(df$y)-1
df.reg <- k - 1
df.res <- df.tot - df.reg

ms.reg <- ss.reg/df.reg
ms.res <- ss.res/df.res
ms.reg
ms.res
f.val <- ms.reg/ms.res
f.val
p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
p.val

# double check
summary(lm.y.x1x2)
anova(lm.y.x1x2)

summary(lm(y~x2+x1, data=df))
anova(lm(y~x2+x1, data=df))

# note on the 2 t-tests in summary:
# The tests for x1 and x2 in anova() differ from the
# t-tests for x1 and x2 in summary(lm) (the Pr values
# for x1 and x2 are not the same). The reason is that
# the t-tests test the partial correlation (pr)
# (not the spr or the zero-order r), while the anova
# test assesses x1 as a whole and then tests x2 only
# with what remains after x1.

# Also understand that, in the anova test, the F value
# of the second IV is the square of that IV's t value
# in summary(lm). That is because the part representing
# the second IV's explanatory power is the same in both
# lm and anova. For the first IV, in contrast, lm judges
# it by its unique explanatory power only (semi-partial
# cor^2), whereas anova also includes the explanatory
# power shared with x2, so the squared t value does not
# equal the F value.
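# a quick numeric check of the claim above: for the
# last-entered IV, t^2 in summary() equals F in anova()
t.x2 <- summary(lm.y.x1x2)$coefficients["x2", "t value"]
t.x2^2
anova(lm.y.x1x2)["x2", "F value"]   # should match t.x2^2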
| + | |||
| + | # beta에 대한 설명 | ||
| # beta coefficient (standardized b) | # beta coefficient (standardized b) | ||
| # beta <- b * (sd(x)/ | # beta <- b * (sd(x)/ | ||
| - | beta1 <- b1 * (sd(x1)/ | + | beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
| - | beta2 <- b2 * (sd(x2)/ | + | beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
| beta1 | beta1 | ||
| beta2 | beta2 | ||
| Line 105: | Line 136: | ||
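# (added sketch) the same betas come out of refitting the
# model on z-scores; scale() standardizes each variable
lm(scale(y) ~ scale(x1) + scale(x2), data=df)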

#######################################################
# partial correlation coefficient and pr2
# x2's explanation?
# understand with diagrams first
# then calculate with r
lm.tmp.1 <- lm(x2~x1, data=df)
res.x2.x1 <- lm.tmp.1$residuals

lm.tmp.2 <- lm(y~x1, data=df)
res.y.x1 <- lm.tmp.2$residuals

lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
summary(lm.tmp.3)
summary(lm.tmp.3)$r.squared
sqrt(summary(lm.tmp.3)$r.squared)

# install.packages("ppcor")
library(ppcor)
partial.r <- pcor.test(df$y, df$x2, df$x1)
str(partial.r)
partial.r$estimate
summary(lm.tmp.3)
summary(lm.tmp.3)$r.square
partial.r$estimate^2

# x1's own explanation?
lm.tmp.4 <- lm(x1~x2, data=df)
res.x1.x2 <- lm.tmp.4$residuals

lm.tmp.5 <- lm(y~x2, data=df)
res.y.x2 <- lm.tmp.5$residuals

lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
summary(lm.tmp.6)

partial.r <- pcor.test(df$y, df$x1, df$x2)
str(partial.r)
partial.r$estimate # this is the partial correlation, not pr2
# in order to get pr2, you should ^2 it
partial.r$estimate^2
summary(lm.tmp.6)$r.squared
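# (added cross-check; a standard identity, not in the
# original script) pr2 = t^2 / (t^2 + df.res), using the
# IV's t value from the full model
t.x1 <- summary(lm.y.x1x2)$coefficients["x1", "t value"]
t.x1^2 / (t.x1^2 + lm.y.x1x2$df.residual)   # = 0.6123, as above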
| ####################################################### | ####################################################### | ||
| - | # | ||
| # semipartial correlation coefficient and spr2 | # semipartial correlation coefficient and spr2 | ||
| # | # | ||
| - | spr.1 <- spcor.test(y, | + | spr.y.x2.x1 |
| - | spr.2 <- spcor.test(y, | + | spr.y.x1.x2 |
| - | spr.1 | + | spr.y.x2.x1 |
| - | spr.2 | + | spr.y.x1.x2 |
| - | spr.1$estimate^2 | + | spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 |
| - | spr.2$estimate^2 | + | spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 |
| + | spr2.y.x2.x1 | ||
| + | spr2.y.x1.x2 | ||

lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
summary(lm.tmp.7)
spr2.y.x2.x1

lm.tmp.8 <- lm(y~res.x1.x2, data=df)
summary(lm.tmp.8)
spr2.y.x1.x2

bcd # remember bcd in the above?
bd <- spr2.y.x2.x1 + spr2.y.x1.x2
bd
bcd - bd
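# (added check) spr2 is also the R2 gain when an IV
# enters the model last
summary(lm.y.x1x2)$r.squared - summary(lm.y.x1)$r.squared # = spr2.y.x2.x1
summary(lm.y.x1x2)$r.squared - summary(lm.y.x2)$r.squared # = spr2.y.x1.x2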
| ####################################################### | ####################################################### | ||
| Line 164: | Line 203: | ||
| summary(lm.y.x2) | summary(lm.y.x2) | ||
| all.x2 <- summary(lm.y.x2)$r.squared | all.x2 <- summary(lm.y.x2)$r.squared | ||
| - | sp.x2 <- spr.1$estimate^2 | ||
| all.x2 | all.x2 | ||
| - | sp.x2 | + | spr2.y.x2.x1 |
| - | cma.1 <- all.x2 - sp.x2 | + | cma.1 <- all.x2 - spr2.y.x2.x1 |
| cma.1 | cma.1 | ||

# 2.
summary(lm.y.x1)
all.x1 <- summary(lm.y.x1)$r.squared
all.x1
spr2.y.x1.x2
cma.2 <- all.x1 - spr2.y.x1.x2
cma.2

# OR 3.
summary(lm.y.x1x2)
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
spr2.y.x1.x2
spr2.y.x2.x1
cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
bcd - bd
cma.3

cma.1
cma.2
cma.3

# OR: the common explanatory power could have been
# obtained from the start with the R2s from the
# simple and multiple regressions.
r2.y.x1 <- summary(lm.y.x1)$r.square
r2.y.x2 <- summary(lm.y.x2)$r.square
r2.y.x1
r2.y.x2
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2
cma.4
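# (added check) all four routes land on the same common area
c(cma.1, cma.2, cma.3, cma.4)   # each 0.3123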

# Note that sorting out unique and common
# explanation areas is only possible with the
# semi-partial correlation determinant,
# NOT the partial correlation determinant,
# because only the semi-partial correlation
# shares the same denominator (the total
# variance of y).
#############################################
| - | ############################################# | ||
| - | # scratch | ||
| - | ############################################# | ||
| - | lm.x1x2 <- lm(x1~x2, data = d) | ||
| - | lm.x2x1 <- lm(x2~x1, data = d) | ||
| - | summary(lm.y.x1) | + | </ |
| - | summary(lm.y.x2) | + | ====== output ====== |
| - | summary(lm.y.x1x2) | + | < |
> # multiple regression: a simple e.g.
> #
> #
> rm(list=ls())
> df <- read.csv("...")
> df
   ...
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> 
> colnames(df) <- c("y", "x1", "x2")
> df
    y  x1 x2
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> # y = number of bank accounts
> # x1 = income
> # x2 = number of dependents
> lm.y.x1 <- lm(y ~ x1, data=df)
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.519 -0.897 -0.130  1.006  1.580 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)      ...
x1           0.01527        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.18 on 8 degrees of freedom
Multiple R-squared:  0.6311,  Adjusted R-squared:  ...
F-statistic: ...

> anova(lm.y.x1)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1  18.93     ...
Residuals  8  11.07     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x1, df$y)^2
[1] 0.6311
> summary(lm.y.x1)$r.squared
[1] 0.6311
> 
> 
> lm.y.x2 <- lm(y ~ x2, data=df)
> summary(lm.y.x2)

Call:
lm(formula = y ~ x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.254 -0.888 -0.485  0.496  2.592 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   10.791        ...
x2            -0.846        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.4 on 8 degrees of freedom
Multiple R-squared:  0.4793,  Adjusted R-squared:  ...
F-statistic: ...

> anova(lm.y.x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x2         1  14.38     ...
Residuals  8  15.62     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x2, df$y)^2
[1] 0.4793
> summary(lm.y.x2)$r.squared
[1] 0.4793
> 
> 
> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910        ...
x1           0.01184        ...
x2          -0.54473        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,  Adjusted R-squared:  ...
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> anova(lm.y.x1x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1  18.93     ...
x2         1   5.01     ...
Residuals  7   6.06     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> bcd <- summary(lm.y.x1x2)$r.squared
> bcd
[1] 0.7981
> bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
> bc.cd
[1] 1.11
> bc.cd - bcd # note that this is the amount shared by the two IVs
[1] 0.3123
> 
> 
> lm.y.x1x2$coefficient
(Intercept)          x1          x2 
    6.39910     0.01184    -0.54473 
> # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
> a <- lm.y.x1x2$coefficient[1]
> b1 <- lm.y.x1x2$coefficient[2]
> b2 <- lm.y.x1x2$coefficient[3]
> a
(Intercept) 
      6.399 
> b1
     x1 
0.01184 
> b2
     x2 
-0.5447 
> 
> y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
> y.pred
 [1] ...
> # or
> y.pred2 <- predict(lm.y.x1x2)
> head(y.pred == y.pred2)
   1    2    3    4    5    6 
TRUE TRUE TRUE TRUE TRUE TRUE 
> 
> y.real <- df$y
> y.real
 [1]  6  5  7  7  8 10  8 11  9  9
> y.mean <- mean(df$y)
> y.mean
[1] 8
> 
> deviation.score <- df$y - y.mean
> ds <- deviation.score
> res <- y.real - y.pred
> reg <- y.pred - y.mean
> y.mean
[1] 8
> # remember y is the sum of res + reg + y.mean
> y2 <- res + reg + y.mean
> df$y==y2
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
> 
> ss.tot <- sum(ds^2)
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)
> 
> ss.tot2 <- var(df$y) * (length(df$y)-1)
> ss.tot
[1] 30
> ss.tot2
[1] 30
> ss.res
[1] 6.056
> ss.reg
[1] 23.94
> ss.res+ss.reg
[1] 30
> 
> k <- 3 # # of parameters a, b1, b2
> df.tot <- length(df$y)-1
> df.reg <- k - 1
> df.res <- df.tot - df.reg
> 
> ms.reg <- ss.reg/df.reg
> ms.res <- ss.res/df.res
> ms.reg
[1] 11.97
> ms.res
[1] 0.8652
> f.val <- ms.reg/ms.res
> f.val
[1] 13.84
> p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
> p.val
[1] 0.003696
> 
> # double check
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910        ...
x1           0.01184        ...
x2          -0.54473        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,  Adjusted R-squared:  ...
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696

> anova(lm.y.x1x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x1         1  18.93     ...
x2         1   5.01     ...
Residuals  7   6.06     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> 
> summary(lm(y~x2+x1, data=df))

Call:
lm(formula = y ~ x2 + x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  6.39910        ...
x2          -0.54473        ...
x1           0.01184        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.7981,  Adjusted R-squared:  ...
F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696
> anova(lm(y~x2+x1, data=df))
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)
x2         1  14.38     ...
x1         1   9.57    9.57    11.1 0.0127 *
Residuals  7   6.06     ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> 
> # note on the 2 t-tests in summary:
> # The tests for x1 and x2 in anova() differ from the
> # t-tests for x1 and x2 in summary(lm) (the Pr values
> # for x1 and x2 are not the same). The reason is that
> # the t-tests test the partial correlation (pr)
> # (not the spr or the zero-order r), while the anova
> # test assesses x1 as a whole and then tests x2 only
> # with what remains after x1.
> 
> # Also understand that, in the anova test, the F value
> # of the second IV is the square of that IV's t value
> # in summary(lm). That is because the part representing
> # the second IV's explanatory power is the same in both
> # lm and anova. For the first IV, in contrast, lm judges
> # it by its unique explanatory power only (semi-partial
> # cor^2), whereas anova also includes the explanatory
> # power shared with x2, so the squared t value does not
> # equal the F value.
> 
> # explanation of beta
> # beta coefficient (standardized b)
> # beta <- b * (sd(x)/sd(y))
> beta1 <- b1 * (sd(df$x1)/sd(df$y))
> beta2 <- b2 * (sd(df$x2)/sd(df$y))
> beta1
    x1 
0.6161 
> beta2
     x2 
-0.4459 
> 
> # install.packages("lm.beta")
> library(lm.beta)
> lm.beta(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Standardized Coefficients::
(Intercept)          x1          x2 
         NA      0.6161     -0.4459 

> 
> #######################################################
> # partial correlation coefficient and pr2
> # x2's explanation?
> # understand with diagrams first
> # then calculate with r
> lm.tmp.1 <- lm(x2~x1, data=df)
> res.x2.x1 <- lm.tmp.1$residuals
> 
> lm.tmp.2 <- lm(y~x1, data=df)
> res.y.x1 <- lm.tmp.2$residuals
> 
> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)       ...
res.x2.x1   -5.45e-01        ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.4527,  Adjusted R-squared:  ...
F-statistic: ...
> summary(lm.tmp.3)$r.squared
[1] 0.4527
> sqrt(summary(lm.tmp.3)$r.squared)
[1] 0.6729
> # install.packages("ppcor")
> library(ppcor)
> partial.r <- pcor.test(df$y, df$x2, df$x1)
> str(partial.r)
'data.frame':	1 obs. of  6 variables:
 $ estimate : num -0.673
 $ p.value  : num ...
 $ statistic: num -2.41
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r$estimate
[1] -0.6729
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept)       ...
res.x2.x1         ...
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.4527,  Adjusted R-squared:  ...
F-statistic: ...
> summary(lm.tmp.3)$r.square
[1] 0.4527
> partial.r$estimate^2
[1] 0.4527
| + | > | ||
| + | > | ||
| + | > # x1's own explanation? | ||
| + | > lm.tmp.4 <- lm(x1~x2, data=df) | ||
| + | > res.x1.x2 <- lm.tmp.4$residuals | ||
| + | > | ||
| + | > lm.tmp.5 <- lm(y~x2, data=df) | ||
| + | > res.y.x2 <- lm.tmp.5$residuals | ||
| + | > | ||
| + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) | ||
| + | > summary(lm.tmp.6) | ||
| - | summary(lm(happiness~stress, | + | Call: |
| - | summary(lm(happiness~reg.bmi, data = d.yyk)) | + | lm(formula |
| - | summary(lm(happiness~bmi, | + | Residuals: |
| - | summary(lm(happiness~reg.stress, data=d.yyk)) | + | Min 1Q Median |
| + | -1.217 -0.578 -0.151 | ||
| - | plot(tmp.a, tmp.b) | + | Coefficients: |
| - | plot(tmp.b, tmp.a) | + | Estimate Std. Error t value Pr(>|t|) |
| + | (Intercept) 1.33e-17 | ||
| + | res.x1.x2 | ||
| + | --- | ||
| + | Signif. codes: | ||
| - | head(d.yyk) | + | Residual standard error: 0.87 on 8 degrees of freedom |
| - | str(d.yyk) | + | Multiple R-squared: |
| - | d.yyk | + | F-statistic: |
| - | ######################### | + | |
| + | > | ||
| + | > partial.r <- pcor.test(df$y, | ||
| + | > str(partial.r) | ||
| + | 'data.frame': | ||
| + | $ estimate : num 0.783 | ||
| + | $ p.value | ||
| + | $ statistic: num 3.33 | ||
| + | $ n : int 10 | ||
| + | $ gp : num 1 | ||
| + | $ Method | ||
| + | > partial.r$estimate | ||
| + | [1] 0.7825 | ||
| + | > # in order to get pr2, you should ^2 | ||
| + | > partial.r$estimate^2 | ||
| + | [1] 0.6123 | ||
| + | > summary(lm.tmp.6)$r.squared | ||
| + | [1] 0.6123 | ||
| + | > | ||
| + | > ####################### | ||
| + | > # semipartial correlation coefficient and spr2 | ||
| + | > # | ||
| + | > spr.y.x2.x1 <- spcor.test(df$y, | ||
| + | > spr.y.x1.x2 <- spcor.test(df$y, | ||
| + | > spr.y.x2.x1 | ||
| + | estimate p.value statistic | ||
| + | 1 -0.4087 | ||
| + | > spr.y.x1.x2 | ||
| + | estimate p.value statistic | ||
| + | 1 | ||
| + | > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 | ||
| + | > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > | ||
| + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) | ||
| + | > summary(lm.tmp.7) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ res.x2.x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.862 -1.171 -0.494 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | res.x2.x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.77 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > | ||
| + | > lm.tmp.8 <- lm(y~res.x1.x2, | ||
| + | > summary(lm.tmp.8) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ res.x1.x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -2.664 -0.608 -0.149 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | res.x1.x2 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.6 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > | ||
| + | > bcd # remember bcd in the above? | ||
| + | [1] 0.7981 | ||
| + | > bd <- spr2.y.x2.x1 + spr2.y.x1.x2 | ||
| + | > bd | ||
| + | [1] 0.4859 | ||
| + | > bcd - bd | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > ####################################################### | ||
| + | > # get the common area that explain the y variable | ||
| + | > # 1. | ||
| + | > summary(lm.y.x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.254 -0.888 -0.485 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x2 -0.846 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.4 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > all.x2 <- summary(lm.y.x2)$r.squared | ||
| + | > all.x2 | ||
| + | [1] 0.4793 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > cma.1 <- all.x2 - spr2.y.x2.x1 | ||
| + | > cma.1 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # 2. | ||
| + | > summary(lm.y.x1) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.519 -0.897 -0.130 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x1 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 1.18 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > all.x1 <- summary(lm.y.x1)$r.squared | ||
| + | > all.x1 | ||
| + | [1] 0.6311 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > cma.2 <- all.x1 - spr2.y.x1.x2 | ||
| + | > cma.2 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # OR 3. | ||
| + | > summary(lm.y.x1x2) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = y ~ x1 + x2, data = df) | ||
| + | |||
| + | Residuals: | ||
| + | | ||
| + | -1.217 -0.578 -0.151 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | x1 | ||
| + | x2 -0.54473 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | |||
| + | Residual standard error: 0.93 on 7 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| + | > r2.y.x1x2 | ||
| + | [1] 0.7981 | ||
| + | > spr2.y.x1.x2 | ||
| + | [1] 0.3189 | ||
| + | > spr2.y.x2.x1 | ||
| + | [1] 0.167 | ||
| + | > cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) | ||
| + | > bcd - bd | ||
| + | [1] 0.3123 | ||
| + | > cma.3 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > cma.1 | ||
| + | [1] 0.3123 | ||
| + | > cma.2 | ||
| + | [1] 0.3123 | ||
| + | > cma.3 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # OR 애초에 simple regression과 multiple | ||
| + | > # regression에서 얻은 R2을 가지고 | ||
| + | > # 공통설명력을 알아볼 수도 있었다. | ||
| + | > r2.y.x1 <- summary(lm.y.x1)$r.square | ||
| + | > r2.y.x2 <- summary(lm.y.x2)$r.square | ||
| + | > r2.y.x1 | ||
| + | [1] 0.6311 | ||
| + | > r2.y.x2 | ||
| + | [1] 0.4793 | ||
| + | > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square | ||
| + | > r2.y.x1x2 | ||
| + | [1] 0.7981 | ||
| + | > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 | ||
| + | > cma.4 | ||
| + | [1] 0.3123 | ||
| + | > | ||
| + | > # Note that sorting out unique and common | ||
| + | > # explanation area is only possible with | ||
| + | > # semi-partial correlation determinant | ||
| + | > # NOT partial correlation determinant | ||
| + | > # because only semi-partial correlation | ||
| + | > # shares the same denominator (as total | ||
| + | > # y). | ||
| + | > ############################################# | ||
| + | > | ||
| + | > | ||
| </ | </ | ||
| - | ====== | + | |
| + | ====== | ||
| + | {{: | ||
| < | < | ||
| + | # ex. | ||
| + | # resid(lm(y~x1, | ||
| + | # resid(lm(y~x2, | ||
| + | # resid(lm(y~x1+x2, | ||
| + | # b / delta.y = ? | ||
| + | # ce / delta.x2 = ? | ||
| </ | </ | ||
| + | < | ||
| + | # exp.added | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | summary(lm(y~x1+x2, | ||
| + | |||
| + | b <- spcor.test(df$y, | ||
| + | d <- spcor.test(df$y, | ||
| + | bcd <- summary(lm(y~x1+x2, | ||
| + | |||
| + | summary(lm(df$y~df$x1+df$x2, | ||
| + | (spcor.test(df$y, | ||
| + | | ||
| + | bcd - (b + d) | ||
| + | |||
| + | </ | ||
| + | ====== Another one ====== | ||
| + | [[:partial and semipartial correlation]] | ||