====== MR ======
<code>
# multiple regression: a simple e.g.
#
#
rm(list=ls())
df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
df

colnames(df) <- c("y", "x1", "x2")
df
# y = number of bank accounts
# x1 = income
# x2 = number of dependents
lm.y.x1 <- lm(y ~ x1, data=df)
summary(lm.y.x1)
anova(lm.y.x1)
cor(df$x1, df$y)^2
summary(lm.y.x1)$r.squared


lm.y.x2 <- lm(y ~ x2, data=df)
summary(lm.y.x2)
anova(lm.y.x2)
cor(df$x2, df$y)^2
summary(lm.y.x2)$r.squared


lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
summary(lm.y.x1x2)
anova(lm.y.x1x2)
bcd <- summary(lm.y.x1x2)$r.squared
bcd
bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
bc.cd
bc.cd - bcd  # note that this is the amount shared by the two IVs


lm.y.x1x2$coefficient
# y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
a <- lm.y.x1x2$coefficient[1]
b1 <- lm.y.x1x2$coefficient[2]
b2 <- lm.y.x1x2$coefficient[3]
a
b1
b2

y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
y.pred
# or
y.pred2 <- predict(lm.y.x1x2)
head(y.pred == y.pred2)
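# an aside (not in the original script): exact ==
# comparison can fail with floating-point arithmetic;
# a safer equality check is
all.equal(as.numeric(y.pred), as.numeric(y.pred2))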

y.real <- df$y
y.real
y.mean <- mean(df$y)
y.mean

deviation.score <- df$y - y.mean
ds <- deviation.score
res <- y.real - y.pred
reg <- y.pred - y.mean
y.mean
# remember y is the sum of res + reg + y.mean
y2 <- res + reg + y.mean
df$y==y2

ss.tot <- sum(ds^2)
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)

ss.tot2 <- var(df$y) * (length(df$y)-1)
ss.tot
ss.tot2
ss.res
ss.reg
ss.res+ss.reg

k <- 3 # number of parameters: a, b1, b2
df.tot <- length(df$y)-1
df.reg <- k - 1
df.res <- df.tot - df.reg

ms.reg <- ss.reg/df.reg
ms.res <- ss.res/df.res
ms.reg
ms.res
f.val <- ms.reg/ms.res
f.val
p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
p.val

# double check
summary(lm.y.x1x2)
anova(lm.y.x1x2)

summary(lm(y~x2+x1, data=df))
anova(lm(y~x2+x1, data=df))

# note on the 2 t-tests in summary
# note that the tests for x1 and x2 in anova differ
# from the t-tests for x1 and x2 in lm
# (the Pr values for x1 and x2 are different).
# the reason is that the t-tests test the partial r
# (pr; not the spr or the zero-order r), whereas
# anova tests x1 as a whole and then tests x2 with
# only what remains after x1 is accounted for

# also note that in the anova test the F value of the
# second IV equals the square of that IV's t value in
# summary(lm). this is because the part representing
# the second IV's explanatory power is the same in
# lm and anova. for the first IV, however, lm judges
# it by its unique explanatory power only
# (semi-partial cor^2), whereas anova also includes
# the explanatory power shared with x2, so the
# squared t value does not equal the F value
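
# a minimal numeric check of this point (a sketch,
# reusing lm.y.x1x2 from above): for the last
# predictor entered (x2), t^2 should equal F
t.x2 <- summary(lm.y.x1x2)$coefficients["x2", "t value"]
f.x2 <- anova(lm.y.x1x2)["x2", "F value"]
t.x2^2
f.x2
# for x1 (entered first) this equality does not hold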

# explanation of beta
# beta coefficient (standardized b)
# beta <- b * (sd(x)/sd(y))
beta1 <- b1 * (sd(df$x1)/sd(df$y))
beta2 <- b2 * (sd(df$x2)/sd(df$y))
beta1
beta2

# install.packages("lm.beta")
library(lm.beta)
lm.beta(lm.y.x1x2)
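
# equivalent check without the package (a sketch):
# regressing the standardized variables reproduces
# beta1 and beta2 as the coefficients
lm(scale(y) ~ scale(x1) + scale(x2), data=df)$coefficient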

#######################################################
# partial correlation coefficient and pr2
# x2's explanation?
# understand with diagrams first
# then calculate with r
lm.tmp.1 <- lm(x2~x1, data=df)
res.x2.x1 <- lm.tmp.1$residuals

lm.tmp.2 <- lm(y~x1, data=df)
res.y.x1 <- lm.tmp.2$residuals

lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
summary(lm.tmp.3)
summary(lm.tmp.3)$r.squared
sqrt(summary(lm.tmp.3)$r.squared)
# install.packages("ppcor")
library(ppcor)
partial.r <- pcor.test(df$y, df$x2, df$x1)
str(partial.r)
partial.r$estimate
summary(lm.tmp.3)
summary(lm.tmp.3)$r.square
partial.r$estimate^2
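
# extra check via the textbook formula (a sketch,
# using the zero-order correlations):
# pr = (r.yx2 - r.yx1*r.x1x2) / sqrt((1-r.yx1^2)*(1-r.x1x2^2))
r.yx1 <- cor(df$y, df$x1)
r.yx2 <- cor(df$y, df$x2)
r.x1x2 <- cor(df$x1, df$x2)
(r.yx2 - r.yx1*r.x1x2) / sqrt((1-r.yx1^2)*(1-r.x1x2^2))
# should match partial.r$estimate above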


# x1's own explanation?
lm.tmp.4 <- lm(x1~x2, data=df)
res.x1.x2 <- lm.tmp.4$residuals

lm.tmp.5 <- lm(y~x2, data=df)
res.y.x2 <- lm.tmp.5$residuals

lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
summary(lm.tmp.6)

partial.r <- pcor.test(df$y, df$x1, df$x2)
str(partial.r)
partial.r$estimate # this is the partial correlation, not pr^2
# in order to get pr2, square it
partial.r$estimate^2
summary(lm.tmp.6)$r.squared

#######################################################
# semipartial correlation coefficient and spr2
#
spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1)
spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
spr.y.x2.x1
spr.y.x1.x2
spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2
spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2
spr2.y.x2.x1
spr2.y.x1.x2
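
# the same zero-order correlations also give the
# semi-partial r (a sketch; r.yx1, r.yx2, r.x1x2 come
# from the partial-correlation check above):
# spr = (r.yx2 - r.yx1*r.x1x2) / sqrt(1-r.x1x2^2)
(r.yx2 - r.yx1*r.x1x2) / sqrt(1-r.x1x2^2)  # cf. spr.y.x2.x1$estimate
(r.yx1 - r.yx2*r.x1x2) / sqrt(1-r.x1x2^2)  # cf. spr.y.x1.x2$estimate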

lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
summary(lm.tmp.7)
spr2.y.x2.x1

lm.tmp.8 <- lm(y~res.x1.x2, data=df)
summary(lm.tmp.8)
spr2.y.x1.x2

bcd # remember bcd in the above?
bd <- spr2.y.x2.x1 + spr2.y.x1.x2
bd
bcd - bd

#######################################################
# get the common area that explains the y variable
# 1.
summary(lm.y.x2)
all.x2 <- summary(lm.y.x2)$r.squared
all.x2
spr2.y.x2.x1
cma.1 <- all.x2 - spr2.y.x2.x1
cma.1

# 2.
summary(lm.y.x1)
all.x1 <- summary(lm.y.x1)$r.squared
all.x1
spr2.y.x1.x2
cma.2 <- all.x1 - spr2.y.x1.x2
cma.2

# OR 3.
summary(lm.y.x1x2)
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
spr2.y.x1.x2
spr2.y.x2.x1
cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
bcd - bd
cma.3

cma.1
cma.2
cma.3

# OR we could have obtained the common explanatory
# power from the R2 values of the simple regressions
# and the multiple regression in the first place
r2.y.x1 <- summary(lm.y.x1)$r.square
r2.y.x2 <- summary(lm.y.x2)$r.square
r2.y.x1
r2.y.x2
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2
cma.4

# Note that sorting out the unique and common
# explanation areas is only possible with the
# squared semi-partial correlation,
# NOT the squared partial correlation,
# because only the semi-partial correlation
# shares the same denominator (the total
# variance of y).
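
# a quick numeric demo of this point (a sketch,
# reusing values computed above):
spr2.y.x1.x2 + spr2.y.x2.x1 + cma.3  # adds up to bcd (full-model R2)
pr2.y.x1.x2 <- pcor.test(df$y, df$x1, df$x2)$estimate^2
pr2.y.x2.x1 <- pcor.test(df$y, df$x2, df$x1)$estimate^2
pr2.y.x1.x2 + pr2.y.x2.x1 + cma.3    # does not: pr2 denominators differ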
#############################################


</code>
====== output ======
<code>
> # multiple regression: a simple e.g.
> #
> #
> rm(list=ls())
> df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
> df
   bankaccount income famnum
1            6    220      5
2            5    190      6
3            7    260      3
4            7    200      4
5            8    330      2
6           10    490      4
7            8    210      3
8           11    380      2
9            9    320      1
10           9    270      3

> colnames(df) <- c("y", "x1", "x2")
> df
    y  x1 x2
1   6 220  5
2   5 190  6
3   7 260  3
4   7 200  4
5   8 330  2
6  10 490  4
7   8 210  3
8  11 380  2
9   9 320  1
10  9 270  3
> # y = number of bank accounts
> # x1 = income
> # x2 = number of dependents
> lm.y.x1 <- lm(y ~ x1, data=df)
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.519 -0.897 -0.130  1.006  1.580 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  3.61778    1.24152    2.91    0.019 * 
x1           0.01527    0.00413    3.70    0.006 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.18 on 8 degrees of freedom
Multiple R-squared:  0.631, Adjusted R-squared:  0.585 
F-statistic: 13.7 on 1 and 8 DF,  p-value: 0.00605

> anova(lm.y.x1)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)   
x1         1   18.9   18.93    13.7  0.006 **
Residuals  8   11.1    1.38                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x1, df$y)^2
[1] 0.6311
> summary(lm.y.x1)$r.squared
[1] 0.6311


> lm.y.x2 <- lm(y ~ x2, data=df)
> summary(lm.y.x2)

Call:
lm(formula = y ~ x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.254 -0.888 -0.485  0.496  2.592 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   10.791      1.119    9.64  1.1e-05 ***
x2            -0.846      0.312   -2.71    0.027 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.4 on 8 degrees of freedom
Multiple R-squared:  0.479, Adjusted R-squared:  0.414 
F-statistic: 7.36 on 1 and 8 DF,  p-value: 0.0265

> anova(lm.y.x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)  
x2         1   14.4   14.38    7.36  0.027 *
Residuals  8   15.6    1.95                 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x2, df$y)^2
[1] 0.4793
> summary(lm.y.x2)$r.squared
[1] 0.4793


> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  6.39910    1.51654    4.22   0.0039 **
x1           0.01184    0.00356    3.33   0.0127 * 
x2          -0.54473    0.22636   -2.41   0.0470 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.798, Adjusted R-squared:  0.74 
F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037

> anova(lm.y.x1x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)   
x1         1  18.93   18.93   21.88 0.0023 **
x2         1   5.01    5.01    5.79 0.0470 * 
Residuals  7   6.06    0.87                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> bcd <- summary(lm.y.x1x2)$r.squared
> bcd
[1] 0.7981
> bc.cd <- summary(lm.y.x1)$r.squared + summary(lm.y.x2)$r.squared
> bc.cd
[1] 1.11
> bc.cd - bcd  # note that this is the amount shared by the two IVs
[1] 0.3123


> lm.y.x1x2$coefficient
(Intercept)          x1          x2 
    6.39910     0.01184    -0.54473 
> # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
> a <- lm.y.x1x2$coefficient[1]
> b1 <- lm.y.x1x2$coefficient[2]
> b2 <- lm.y.x1x2$coefficient[3]
> a
(Intercept) 
      6.399 
> b1
     x1 
0.01184 
> b2
     x2 
-0.5447 

> y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
> y.pred
 [1]  6.281  5.381  7.844  6.588  9.217 10.023  7.252  9.809  9.644  7.962
> # or
> y.pred2 <- predict(lm.y.x1x2)
> head(y.pred == y.pred2)
   1    2    3    4    5    6 
TRUE TRUE TRUE TRUE TRUE TRUE 

> y.real <- df$y
> y.real
 [1]  6  5  7  7  8 10  8 11  9  9
> y.mean <- mean(df$y)
> y.mean
[1] 8

> deviation.score <- df$y - y.mean
> ds <- deviation.score
> res <- y.real - y.pred
> reg <- y.pred - y.mean
> y.mean
[1] 8
> # remember y is the sum of res + reg + y.mean
> y2 <- res + reg + y.mean
> df$y==y2
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

> ss.tot <- sum(ds^2)
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)

> ss.tot2 <- var(df$y) * (length(df$y)-1)
> ss.tot
[1] 30
> ss.tot2
[1] 30
> ss.res
[1] 6.056
> ss.reg
[1] 23.94
> ss.res+ss.reg
[1] 30

> k <- 3 # number of parameters: a, b1, b2
> df.tot <- length(df$y)-1
> df.reg <- k - 1
> df.res <- df.tot - df.reg

> ms.reg <- ss.reg/df.reg
> ms.res <- ss.res/df.res
> ms.reg
[1] 11.97
> ms.res
[1] 0.8652
> f.val <- ms.reg/ms.res
> f.val
[1] 13.84
> p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
> p.val
[1] 0.003696

> # double check
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  6.39910    1.51654    4.22   0.0039 **
x1           0.01184    0.00356    3.33   0.0127 * 
x2          -0.54473    0.22636   -2.41   0.0470 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.798, Adjusted R-squared:  0.74 
F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037

> anova(lm.y.x1x2)
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)   
x1         1  18.93   18.93   21.88 0.0023 **
x2         1   5.01    5.01    5.79 0.0470 * 
Residuals  7   6.06    0.87                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> summary(lm(y~x2+x1, data=df))

Call:
lm(formula = y ~ x2 + x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  6.39910    1.51654    4.22   0.0039 **
x2          -0.54473    0.22636   -2.41   0.0470 * 
x1           0.01184    0.00356    3.33   0.0127 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.798, Adjusted R-squared:  0.74 
F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037

> anova(lm(y~x2+x1, data=df))
Analysis of Variance Table

Response: y
          Df Sum Sq Mean Sq F value Pr(>F)   
x2         1  14.38   14.38    16.6 0.0047 **
x1         1   9.57    9.57    11.1 0.0127 * 
Residuals  7   6.06    0.87                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> # note on the 2 t-tests in summary
> # note that the tests for x1 and x2 in anova differ
> # from the t-tests for x1 and x2 in lm
> # (the Pr values for x1 and x2 are different).
> # the reason is that the t-tests test the partial r
> # (pr; not the spr or the zero-order r), whereas
> # anova tests x1 as a whole and then tests x2 with
> # only what remains after x1 is accounted for

> # also note that in the anova test the F value of the
> # second IV equals the square of that IV's t value in
> # summary(lm). this is because the part representing
> # the second IV's explanatory power is the same in
> # lm and anova. for the first IV, however, lm judges
> # it by its unique explanatory power only
> # (semi-partial cor^2), whereas anova also includes
> # the explanatory power shared with x2, so the
> # squared t value does not equal the F value

> # explanation of beta
> # beta coefficient (standardized b)
> # beta <- b * (sd(x)/sd(y))
> beta1 <- b1 * (sd(df$x1)/sd(df$y))
> beta2 <- b2 * (sd(df$x2)/sd(df$y))
> beta1
    x1 
0.6161 
> beta2
     x2 
-0.4459 

> # install.packages("lm.beta")
> library(lm.beta)
> lm.beta(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Standardized Coefficients::
(Intercept)          x1          x2 
         NA      0.6161     -0.4459 


> #######################################################
> # partial correlation coefficient and pr2
> # x2's explanation?
> # understand with diagrams first
> # then calculate with r
> lm.tmp.1 <- lm(x2~x1, data=df)
> res.x2.x1 <- lm.tmp.1$residuals

> lm.tmp.2 <- lm(y~x1, data=df)
> res.y.x1 <- lm.tmp.2$residuals

> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept)  6.28e-18   2.75e-01    0.00    1.000  
res.x2.x1   -5.45e-01   2.12e-01   -2.57    0.033 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.453, Adjusted R-squared:  0.384 
F-statistic: 6.62 on 1 and 8 DF,  p-value: 0.033

> summary(lm.tmp.3)$r.squared
[1] 0.4527
> sqrt(summary(lm.tmp.3)$r.squared)
[1] 0.6729
> # install.packages("ppcor")
> library(ppcor)
> partial.r <- pcor.test(df$y, df$x2, df$x1)
> str(partial.r)
'data.frame': 1 obs. of  6 variables:
 $ estimate : num -0.673
 $ p.value  : num 0.047
 $ statistic: num -2.41
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r$estimate
[1] -0.6729
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)  
(Intercept)  6.28e-18   2.75e-01    0.00    1.000  
res.x2.x1   -5.45e-01   2.12e-01   -2.57    0.033 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.453, Adjusted R-squared:  0.384 
F-statistic: 6.62 on 1 and 8 DF,  p-value: 0.033

> summary(lm.tmp.3)$r.square
[1] 0.4527
> partial.r$estimate^2
[1] 0.4527


> # x1's own explanation?
> lm.tmp.4 <- lm(x1~x2, data=df)
> res.x1.x2 <- lm.tmp.4$residuals

> lm.tmp.5 <- lm(y~x2, data=df)
> res.y.x2 <- lm.tmp.5$residuals

> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
> summary(lm.tmp.6)

Call:
lm(formula = res.y.x2 ~ res.x1.x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept) 1.33e-17   2.75e-01    0.00   1.0000   
res.x1.x2   1.18e-02   3.33e-03    3.55   0.0075 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.87 on 8 degrees of freedom
Multiple R-squared:  0.612, Adjusted R-squared:  0.564 
F-statistic: 12.6 on 1 and 8 DF,  p-value: 0.00746


> partial.r <- pcor.test(df$y, df$x1, df$x2)
> str(partial.r)
'data.frame': 1 obs. of  6 variables:
 $ estimate : num 0.783
 $ p.value  : num 0.0127
 $ statistic: num 3.33
 $ n        : int 10
 $ gp       : num 1
 $ Method   : chr "pearson"
> partial.r$estimate # this is the partial correlation, not pr^2
[1] 0.7825
> # in order to get pr2, square it
> partial.r$estimate^2
[1] 0.6123
> summary(lm.tmp.6)$r.squared
[1] 0.6123

> #######################################################
> # semipartial correlation coefficient and spr2
> #
> spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1)
> spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
> spr.y.x2.x1
  estimate p.value statistic  n gp  Method
1  -0.4087  0.2748    -1.185 10  1 pearson
> spr.y.x1.x2
  estimate p.value statistic  n gp  Method
1   0.5647  0.1132      1.81 10  1 pearson
> spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2
> spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2
> spr2.y.x2.x1
[1] 0.167
> spr2.y.x1.x2
[1] 0.3189

> lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
> summary(lm.tmp.7)

Call:
lm(formula = y ~ res.x2.x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.862 -1.171 -0.494  0.549  3.077 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)    8.000      0.559   14.31  5.5e-07 ***
res.x2.x1     -0.545      0.430   -1.27     0.24    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.77 on 8 degrees of freedom
Multiple R-squared:  0.167, Adjusted R-squared:  0.0629 
F-statistic:  1.6 on 1 and 8 DF,  p-value: 0.241

> spr2.y.x2.x1
[1] 0.167

> lm.tmp.8 <- lm(y~res.x1.x2, data=df)
> summary(lm.tmp.8)

Call:
lm(formula = y ~ res.x1.x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-2.664 -0.608 -0.149  1.219  2.290 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  8.00000    0.50540   15.83  2.5e-07 ***
res.x1.x2    0.01184    0.00612    1.94    0.089 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.6 on 8 degrees of freedom
Multiple R-squared:  0.319, Adjusted R-squared:  0.234 
F-statistic: 3.74 on 1 and 8 DF,  p-value: 0.089

> spr2.y.x1.x2
[1] 0.3189

> bcd # remember bcd in the above?
[1] 0.7981
> bd <- spr2.y.x2.x1 + spr2.y.x1.x2
> bd
[1] 0.4859
> bcd - bd
[1] 0.3123

> #######################################################
> # get the common area that explains the y variable
> # 1.
> summary(lm.y.x2)

Call:
lm(formula = y ~ x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.254 -0.888 -0.485  0.496  2.592 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   10.791      1.119    9.64  1.1e-05 ***
x2            -0.846      0.312   -2.71    0.027 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.4 on 8 degrees of freedom
Multiple R-squared:  0.479, Adjusted R-squared:  0.414 
F-statistic: 7.36 on 1 and 8 DF,  p-value: 0.0265

> all.x2 <- summary(lm.y.x2)$r.squared
> all.x2
[1] 0.4793
> spr2.y.x2.x1
[1] 0.167
> cma.1 <- all.x2 - spr2.y.x2.x1
> cma.1
[1] 0.3123

> # 2.
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.519 -0.897 -0.130  1.006  1.580 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  3.61778    1.24152    2.91    0.019 * 
x1           0.01527    0.00413    3.70    0.006 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.18 on 8 degrees of freedom
Multiple R-squared:  0.631, Adjusted R-squared:  0.585 
F-statistic: 13.7 on 1 and 8 DF,  p-value: 0.00605

> all.x1 <- summary(lm.y.x1)$r.squared
> all.x1
[1] 0.6311
> spr2.y.x1.x2
[1] 0.3189
> cma.2 <- all.x1 - spr2.y.x1.x2
> cma.2
[1] 0.3123

> # OR 3.
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-1.217 -0.578 -0.151  0.664  1.191 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  6.39910    1.51654    4.22   0.0039 **
x1           0.01184    0.00356    3.33   0.0127 * 
x2          -0.54473    0.22636   -2.41   0.0470 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.93 on 7 degrees of freedom
Multiple R-squared:  0.798, Adjusted R-squared:  0.74 
F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037

> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
> r2.y.x1x2
[1] 0.7981
> spr2.y.x1.x2
[1] 0.3189
> spr2.y.x2.x1
[1] 0.167
> cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
> bcd - bd
[1] 0.3123
> cma.3
[1] 0.3123

> cma.1
[1] 0.3123
> cma.2
[1] 0.3123
> cma.3
[1] 0.3123

> # OR we could have obtained the common explanatory
> # power from the R2 values of the simple regressions
> # and the multiple regression in the first place
> r2.y.x1 <- summary(lm.y.x1)$r.square
> r2.y.x2 <- summary(lm.y.x2)$r.square
> r2.y.x1
[1] 0.6311
> r2.y.x2
[1] 0.4793
> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
> r2.y.x1x2
[1] 0.7981
> cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2
> cma.4
[1] 0.3123

> # Note that sorting out the unique and common
> # explanation areas is only possible with the
> # squared semi-partial correlation,
> # NOT the squared partial correlation,
> # because only the semi-partial correlation
> # shares the same denominator (the total
> # variance of y).
> #############################################


</code>
  
====== explanation. added ======
{{:c:ms:2025:schedule:pasted:20250609-074413.png?400}}
<code>
# ex. (reading the diagram areas; the r.squared of each
# model corresponds to the explained proportion)
# summary(lm(y~x1, data=df))$r.squared = bc / delta.y
# summary(lm(y~x2, data=df))$r.squared = cd / delta.y
# summary(lm(y~x1+x2, data=df))$r.squared = bcd / delta.y
# b / delta.y = ?
# ce / delta.x2 = ?
</code>

<code>
# exp.added
spcor.test(df$y, df$x1, df$x2)
spcor.test(df$y, df$x1, df$x2)$estimate
spcor.test(df$y, df$x1, df$x2)$estimate^2
spcor.test(df$y, df$x2, df$x1)$estimate^2
summary(lm(y~x1+x2, data=df))$r.square

b <- spcor.test(df$y, df$x1, df$x2)$estimate^2
d <- spcor.test(df$y, df$x2, df$x1)$estimate^2
bcd <- summary(lm(y~x1+x2, data=df))$r.square

summary(lm(y~x1+x2, data=df))$r.square -
  (spcor.test(df$y, df$x1, df$x2)$estimate^2 +
     spcor.test(df$y, df$x2, df$x1)$estimate^2)
bcd - (b + d)

</code>
  
====== Another one ======
[[:partial and semipartial correlation]]
  