MR
# multiple regression: a simple example
#
#
rm(list=ls())
d <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
d
colnames(d) <- c("y", "x1", "x2")
d
attach(d) # so y, x1, x2 can be used directly below
# y = number of bank accounts
# x1 = income
# x2 = number of dependents
lm.y.x1 <- lm(y ~ x1, data=d)
summary(lm.y.x1)
anova(lm.y.x1)
r.sq.y.x1 <- cor(x1,y)^2
rsq.y.x1<-summary(lm.y.x1)$r.squared
lm.y.x2 <- lm(y ~ x2, data=d)
summary(lm.y.x2)
anova(lm.y.x2)
rsq.y.x2 <- cor(x2,y)^2
rsq.y.x2 <- summary(lm.y.x2)$r.squared
lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
summary(lm.y.x1x2)
anova(lm.y.x1x2)
rsq.y.x1x2<- summary(lm.y.x1x2)$r.squared
# common (overlapping) explained area, from the three R2 values
rsq.y.x1 + rsq.y.x2 - rsq.y.x1x2
lm.y.x1x2$coefficient
# y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
a <- lm.y.x1x2$coefficient[1]
b1 <- lm.y.x1x2$coefficient[2]
b2 <- lm.y.x1x2$coefficient[3]
a
b1
b2
y.pred <- a + (b1 * x1) + (b2 * x2)
y.pred
# or
y.pred2 <- predict(lm.y.x1x2)
head(y.pred == y.pred2) # == holds here, but all.equal() is safer for floats
y.real <- y
y.real
y.mean <- mean(y)
y.mean
res <- y.real - y.pred
reg <- y.pred - y.mean
y.mean
# remember y is sum of res + reg + y.mean
y2 <- res + reg + y.mean
y==y2
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
ss.tot <- var(y) * (length(y)-1) # i.e., sum((y - mean(y))^2)
ss.tot
ss.res
ss.reg
ss.res+ss.reg
k <- 3 # number of parameters (a, b1, b2)
df.tot <- length(y)-1
df.reg <- k - 1
df.res <- df.tot - df.reg
ms.reg <- ss.reg/df.reg
ms.res <- ss.res/df.res
ms.reg
ms.res
f.val <- ms.reg/ms.res
f.val
p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
p.val
# double check
summary(lm.y.x1x2)
anova(lm.y.x1x2)
# note on the two t-tests in summary()
# note the difference between the tests for x1 and x2 in
# anova() and the t-tests for x1 and x2 in summary(lm)
# (the Pr values for x1 and x2 differ). the reason:
# the t-tests are based on the partial correlation (pr),
# not on spr or the zero-order r, while anova() tests x1
# as a whole (first in entry order) and then tests x2
# with only what remains after x1.
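# a quick sketch (not in the original note) to see the order
# dependence of anova(): the sequential (Type I) SS change when
# the predictor order is flipped, while the t-tests in summary()
# stay the same.
anova(lm(y ~ x2 + x1, data=d))   # compare Pr(>F) with anova(lm.y.x1x2)
summary(lm(y ~ x2 + x1, data=d)) # Pr(>|t|) match summary(lm.y.x1x2)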
# beta coefficient (standardized b)
# beta <- b * (sd(x)/sd(y))
beta1 <- b1 * (sd(x1)/sd(y))
beta2 <- b2 * (sd(x2)/sd(y))
beta1
beta2
# install.packages("lm.beta")
library(lm.beta)
lm.beta(lm.y.x1x2)
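# cross-check (a sketch, not in the original note): regressing the
# standardized variables reproduces the beta coefficients above.
lm.std <- lm(scale(y) ~ scale(x1) + scale(x2), data=d)
coef(lm.std) # slopes should equal beta1 and beta2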
#######################################################
# partial correlation coefficient and pr2
# x2's explanation, with x1 controlled (removed from both y and x2)?
# understand with diagrams first,
# then calculate with R
lm.tmp.1 <- lm(x2~x1, data=d)
res.x2.x1 <- lm.tmp.1$residuals
lm.tmp.2 <- lm(y~x1, data=d)
res.y.x1 <- lm.tmp.2$residuals
lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
summary(lm.tmp.3)
# install.packages("ppcor")
library(ppcor)
partial.r <- pcor.test(y, x2, x1)
partial.r
str(partial.r)
summary(lm.tmp.3)
summary(lm.tmp.3)$r.square
partial.r$estimate^2
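# the same partial correlation from the textbook formula
# (a hand-rolled check, not in the original note):
# pr.y.x2.x1 = (r.yx2 - r.yx1*r.x1x2) / sqrt((1-r.yx1^2)*(1-r.x1x2^2))
r.yx1 <- cor(y, x1)
r.yx2 <- cor(y, x2)
r.x1x2 <- cor(x1, x2)
(r.yx2 - r.yx1*r.x1x2) / sqrt((1-r.yx1^2)*(1-r.x1x2^2))
# should equal pcor.test(y, x2, x1)$estimate (-0.673)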
# x1's explanation, with x2 controlled?
lm.tmp.4 <- lm(x1~x2, data=d)
res.x1.x2 <- lm.tmp.4$residuals
lm.tmp.5 <- lm(y~x2, data=d)
res.y.x2 <- lm.tmp.5$residuals
lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
summary(lm.tmp.6)
partial.r <- pcor.test(y, x1, x2)
str(partial.r)
partial.r$estimate # this is the partial correlation, not pr^2
# to get pr2, square the estimate
partial.r$estimate^2
#######################################################
#
# semipartial correlation coefficient and spr2
#
spr.y.x2.x1 <- spcor.test(y,x2,x1)
spr.y.x1.x2 <- spcor.test(y,x1,x2)
spr.y.x2.x1
spr.y.x1.x2
spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2
spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2
spr2.y.x2.x1
spr2.y.x1.x2
lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
summary(lm.tmp.7)
spr2.y.x2.x1
lm.tmp.8 <- lm(y~res.x1.x2, data = d)
summary(lm.tmp.8)
spr2.y.x1.x2
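# the semipartial from its formula as well (a hand-rolled check,
# reusing r.yx1, r.yx2, r.x1x2 computed above):
# spr.y.x1.x2 = (r.yx1 - r.yx2*r.x1x2) / sqrt(1-r.x1x2^2)
(r.yx1 - r.yx2*r.x1x2) / sqrt(1-r.x1x2^2)
# should equal spcor.test(y, x1, x2)$estimate (0.5647)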
#######################################################
# get the common area that explains the y variable
# 1.
summary(lm.y.x2)
all.x2 <- summary(lm.y.x2)$r.squared
all.x2
spr2.y.x2.x1
cma.1 <- all.x2 - spr2.y.x2.x1
cma.1
# 2.
summary(lm.y.x1)
all.x1 <- summary(lm.y.x1)$r.squared
all.x1
spr2.y.x1.x2
cma.2 <- all.x1 - spr2.y.x1.x2
cma.2
# OR 3.
summary(lm.y.x1x2)
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
spr2.y.x1.x2
spr2.y.x2.x1
cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
cma.3
cma.1
cma.2
cma.3
# OR, from the start, we could have obtained the common
# explained area from the R2 values of the simple and
# multiple regressions alone.
r2.y.x1 <- summary(lm.y.x1)$r.square
r2.y.x2 <- summary(lm.y.x2)$r.square
r2.y.x1
r2.y.x2
r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
r2.y.x1x2
cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2
cma.4
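# putting the pieces together (a sketch, not in the original note):
# the two unique areas plus the common area should reproduce the
# full-model R2.
spr2.y.x1.x2 + spr2.y.x2.x1 + cma.4 # should equal r2.y.x1x2 (0.7981)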
# note that partitioning the unique and common explained
# areas is only possible with the squared semipartial
# correlation (spr2), NOT the squared partial correlation
# (pr2), because only the semipartial correlation uses the
# total variance of y as its denominator.
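# to see why (a hand-rolled check): pr2 rescales spr2 by what is
# left over after the other predictor, so the two pr2 values do
# not share a common denominator.
spr2.y.x1.x2 / (1 - r2.y.x2) # equals pr2 for x1 (0.6123)
spr2.y.x2.x1 / (1 - r2.y.x1) # equals pr2 for x2 (0.4527)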
#############################################
output
> # multiple regression: a simple example
> #
> #
> rm(list=ls())
> d <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
> d
bankaccount income famnum
1 6 220 5
2 5 190 6
3 7 260 3
4 7 200 4
5 8 330 2
6 10 490 4
7 8 210 3
8 11 380 2
9 9 320 1
10 9 270 3
>
> colnames(d) <- c("y", "x1", "x2")
> d
y x1 x2
1 6 220 5
2 5 190 6
3 7 260 3
4 7 200 4
5 8 330 2
6 10 490 4
7 8 210 3
8 11 380 2
9 9 320 1
10 9 270 3
> attach(d) # so y, x1, x2 can be used directly below
> # y = number of bank accounts
> # x1 = income
> # x2 = number of dependents
> lm.y.x1 <- lm(y ~ x1, data=d)
> summary(lm.y.x1)
Call:
lm(formula = y ~ x1, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.5189 -0.8969 -0.1297 1.0058 1.5800
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.617781 1.241518 2.914 0.01947 *
x1 0.015269 0.004127 3.700 0.00605 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared: 0.6311, Adjusted R-squared: 0.585
F-statistic: 13.69 on 1 and 8 DF, p-value: 0.006046
> anova(lm.y.x1)
Analysis of Variance Table
Response: y
Df Sum Sq Mean Sq F value Pr(>F)
x1 1 18.934 18.9336 13.687 0.006046 **
Residuals 8 11.066 1.3833
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> r.sq.y.x1 <- cor(x1,y)^2
> rsq.y.x1<-summary(lm.y.x1)$r.squared
>
>
> lm.y.x2 <- lm(y ~ x2, data=d)
> summary(lm.y.x2)
Call:
lm(formula = y ~ x2, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2537 -0.8881 -0.4851 0.4963 2.5920
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.7910 1.1195 9.639 1.12e-05 ***
x2 -0.8458 0.3117 -2.713 0.0265 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.397 on 8 degrees of freedom
Multiple R-squared: 0.4793, Adjusted R-squared: 0.4142
F-statistic: 7.363 on 1 and 8 DF, p-value: 0.02651
> anova(lm.y.x2)
Analysis of Variance Table
Response: y
Df Sum Sq Mean Sq F value Pr(>F)
x2 1 14.378 14.3781 7.3631 0.02651 *
Residuals 8 15.622 1.9527
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> rsq.y.x2 <- cor(x2,y)^2
> rsq.y.x2 <- summary(lm.y.x2)$r.squared
>
>
> lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
> summary(lm.y.x1x2)
Call:
lm(formula = y ~ x1 + x2, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2173 -0.5779 -0.1515 0.6642 1.1906
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.399103 1.516539 4.220 0.00394 **
x1 0.011841 0.003561 3.325 0.01268 *
x2 -0.544727 0.226364 -2.406 0.04702 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9301 on 7 degrees of freedom
Multiple R-squared: 0.7981, Adjusted R-squared: 0.7404
F-statistic: 13.84 on 2 and 7 DF, p-value: 0.003696
> anova(lm.y.x1x2)
Analysis of Variance Table
Response: y
Df Sum Sq Mean Sq F value Pr(>F)
x1 1 18.9336 18.9336 21.8841 0.002266 **
x2 1 5.0101 5.0101 5.7909 0.047020 *
Residuals 7 6.0562 0.8652
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> rsq.y.x1x2<- summary(lm.y.x1x2)$r.squared
> # common (overlapping) explained area, from the three R2 values
> rsq.y.x1 + rsq.y.x2 - rsq.y.x1x2
[1] 0.3122658
>
> lm.y.x1x2$coefficient
(Intercept) x1 x2
6.39910298 0.01184145 -0.54472725
> # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2
> a <- lm.y.x1x2$coefficient[1]
> b1 <- lm.y.x1x2$coefficient[2]
> b2 <- lm.y.x1x2$coefficient[3]
> a
(Intercept)
6.399103
> b1
x1
0.01184145
> b2
x2
-0.5447272
>
> y.pred <- a + (b1 * x1) + (b2 * x2)
> y.pred
[1] 6.280586 5.380616 7.843699 6.588485 9.217328
[6] 10.022506 7.251626 9.809401 9.643641 7.962113
> # or
> y.pred2 <- predict(lm.y.x1x2)
> head(y.pred == y.pred2) # == holds here, but all.equal() is safer for floats
1 2 3 4 5 6
TRUE TRUE TRUE TRUE TRUE TRUE
>
> y.real <- y
> y.real
[1] 6 5 7 7 8 10 8 11 9 9
> y.mean <- mean(y)
> y.mean
[1] 8
>
> res <- y.real - y.pred
> reg <- y.pred - y.mean
> y.mean
[1] 8
> # remember y is sum of res + reg + y.mean
> y2 <- res + reg + y.mean
> y==y2
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
>
> ss.res <- sum(res^2)
> ss.reg <- sum(reg^2)
>
> ss.tot <- var(y) * (length(y)-1) # i.e., sum((y - mean(y))^2)
> ss.tot
[1] 30
> ss.res
[1] 6.056235
> ss.reg
[1] 23.94376
> ss.res+ss.reg
[1] 30
>
> k <- 3 # number of parameters (a, b1, b2)
> df.tot <- length(y)-1
> df.reg <- k - 1
> df.res <- df.tot - df.reg
>
> ms.reg <- ss.reg/df.reg
> ms.res <- ss.res/df.res
> ms.reg
[1] 11.97188
> ms.res
[1] 0.8651765
> f.val <- ms.reg/ms.res
> f.val
[1] 13.8375
> p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
> p.val
[1] 0.003696453
>
> # double check
> summary(lm.y.x1x2)
Call:
lm(formula = y ~ x1 + x2, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2173 -0.5779 -0.1515 0.6642 1.1906
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.399103 1.516539 4.220 0.00394 **
x1 0.011841 0.003561 3.325 0.01268 *
x2 -0.544727 0.226364 -2.406 0.04702 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9301 on 7 degrees of freedom
Multiple R-squared: 0.7981, Adjusted R-squared: 0.7404
F-statistic: 13.84 on 2 and 7 DF, p-value: 0.003696
> anova(lm.y.x1x2)
Analysis of Variance Table
Response: y
Df Sum Sq Mean Sq F value Pr(>F)
x1 1 18.9336 18.9336 21.8841 0.002266 **
x2 1 5.0101 5.0101 5.7909 0.047020 *
Residuals 7 6.0562 0.8652
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> # note on the two t-tests in summary()
> # note the difference between the tests for x1 and x2 in
> # anova() and the t-tests for x1 and x2 in summary(lm)
> # (the Pr values for x1 and x2 differ). the reason:
> # the t-tests are based on the partial correlation (pr),
> # not on spr or the zero-order r, while anova() tests x1
> # as a whole (first in entry order) and then tests x2
> # with only what remains after x1.
>
> # beta coefficient (standardized b)
> # beta <- b * (sd(x)/sd(y))
> beta1 <- b1 * (sd(x1)/sd(y))
> beta2 <- b2 * (sd(x2)/sd(y))
> beta1
x1
0.616097
> beta2
x2
-0.4458785
>
> # install.packages("lm.beta")
> library(lm.beta)
> lm.beta(lm.y.x1x2)
Call:
lm(formula = y ~ x1 + x2, data = d)
Standardized Coefficients::
(Intercept) x1 x2
NA 0.6160970 -0.4458785
>
> #######################################################
> # partial correlation coefficient and pr2
> # x2's explanation, with x1 controlled (removed from both y and x2)?
> # understand with diagrams first,
> # then calculate with R
> lm.tmp.1 <- lm(x2~x1, data=d)
> res.x2.x1 <- lm.tmp.1$residuals
>
> lm.tmp.2 <- lm(y~x1, data=d)
> res.y.x1 <- lm.tmp.2$residuals
>
> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
> summary(lm.tmp.3)
Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2173 -0.5779 -0.1515 0.6642 1.1906
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.281e-18 2.751e-01 0.000 1.000
res.x2.x1 -5.447e-01 2.117e-01 -2.573 0.033 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8701 on 8 degrees of freedom
Multiple R-squared: 0.4527, Adjusted R-squared: 0.3843
F-statistic: 6.618 on 1 and 8 DF, p-value: 0.033
>
> # install.packages("ppcor")
> library(ppcor)
> partial.r <- pcor.test(y, x2, x1)
> partial.r
estimate p.value statistic n gp Method
1 -0.672856 0.04702022 -2.406425 10 1 pearson
> str(partial.r)
'data.frame': 1 obs. of 6 variables:
$ estimate : num -0.673
$ p.value : num 0.047
$ statistic: num -2.41
$ n : int 10
$ gp : num 1
$ Method : chr "pearson"
> summary(lm.tmp.3)
Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2173 -0.5779 -0.1515 0.6642 1.1906
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.281e-18 2.751e-01 0.000 1.000
res.x2.x1 -5.447e-01 2.117e-01 -2.573 0.033 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8701 on 8 degrees of freedom
Multiple R-squared: 0.4527, Adjusted R-squared: 0.3843
F-statistic: 6.618 on 1 and 8 DF, p-value: 0.033
> summary(lm.tmp.3)$r.square
[1] 0.4527352
> partial.r$estimate^2
[1] 0.4527352
>
>
> # x1's explanation, with x2 controlled?
> lm.tmp.4 <- lm(x1~x2, data=d)
> res.x1.x2 <- lm.tmp.4$residuals
>
> lm.tmp.5 <- lm(y~x2, data=d)
> res.y.x2 <- lm.tmp.5$residuals
>
> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
> summary(lm.tmp.6)
Call:
lm(formula = res.y.x2 ~ res.x1.x2, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2173 -0.5779 -0.1515 0.6642 1.1906
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.330e-17 2.751e-01 0.000 1.00000
res.x1.x2 1.184e-02 3.331e-03 3.555 0.00746 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8701 on 8 degrees of freedom
Multiple R-squared: 0.6123, Adjusted R-squared: 0.5639
F-statistic: 12.64 on 1 and 8 DF, p-value: 0.007458
>
> partial.r <- pcor.test(y, x1, x2)
> str(partial.r)
'data.frame': 1 obs. of 6 variables:
$ estimate : num 0.783
$ p.value : num 0.0127
$ statistic: num 3.33
$ n : int 10
$ gp : num 1
$ Method : chr "pearson"
> partial.r$estimate # this is the partial correlation, not pr^2
[1] 0.7825112
> # to get pr2, square the estimate
> partial.r$estimate^2
[1] 0.6123238
>
> #######################################################
> #
> # semipartial correlation coefficient and spr2
> #
> spr.y.x2.x1 <- spcor.test(y,x2,x1)
> spr.y.x1.x2 <- spcor.test(y,x1,x2)
> spr.y.x2.x1
estimate p.value statistic n gp Method
1 -0.4086619 0.2748117 -1.184655 10 1 pearson
> spr.y.x1.x2
estimate p.value statistic n gp Method
1 0.5646726 0.113182 1.810198 10 1 pearson
> spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2
> spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2
> spr2.y.x2.x1
[1] 0.1670045
> spr2.y.x1.x2
[1] 0.3188552
>
> lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
> summary(lm.tmp.7)
Call:
lm(formula = y ~ res.x2.x1, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.8617 -1.1712 -0.4940 0.5488 3.0771
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.0000 0.5589 14.314 5.54e-07 ***
res.x2.x1 -0.5447 0.4301 -1.266 0.241
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.767 on 8 degrees of freedom
Multiple R-squared: 0.167, Adjusted R-squared: 0.06288
F-statistic: 1.604 on 1 and 8 DF, p-value: 0.241
> spr2.y.x2.x1
[1] 0.1670045
>
> lm.tmp.8 <- lm(y~res.x1.x2, data = d)
> summary(lm.tmp.8)
Call:
lm(formula = y ~ res.x1.x2, data = d)
Residuals:
Min 1Q Median 3Q Max
-2.6642 -0.6084 -0.1492 1.2192 2.2901
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.000000 0.505400 15.829 2.54e-07 ***
res.x1.x2 0.011841 0.006119 1.935 0.089 .
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.598 on 8 degrees of freedom
Multiple R-squared: 0.3189, Adjusted R-squared: 0.2337
F-statistic: 3.745 on 1 and 8 DF, p-value: 0.08901
> spr2.y.x1.x2
[1] 0.3188552
>
>
> #######################################################
> # get the common area that explains the y variable
> # 1.
> summary(lm.y.x2)
Call:
lm(formula = y ~ x2, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2537 -0.8881 -0.4851 0.4963 2.5920
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.7910 1.1195 9.639 1.12e-05 ***
x2 -0.8458 0.3117 -2.713 0.0265 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.397 on 8 degrees of freedom
Multiple R-squared: 0.4793, Adjusted R-squared: 0.4142
F-statistic: 7.363 on 1 and 8 DF, p-value: 0.02651
> all.x2 <- summary(lm.y.x2)$r.squared
> all.x2
[1] 0.4792703
> spr2.y.x2.x1
[1] 0.1670045
> cma.1 <- all.x2 - spr2.y.x2.x1
> cma.1
[1] 0.3122658
>
> # 2.
> summary(lm.y.x1)
Call:
lm(formula = y ~ x1, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.5189 -0.8969 -0.1297 1.0058 1.5800
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.617781 1.241518 2.914 0.01947 *
x1 0.015269 0.004127 3.700 0.00605 **
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.176 on 8 degrees of freedom
Multiple R-squared: 0.6311, Adjusted R-squared: 0.585
F-statistic: 13.69 on 1 and 8 DF, p-value: 0.006046
> all.x1 <- summary(lm.y.x1)$r.squared
> all.x1
[1] 0.631121
> spr2.y.x1.x2
[1] 0.3188552
> cma.2 <- all.x1 - spr2.y.x1.x2
> cma.2
[1] 0.3122658
>
> # OR 3.
> summary(lm.y.x1x2)
Call:
lm(formula = y ~ x1 + x2, data = d)
Residuals:
Min 1Q Median 3Q Max
-1.2173 -0.5779 -0.1515 0.6642 1.1906
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 6.399103 1.516539 4.220 0.00394 **
x1 0.011841 0.003561 3.325 0.01268 *
x2 -0.544727 0.226364 -2.406 0.04702 *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9301 on 7 degrees of freedom
Multiple R-squared: 0.7981, Adjusted R-squared: 0.7404
F-statistic: 13.84 on 2 and 7 DF, p-value: 0.003696
> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
> r2.y.x1x2
[1] 0.7981255
> spr2.y.x1.x2
[1] 0.3188552
> spr2.y.x2.x1
[1] 0.1670045
> cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
> cma.3
[1] 0.3122658
>
> cma.1
[1] 0.3122658
> cma.2
[1] 0.3122658
> cma.3
[1] 0.3122658
>
> # OR, from the start, we could have obtained the common
> # explained area from the R2 values of the simple and
> # multiple regressions alone.
> r2.y.x1 <- summary(lm.y.x1)$r.square
> r2.y.x2 <- summary(lm.y.x2)$r.square
> r2.y.x1
[1] 0.631121
> r2.y.x2
[1] 0.4792703
> r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
> r2.y.x1x2
[1] 0.7981255
> cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2
> cma.4
[1] 0.3122658
>
> # note that partitioning the unique and common explained
> # areas is only possible with the squared semipartial
> # correlation (spr2), NOT the squared partial correlation
> # (pr2), because only the semipartial correlation uses the
> # total variance of y as its denominator.
> #############################################
>
>