https://rpubs.com/KwonPublishing/249631
n.sample <- 10000
rho <- 0.05
dat <- mvrnorm(n.sample, c(0,0), matrix(c(1,rho,rho,1),2))
C1 <- dat[,1]
C2 <- dat[,2]
X <- rnorm(n.sample) + C1 + C2
Y <- rnorm(n.sample) + 0.5*C1 + C2 + X
dat <- data.frame(Y, X, C1, C2)
plot(dat, col=rgb(0,0,0,alpha=min(1, 1000/n.sample)))
http://faculty.cas.usf.edu/mbrannick/regression/Part3/Partials.html
http://faculty.cas.usf.edu/mbrannick/regression/Partial.html
options(digits = 3)
SATQ <- c(500, 550, 450, 400, 600, 650, 700, 550, 650, 550)
CLEP <- c(30, 32, 28, 25, 32, 38, 39, 38, 35, 31)
MathGPA <- c(2.8, 3, 2.9, 2.8, 3.3, 3.3, 3.5, 3.7, 3.4, 2.9)
data <- data.frame(SATQ, CLEP, MathGPA)
data1 <- subset(data, select = c(-CLEP))
data2 <- subset(data, select = c(-SATQ))
data3 <- subset(data, select = c(-MathGPA))
cor(data)
SATQ CLEP MathGPA
SATQ 1.0000 0.8745 0.7180
CLEP 0.8745 1.0000 0.8763
MathGPA 0.7180 0.8763 1.0000
====== m1 ======
m1 <- lm(data1$MathGPA ~ data1$SATQ)
m1
Call:
lm(formula = data1$MathGPA ~ data1$SATQ)
Coefficients:
(Intercept) data1$SATQ
1.78481 0.00246
summary(m1)
Call:
lm(formula = data1$MathGPA ~ data1$SATQ)
Residuals:
Min 1Q Median 3Q Max
-0.2354 -0.1218 0.0032 0.0294 0.5646
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.784810 0.477172 3.74 0.0057 **
data1$SATQ 0.002456 0.000842 2.92 0.0193 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.237 on 8 degrees of freedom
Multiple R-squared: 0.516, Adjusted R-squared: 0.455
F-statistic: 8.51 on 1 and 8 DF, p-value: 0.0193
Pred = 1.784810 + 0.002456 * data1$SATQ
Resid = m1$residuals
data1 <- data.frame(data1, Pred, Resid)
data1
SATQ MathGPA Pred Resid
1 500 2.8 3.013 -0.212658
2 550 3.0 3.136 -0.135443
3 450 2.9 2.890 0.010127
4 400 2.8 2.767 0.032911
5 600 3.3 3.258 0.041772
6 650 3.3 3.381 -0.081013
7 700 3.5 3.504 -0.003797
8 550 3.7 3.136 0.564557
9 650 3.4 3.381 0.018987
10 550 2.9 3.136 -0.235443
round(cor(data1),3)
SATQ MathGPA Pred Resid
SATQ 1.000 0.718 1.000 0.000
MathGPA 0.718 1.000 0.718 0.696
Pred 1.000 0.718 1.000 0.000
Resid 0.000 0.696 0.000 1.000
* MathGPA에 대한 SATQ의 상관계수값은 %%(%%r%%)%% .718 이다.
* 또한 SATQ와 Pred 간의 상관계수값은 1 이다. Pred값은 아래 regression model의 선형방정식 해이므로 SATQ의 값에 따라서 일정한 값을 (해를) 갖는다.
* Pred = 1.784810 + 0.002456 * data1%%$%%SATQ
* 독립변인인 SATQ와 Resid의 상관관계는 0. 또한 Resid와 Pred 또한 0.
* Y의 variance는 X(독립변인, 즉 SATQ)로 설명되는 부분과 그 나머지(독립변인과 아무 관계가 없는)로 이루어져 있다. 설명되는 부분은 X로 만들어진 regression line (선형방정식)으로 구성되므로 X(SATQ)와 Pred은 1의 상관관계를, SATQ와 Resid는 0의 상관관계를 갖는다.
====== m2 ======
m2 <- lm(data2$MathGPA ~ data2$CLEP)
m2
Call:
lm(formula = data2$MathGPA ~ data2$CLEP)
Coefficients:
(Intercept) data2$CLEP
1.1744 0.0605
summary(m2)
Call:
lm(formula = data2$MathGPA ~ data2$CLEP)
Residuals:
Min 1Q Median 3Q Max
-0.19050 -0.14117 -0.00238 0.11085 0.22521
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.1744 0.3895 3.02 0.01668 *
data2$CLEP 0.0605 0.0118 5.14 0.00088 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.164 on 8 degrees of freedom
Multiple R-squared: 0.768, Adjusted R-squared: 0.739
F-statistic: 26.5 on 1 and 8 DF, p-value: 0.000881
Pred = 1.1744 + 0.0605 * data2$CLEP
Resid = m2$residuals
data2 <- data.frame(data2, Pred, Resid)
data2
CLEP MathGPA Pred Resid
1 30 2.8 2.989 -0.19050
2 32 3.0 3.110 -0.11157
3 28 2.9 2.868 0.03058
4 25 2.8 2.687 0.11219
5 32 3.3 3.110 0.18843
6 38 3.3 3.473 -0.17479
7 39 3.5 3.534 -0.03533
8 38 3.7 3.473 0.22521
9 35 3.4 3.292 0.10682
10 31 2.9 3.050 -0.15103
round(cor(data2),3)
CLEP MathGPA Pred Resid
CLEP 1.000 0.876 1.000 0.000
MathGPA 0.876 1.000 0.876 0.482
Pred 1.000 0.876 1.000 0.000
Resid 0.000 0.482 0.000 1.000
* 상관관계 (CLEP * MathGPA) = .876 으로 SAT-Q와 MathGPA보다 높다.
* 종속변인인 MathGPA와 Resid와의 상관관계는 .482
* Pred value와 CLEP(독립변인)과는 상관관계 1.
* Resid는 독립변인과도 또한 Pred와도 상관관계 없음 (0).
data3
SATQ CLEP
1 500 30
2 550 32
3 450 28
4 400 25
5 600 32
6 650 38
7 700 39
8 550 38
9 650 35
10 550 31
> mall <- lm(data$MathGPA ~ data$CLEP + data$SATQ)
>
> mall
Call:
lm(formula = data$MathGPA ~ data$CLEP + data$SATQ)
Coefficients:
(Intercept) data$CLEP data$SATQ
1.160756 0.072929 -0.000702
> summary(mall)
Call:
lm(formula = data$MathGPA ~ data$CLEP + data$SATQ)
Residuals:
Min 1Q Median 3Q Max
-0.19789 -0.12897 -0.00053 0.13117 0.22640
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.160756 0.408112 2.84 0.025 *
data$CLEP 0.072929 0.025380 2.87 0.024 *
data$SATQ -0.000702 0.001256 -0.56 0.594
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.171 on 7 degrees of freedom
Multiple R-squared: 0.778, Adjusted R-squared: 0.714
F-statistic: 12.2 on 2 and 7 DF, p-value: 0.00518
>
Intercept = 1.16, t=2.844, p < .05
CLEP = 0.07, t=2.874, p < .05
SATQ = -.0007, t=-0.558, n.s.
====== m3 ======
m3 <- lm(CLEP ~ SATQ, data=data3)
m3
Call:
lm(formula = CLEP ~ SATQ, data = data3)
Coefficients:
(Intercept) SATQ
8.5570 0.0433
summary(m3)
Call:
lm(formula = CLEP ~ SATQ, data = data3)
Residuals:
Min 1Q Median 3Q Max
-2.532 -1.244 -0.285 0.095 5.633
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 8.55696 4.81337 1.78 0.11334
SATQ 0.04329 0.00849 5.10 0.00093 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.39 on 8 degrees of freedom
Multiple R-squared: 0.765, Adjusted R-squared: 0.735
F-statistic: 26 on 1 and 8 DF, p-value: 0.00093
Pred = 8.55696 + 0.04329 * data3$SATQ
Resid = m3$residuals
data3 <- data.frame(data3, Pred, Resid)
data3
SATQ CLEP Pred Resid
1 500 30 30.20 -0.20253
2 550 32 32.37 -0.36709
3 450 28 28.04 -0.03797
4 400 25 25.87 -0.87342
5 600 32 34.53 -2.53165
6 650 38 36.70 1.30380
7 700 39 38.86 0.13924
8 550 38 32.37 5.63291
9 650 35 36.70 -1.69620
10 550 31 32.37 -1.36709
round(cor(data3),3)
SATQ CLEP Pred Resid
SATQ 1.000 0.875 1.000 0.000
CLEP 0.875 1.000 0.875 0.485
Pred 1.000 0.875 1.000 0.000
Resid 0.000 0.485 0.000 1.000
temp1 <- subset(data1, select=c(Resid))
temp3 <- subset(data3, select=c(Resid))
temp <- data.frame(temp3, temp1)
colnames(temp)[1]<-"CLEPResid"
colnames(temp)[2]<-"MathGPAResid"
temp
CLEPResid MathGPAResid
1 -0.20253 -0.212658
2 -0.36709 -0.135443
3 -0.03797 0.010127
4 -0.87342 0.032911
5 -2.53165 0.041772
6 1.30380 -0.081013
7 0.13924 -0.003797
8 5.63291 0.564557
9 -1.69620 0.018987
10 -1.36709 -0.235443
> cor(temp)
CLEPResid MathGPAResid
CLEPResid 1.0000 0.7357
MathGPAResid 0.7357 1.0000
$$
r_{12.3} = \cfrac { \text r^{}_{12} - r^{}_{13}r^{}_{23}} { \sqrt{1-r^{2}_{13}} \sqrt{1-r^{2}_{23}} }
$$
where
* 1 = MathGPA
* 2 = CLEP
* 3 = SATQ
$$
r_{12.3} = \cfrac { \text .88 - (.87)(.72)} { \sqrt{1-.87^{2}} \sqrt{1-.72^{2}} }
$$
$$
r_{12.3} = \huge r_{\huge MathGPA:CLEP.SATQ} = .73
$$