Differences

This shows you the differences between two versions of the page.

--- c:ms:2025:schedule:w13.lecture.note [2025/06/09 07:11] – [output] hkimscil
+++ c:ms:2025:schedule:w13.lecture.note [2025/06/09 08:51] (current) – [output] hkimscil
@@ Line 5: / Line 5: @@
 #
 rm(list=ls())
-d <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
+df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
-d
+df
-colnames(d) <- c("y", "x1", "x2")
+colnames(df) <- c("y", "x1", "x2")
-d
+df
 # y = 통장갯수
 # x1 = 인컴
 # x2 = 부양가족수
-lm.y.x1 <- lm(y ~ x1, data=d)
+lm.y.x1 <- lm(y ~ x1, data=df)
 summary(lm.y.x1)
 anova(lm.y.x1)
-cor(d$x1, d$y)^2
+cor(df$x1, df$y)^2
 summary(lm.y.x1)$r.squared
-lm.y.x2 <- lm(y ~ x2, data=d)
+lm.y.x2 <- lm(y ~ x2, data=df)
 summary(lm.y.x2)
 anova(lm.y.x2)
-cor(d$x2, d$y)^2
+cor(df$x2, df$y)^2
 summary(lm.y.x2)$r.squared
-lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
+lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
 summary(lm.y.x1x2)
 anova(lm.y.x1x2)
@@ Line 38: / Line 38: @@
 lm.y.x1x2$coefficient
-# y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2
+# y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2
 a <- lm.y.x1x2$coefficient[1]
 b1 <- lm.y.x1x2$coefficient[2]
@@ Line 46: / Line 46: @@
 b2
-y.pred <- a + (b1 * d$x1) + (b2 * d$x2)
+y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
 y.pred
 # or
@@ Line 52: / Line 52: @@
 head(y.pred == y.pred2)
-y.real <- d$y
+y.real <- df$y
 y.real
-y.mean <- mean(d$y)
+y.mean <- mean(df$y)
 y.mean
-deviation.score <- d$y - y.mean
+deviation.score <- df$y - y.mean
 ds <- deviation.score
 res <- y.real - y.pred
@@ Line 64: / Line 64: @@
 # remember y is sum of res + reg + y.mean
 y2 <- res + reg + y.mean
-d$y==y2
+df$y==y2
 ss.tot <- sum(ds^2)
@@ Line 70: / Line 70: @@
 ss.reg <- sum(reg^2)
-ss.tot2 <- var(d$y) * (length(d$y)-1)
+ss.tot2 <- var(df$y) * (length(df$y)-1)
 ss.tot
 ss.tot2
@@ Line 78: / Line 78: @@
 k <- 3 # # of parameters a, b1, b2
-df.tot <- length(y)-1
+df.tot <- length(df$y)-1
 df.reg <- k - 1
 df.res <- df.tot - df.reg
@@ Line 95: / Line 95: @@
 anova(lm.y.x1x2)
-summary(lm(y~x2+x1, data=d))
+summary(lm(y~x2+x1, data=df))
-anova(lm(y~x2+x1, data=d))
+anova(lm(y~x2+x1, data=df))
 # note on 2 t-tests in summary
@@ Line 122: / Line 122: @@
 # beta coefficient (standardized b)
 # beta <- b * (sd(x)/sd(y))
-beta1 <- b1 * (sd(x1)/sd(y))
+beta1 <- b1 * (sd(df$x1)/sd(df$y))
-beta2 <- b2 * (sd(x2)/sd(y))
+beta2 <- b2 * (sd(df$x2)/sd(df$y))
 beta1
 beta2
@@ Line 136: / Line 136: @@
 # understand with diagrams first
 # then calculate with r
-lm.tmp.1 <- lm(x2~x1, data=d)
+lm.tmp.1 <- lm(x2~x1, data=df)
 res.x2.x1 <- lm.tmp.1$residuals
-lm.tmp.2 <- lm(y~x1, data=d)
+lm.tmp.2 <- lm(y~x1, data=df)
 res.y.x1 <- lm.tmp.2$residuals
-lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
+lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
 summary(lm.tmp.3)
 summary(lm.tmp.3)$r.squared
@@ Line 148: / Line 148: @@
 # install.packages("ppcor")
 library(ppcor)
-partial.r <- pcor.test(d$y, d$x2, d$x1)
+partial.r <- pcor.test(df$y, df$x2, df$x1)
 str(partial.r)
 partial.r$estimate
@@ Line 157: / Line 157: @@
 # x1's own explanation?
-lm.tmp.4 <- lm(x1~x2, data=d)
+lm.tmp.4 <- lm(x1~x2, data=df)
 res.x1.x2 <- lm.tmp.4$residuals
-lm.tmp.5 <- lm(y~x2, data=d)
+lm.tmp.5 <- lm(y~x2, data=df)
 res.y.x2 <- lm.tmp.5$residuals
-lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
+lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
 summary(lm.tmp.6)
-partial.r <- pcor.test(d$y, d$x1, d$x2)
+partial.r <- pcor.test(df$y, df$x1, df$x2)
 str(partial.r)
 partial.r$estimate # this is partial correlation, not pr^2
@@ Line 176: / Line 176: @@
 # semipartial correlation coefficient and spr2
 #
-spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1)
+spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1)
-spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2)
+spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
 spr.y.x2.x1
 spr.y.x1.x2
@@ Line 185: / Line 185: @@
 spr2.y.x1.x2
-lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
+lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
 summary(lm.tmp.7)
 spr2.y.x2.x1
-lm.tmp.8 <- lm(y~res.x1.x2, data = d)
+lm.tmp.8 <- lm(y~res.x1.x2, data=df)
 summary(lm.tmp.8)
 spr2.y.x1.x2
@@ Line 250: / Line 250: @@
 # y).
 #############################################
@@ Line 260: / Line 259: @@
 > #
 > rm(list=ls())
-> d <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
+> df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
-> d
+> df
    bankaccount income famnum
             6    220      5
@@ Line 274: / Line 273: @@
            9    270      3
 >
-> colnames(d) <- c("y", "x1", "x2")
+> colnames(df) <- c("y", "x1", "x2")
-> d
+> df
     y  x1 x2
    6 220  5
@@ Line 290: / Line 289: @@
 > # x1 = 인컴
 > # x2 = 부양가족수
-> lm.y.x1 <- lm(y ~ x1, data=d)
+> lm.y.x1 <- lm(y ~ x1, data=df)
 > summary(lm.y.x1)
 Call:
-lm(formula = y ~ x1, data = d)
+lm(formula = y ~ x1, data = df)
 Residuals:
@@ Line 320: / Line 319: @@
 ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-> cor(d$x1, d$y)^2
+> cor(df$x1, df$y)^2
 [1] 0.6311
 > summary(lm.y.x1)$r.squared
@@ Line 326: / Line 325: @@
 >
 >
-> lm.y.x2 <- lm(y ~ x2, data=d)
+> lm.y.x2 <- lm(y ~ x2, data=df)
 > summary(lm.y.x2)
 Call:
-lm(formula = y ~ x2, data = d)
+lm(formula = y ~ x2, data = df)
 Residuals:
@@ Line 356: / Line 355: @@
 ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-> cor(d$x2, d$y)^2
+> cor(df$x2, df$y)^2
 [1] 0.4793
 > summary(lm.y.x2)$r.squared
@@ Line 362: / Line 361: @@
 >
 >
-> lm.y.x1x2 <- lm(y ~ x1+x2, data=d)
+> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
 > summary(lm.y.x1x2)
 Call:
-lm(formula = y ~ x1 + x2, data = d)
+lm(formula = y ~ x1 + x2, data = df)
 Residuals:
@@ Line 407: / Line 406: @@
 (Intercept)          x1          x2
 .39910     0.01184    -0.54473
-> # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2
+> # y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2
 > a <- lm.y.x1x2$coefficient[1]
 > b1 <- lm.y.x1x2$coefficient[2]
@@ Line 421: / Line 420: @@
 -0.5447
 >
-> y.pred <- a + (b1 * d$x1) + (b2 * d$x2)
+> y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
 > y.pred
  [1]  6.281  5.381  7.844  6.588  9.217 10.023  7.252  9.809  9.644  7.962
@@ Line 430: / Line 429: @@
 TRUE TRUE TRUE TRUE TRUE TRUE
 >
-> y.real <- d$y
+> y.real <- df$y
 > y.real
  [1]  6  5  7  7  8 10  8 11  9  9
-> y.mean <- mean(d$y)
+> y.mean <- mean(df$y)
 > y.mean
 [1] 8
 >
-> deviation.score <- d$y - y.mean
+> deviation.score <- df$y - y.mean
 > ds <- deviation.score
 > res <- y.real - y.pred
@@ Line 445: / Line 444: @@
 > # remember y is sum of res + reg + y.mean
 > y2 <- res + reg + y.mean
-> d$y==y2
+> df$y==y2
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 >
@@ Line 452: / Line 451: @@
 > ss.reg <- sum(reg^2)
 >
-> ss.tot2 <- var(d$y) * (length(d$y)-1)
+> ss.tot2 <- var(df$y) * (length(df$y)-1)
 > ss.tot
 [1] 30
@@ Line 465: / Line 464: @@
 >
 > k <- 3 # # of parameters a, b1, b2
-> df.tot <- length(d$y)-1
+> df.tot <- length(df$y)-1
 > df.reg <- k - 1
 > df.res <- df.tot - df.reg
@@ Line 486: / Line 485: @@
 Call:
-lm(formula = y ~ x1 + x2, data = d)
+lm(formula = y ~ x1 + x2, data = df)
 Residuals:
@@ Line 515: / Line 514: @@
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 >
-> summary(lm(y~x2+x1, data=d))
+> summary(lm(y~x2+x1, data=df))
 Call:
-lm(formula = y ~ x2 + x1, data = d)
+lm(formula = y ~ x2 + x1, data = df)
 Residuals:
@@ Line 536: / Line 535: @@
 F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037
-> anova(lm(y~x2+x1, data=d))
+> anova(lm(y~x2+x1, data=df))
 Analysis of Variance Table
@@ Line 571: / Line 570: @@
 > # beta coefficient (standardized b)
 > # beta <- b * (sd(x)/sd(y))
-> beta1 <- b1 * (sd(d$x1)/sd(d$y))
+> beta1 <- b1 * (sd(df$x1)/sd(df$y))
-> beta2 <- b2 * (sd(d$x2)/sd(d$y))
+> beta2 <- b2 * (sd(df$x2)/sd(df$y))
 > beta1
     x1
@@ Line 582: / Line 581: @@
 > # install.packages("lm.beta")
 > library(lm.beta)
-경고메시지(들):
-패키지 ‘lm.beta’는 R 버전 4.3.3에서 작성되었습니다
 > lm.beta(lm.y.x1x2)
 Call:
-lm(formula = y ~ x1 + x2, data = d)
+lm(formula = y ~ x1 + x2, data = df)
 Standardized Coefficients::
@@ Line 599: / Line 596: @@
 > # understand with diagrams first
 > # then calculate with r
-> lm.tmp.1 <- lm(x2~x1, data=d)
+> lm.tmp.1 <- lm(x2~x1, data=df)
 > res.x2.x1 <- lm.tmp.1$residuals
 >
-> lm.tmp.2 <- lm(y~x1, data=d)
+> lm.tmp.2 <- lm(y~x1, data=df)
 > res.y.x1 <- lm.tmp.2$residuals
 >
-> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)
+> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
 > summary(lm.tmp.3)
 Call:
-lm(formula = res.y.x1 ~ res.x2.x1, data = d)
+lm(formula = res.y.x1 ~ res.x2.x1, data = df)
 Residuals:
@@ Line 632: / Line 629: @@
 > # install.packages("ppcor")
 > library(ppcor)
-> partial.r <- pcor.test(d$y, d$x2, d$x1)
+> partial.r <- pcor.test(df$y, df$x2, df$x1)
 > str(partial.r)
 'data.frame':	1 obs. of  6 variables:
@@ Line 646: / Line 643: @@
 Call:
-lm(formula = res.y.x1 ~ res.x2.x1, data = d)
+lm(formula = res.y.x1 ~ res.x2.x1, data = df)
 Residuals:
@@ Line 670: / Line 667: @@
 >
 > # x1's own explanation?
-> lm.tmp.4 <- lm(x1~x2, data=d)
+> lm.tmp.4 <- lm(x1~x2, data=df)
 > res.x1.x2 <- lm.tmp.4$residuals
 >
-> lm.tmp.5 <- lm(y~x2, data=d)
+> lm.tmp.5 <- lm(y~x2, data=df)
 > res.y.x2 <- lm.tmp.5$residuals
 >
-> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)
+> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
 > summary(lm.tmp.6)
 Call:
-lm(formula = res.y.x2 ~ res.x1.x2, data = d)
+lm(formula = res.y.x2 ~ res.x1.x2, data = df)
 Residuals:
@@ Line 698: / Line 695: @@
 >
-> partial.r <- pcor.test(d$y, d$x1, d$x2)
+> partial.r <- pcor.test(df$y, df$x1, df$x2)
 > str(partial.r)
 'data.frame':	1 obs. of  6 variables:
@@ Line 718: / Line 715: @@
 > # semipartial correlation coefficient and spr2
 > #
-> spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1)
+> spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1)
-> spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2)
+> spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
 > spr.y.x2.x1
   estimate p.value statistic  n gp  Method
@@ Line 733: / Line 730: @@
 [1] 0.3189
 >
-> lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)
+> lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
 > summary(lm.tmp.7)
 Call:
-lm(formula = y ~ res.x2.x1, data = d)
+lm(formula = y ~ res.x2.x1, data = df)
 Residuals:
@@ Line 757: / Line 754: @@
 [1] 0.167
 >
-> lm.tmp.8 <- lm(y~res.x1.x2, data = d)
+> lm.tmp.8 <- lm(y~res.x1.x2, data=df)
 > summary(lm.tmp.8)
 Call:
-lm(formula = y ~ res.x1.x2, data = d)
+lm(formula = y ~ res.x1.x2, data = df)
 Residuals:
@@ Line 795: / Line 792: @@
 Call:
-lm(formula = y ~ x2, data = d)
+lm(formula = y ~ x2, data = df)
 Residuals:
@@ Line 825: / Line 822: @@
 Call:
-lm(formula = y ~ x1, data = d)
+lm(formula = y ~ x1, data = df)
 Residuals:
@@ Line 855: / Line 852: @@
 Call:
-lm(formula = y ~ x1 + x2, data = d)
+lm(formula = y ~ x1 + x2, data = df)
 Residuals:
@@ Line 918: / Line 915: @@
 > #############################################
 >
+>
+</code>
+====== explanation. added ======
+{{:c:ms:2025:schedule:pasted:20250609-074413.png?400}}
+<code>
+# ex.
+# resid(lm(y~x1, data=df)) = bc / delta.y
+# resid(lm(y~x2, data=df)) = cd / delta.y
+# resid(lm(y~x1+x2, data=df)) = bcd / delta.y
+# b / delta.y = ?
+# ce / delta.x2 = ?
+</code>
+<code>
+# exp.added
+spcor.test(df$y, df$x1, df$x2)
+spcor.test(df$y, df$x1, df$x2)$estimate
+spcor.test(df$y, df$x1, df$x2)$estimate^2
+spcor.test(df$y, df$x2, df$x1)$estimate^2
+summary(lm(y~x1+x2, data=df))$r.square
+b <- spcor.test(df$y, df$x1, df$x2)$estimate^2
+d <- spcor.test(df$y, df$x2, df$x1)$estimate^2
+bcd <- summary(lm(y~x1+x2, data=df))$r.square
+summary(lm(df$y~df$x1+df$x2, data=df))$r.square -
+  (spcor.test(df$y, df$x1, df$x2)$estimate^2 +
+     spcor.test(df$y, df$x2, df$x1)$estimate^2)
+bcd - (b + d)
 </code>