User Tools

Site Tools


c:ms:2025:schedule:w13.lecture.note

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
c:ms:2025:schedule:w13.lecture.note [2025/06/09 08:08] – [explanation. added] hkimscilc:ms:2025:schedule:w13.lecture.note [2025/06/09 08:51] (current) – [output] hkimscil
Line 5: Line 5:
 # #
 rm(list=ls()) rm(list=ls())
-<- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  +df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  
-d+df
  
-colnames(d) <- c("y", "x1", "x2"+colnames(df) <- c("y", "x1", "x2"
-d+df
 # y = 통장갯수 # y = 통장갯수
 # x1 = 인컴 # x1 = 인컴
 # x2 = 부양가족수  # x2 = 부양가족수 
-lm.y.x1 <- lm(y ~ x1, data=d)+lm.y.x1 <- lm(y ~ x1, data=df)
 summary(lm.y.x1) summary(lm.y.x1)
 anova(lm.y.x1) anova(lm.y.x1)
-cor(d$x1, d$y)^2+cor(df$x1, df$y)^2
 summary(lm.y.x1)$r.squared summary(lm.y.x1)$r.squared
  
  
-lm.y.x2 <- lm(y ~ x2, data=d)+lm.y.x2 <- lm(y ~ x2, data=df)
 summary(lm.y.x2) summary(lm.y.x2)
 anova(lm.y.x2) anova(lm.y.x2)
-cor(d$x2, d$y)^2+cor(df$x2, df$y)^2
 summary(lm.y.x2)$r.squared summary(lm.y.x2)$r.squared
  
  
-lm.y.x1x2 <- lm(y ~ x1+x2, data=d)+lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
 summary(lm.y.x1x2) summary(lm.y.x1x2)
 anova(lm.y.x1x2) anova(lm.y.x1x2)
Line 38: Line 38:
  
 lm.y.x1x2$coefficient lm.y.x1x2$coefficient
-# y.hat = 6.399103 + (0.01184145)*x1 + (0.54472725)*x2 +# y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 
 a <- lm.y.x1x2$coefficient[1] a <- lm.y.x1x2$coefficient[1]
 b1 <- lm.y.x1x2$coefficient[2] b1 <- lm.y.x1x2$coefficient[2]
Line 46: Line 46:
 b2  b2 
  
-y.pred <- a + (b1 * d$x1) + (b2 * d$x2)+y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
 y.pred  y.pred 
 # or  # or 
Line 52: Line 52:
 head(y.pred == y.pred2) head(y.pred == y.pred2)
  
-y.real <- d$y+y.real <- df$y
 y.real y.real
-y.mean <- mean(d$y)+y.mean <- mean(df$y)
 y.mean  y.mean 
  
-deviation.score <- d$y - y.mean+deviation.score <- df$y - y.mean
 ds <- deviation.score ds <- deviation.score
 res <- y.real - y.pred res <- y.real - y.pred
Line 64: Line 64:
 # remember y is sum of res + reg + y.mean # remember y is sum of res + reg + y.mean
 y2 <- res + reg + y.mean y2 <- res + reg + y.mean
-d$y==y2+df$y==y2
  
 ss.tot <- sum(ds^2) ss.tot <- sum(ds^2)
Line 70: Line 70:
 ss.reg <- sum(reg^2) ss.reg <- sum(reg^2)
  
-ss.tot2 <- var(d$y) * (length(d$y)-1)+ss.tot2 <- var(df$y) * (length(df$y)-1)
 ss.tot ss.tot
 ss.tot2 ss.tot2
Line 78: Line 78:
  
 k <- 3 # # of parameters a, b1, b2 k <- 3 # # of parameters a, b1, b2
-df.tot <- length(y)-1+df.tot <- length(df$y)-1
 df.reg <- k - 1 df.reg <- k - 1
 df.res <- df.tot - df.reg df.res <- df.tot - df.reg
Line 95: Line 95:
 anova(lm.y.x1x2) anova(lm.y.x1x2)
  
-summary(lm(y~x2+x1, data=d)) +summary(lm(y~x2+x1, data=df)) 
-anova(lm(y~x2+x1, data=d))+anova(lm(y~x2+x1, data=df))
  
 # note on 2 t-tests in summary # note on 2 t-tests in summary
Line 122: Line 122:
 # beta coefficient (standardized b) # beta coefficient (standardized b)
 # beta <- b * (sd(x)/sd(y)) # beta <- b * (sd(x)/sd(y))
-beta1 <- b1 * (sd(x1)/sd(y)) +beta1 <- b1 * (sd(df$x1)/sd(df$y)) 
-beta2 <- b2 * (sd(x2)/sd(y))+beta2 <- b2 * (sd(df$x2)/sd(df$y))
 beta1 beta1
 beta2 beta2
Line 136: Line 136:
 # understand with diagrams first # understand with diagrams first
 # then calculate with r # then calculate with r
-lm.tmp.1 <- lm(x2~x1, data=d)+lm.tmp.1 <- lm(x2~x1, data=df)
 res.x2.x1 <- lm.tmp.1$residuals res.x2.x1 <- lm.tmp.1$residuals
  
-lm.tmp.2 <- lm(y~x1, data=d)+lm.tmp.2 <- lm(y~x1, data=df)
 res.y.x1 <- lm.tmp.2$residuals res.y.x1 <- lm.tmp.2$residuals
  
-lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)+lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
 summary(lm.tmp.3) summary(lm.tmp.3)
 summary(lm.tmp.3)$r.squared summary(lm.tmp.3)$r.squared
Line 148: Line 148:
 # install.packages("ppcor") # install.packages("ppcor")
 library(ppcor) library(ppcor)
-partial.r <- pcor.test(d$y, d$x2, d$x1)+partial.r <- pcor.test(df$y, df$x2, df$x1)
 str(partial.r) str(partial.r)
 partial.r$estimate partial.r$estimate
Line 157: Line 157:
  
 # x1's own explanation? # x1's own explanation?
-lm.tmp.4 <- lm(x1~x2, data=d)+lm.tmp.4 <- lm(x1~x2, data=df)
 res.x1.x2 <- lm.tmp.4$residuals res.x1.x2 <- lm.tmp.4$residuals
  
-lm.tmp.5 <- lm(y~x2, data=d)+lm.tmp.5 <- lm(y~x2, data=df)
 res.y.x2 <- lm.tmp.5$residuals res.y.x2 <- lm.tmp.5$residuals
  
-lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)+lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
 summary(lm.tmp.6) summary(lm.tmp.6)
  
-partial.r <- pcor.test(d$y, d$x1, d$x2)+partial.r <- pcor.test(df$y, df$x1, df$x2)
 str(partial.r) str(partial.r)
 partial.r$estimate # this is partial correlation, not pr^2 partial.r$estimate # this is partial correlation, not pr^2
Line 176: Line 176:
 # semipartial correlation coefficient and spr2 # semipartial correlation coefficient and spr2
 # #
-spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) +spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) 
-spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2)+spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
 spr.y.x2.x1 spr.y.x2.x1
 spr.y.x1.x2 spr.y.x1.x2
Line 185: Line 185:
 spr2.y.x1.x2  spr2.y.x1.x2 
  
-lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)+lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
 summary(lm.tmp.7) summary(lm.tmp.7)
 spr2.y.x2.x1  spr2.y.x2.x1 
  
-lm.tmp.8 <- lm(y~res.x1.x2, data = d)+lm.tmp.8 <- lm(y~res.x1.x2, data=df)
 summary(lm.tmp.8) summary(lm.tmp.8)
 spr2.y.x1.x2  spr2.y.x1.x2 
Line 250: Line 250:
 # y). # y).
 ############################################# #############################################
- 
  
  
Line 260: Line 259:
 > # > #
 > rm(list=ls()) > rm(list=ls())
-<- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  +df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  
-d+df
    bankaccount income famnum    bankaccount income famnum
 1            6    220      5 1            6    220      5
Line 274: Line 273:
 10              270      3 10              270      3
  
-> colnames(d) <- c("y", "x1", "x2"+> colnames(df) <- c("y", "x1", "x2"
-d+df
     y  x1 x2     y  x1 x2
 1   6 220  5 1   6 220  5
Line 290: Line 289:
 > # x1 = 인컴 > # x1 = 인컴
 > # x2 = 부양가족수  > # x2 = 부양가족수 
-> lm.y.x1 <- lm(y ~ x1, data=d)+> lm.y.x1 <- lm(y ~ x1, data=df)
 > summary(lm.y.x1) > summary(lm.y.x1)
  
 Call: Call:
-lm(formula = y ~ x1, data = d)+lm(formula = y ~ x1, data = df)
  
 Residuals: Residuals:
Line 320: Line 319:
 --- ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-> cor(d$x1, d$y)^2+> cor(df$x1, df$y)^2
 [1] 0.6311 [1] 0.6311
 > summary(lm.y.x1)$r.squared > summary(lm.y.x1)$r.squared
Line 326: Line 325:
  
  
-> lm.y.x2 <- lm(y ~ x2, data=d)+> lm.y.x2 <- lm(y ~ x2, data=df)
 > summary(lm.y.x2) > summary(lm.y.x2)
  
 Call: Call:
-lm(formula = y ~ x2, data = d)+lm(formula = y ~ x2, data = df)
  
 Residuals: Residuals:
Line 356: Line 355:
 --- ---
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-> cor(d$x2, d$y)^2+> cor(df$x2, df$y)^2
 [1] 0.4793 [1] 0.4793
 > summary(lm.y.x2)$r.squared > summary(lm.y.x2)$r.squared
Line 362: Line 361:
  
  
-> lm.y.x1x2 <- lm(y ~ x1+x2, data=d)+> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
 > summary(lm.y.x1x2) > summary(lm.y.x1x2)
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Residuals: Residuals:
Line 407: Line 406:
 (Intercept)          x1          x2  (Intercept)          x1          x2 
     6.39910     0.01184    -0.54473      6.39910     0.01184    -0.54473 
-> # y.hat = 6.399103 + (0.01184145)*x1 + (0.54472725)*x2 +> # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 
 > a <- lm.y.x1x2$coefficient[1] > a <- lm.y.x1x2$coefficient[1]
 > b1 <- lm.y.x1x2$coefficient[2] > b1 <- lm.y.x1x2$coefficient[2]
Line 421: Line 420:
 -0.5447  -0.5447 
  
-> y.pred <- a + (b1 * d$x1) + (b2 * d$x2)+> y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
 > y.pred  > y.pred 
  [1]  6.281  5.381  7.844  6.588  9.217 10.023  7.252  9.809  9.644  7.962  [1]  6.281  5.381  7.844  6.588  9.217 10.023  7.252  9.809  9.644  7.962
Line 430: Line 429:
 TRUE TRUE TRUE TRUE TRUE TRUE  TRUE TRUE TRUE TRUE TRUE TRUE 
  
-> y.real <- d$y+> y.real <- df$y
 > y.real > y.real
  [1]  6  5  7  7  8 10  8 11  9  9  [1]  6  5  7  7  8 10  8 11  9  9
-> y.mean <- mean(d$y)+> y.mean <- mean(df$y)
 > y.mean  > y.mean 
 [1] 8 [1] 8
  
-> deviation.score <- d$y - y.mean+> deviation.score <- df$y - y.mean
 > ds <- deviation.score > ds <- deviation.score
 > res <- y.real - y.pred > res <- y.real - y.pred
Line 445: Line 444:
 > # remember y is sum of res + reg + y.mean > # remember y is sum of res + reg + y.mean
 > y2 <- res + reg + y.mean > y2 <- res + reg + y.mean
-d$y==y2+df$y==y2
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
  
Line 452: Line 451:
 > ss.reg <- sum(reg^2) > ss.reg <- sum(reg^2)
  
-> ss.tot2 <- var(d$y) * (length(d$y)-1)+> ss.tot2 <- var(df$y) * (length(df$y)-1)
 > ss.tot > ss.tot
 [1] 30 [1] 30
Line 465: Line 464:
  
 > k <- 3 # # of parameters a, b1, b2 > k <- 3 # # of parameters a, b1, b2
-> df.tot <- length(d$y)-1+> df.tot <- length(df$y)-1
 > df.reg <- k - 1 > df.reg <- k - 1
 > df.res <- df.tot - df.reg > df.res <- df.tot - df.reg
Line 486: Line 485:
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Residuals: Residuals:
Line 515: Line 514:
 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
  
-> summary(lm(y~x2+x1, data=d))+> summary(lm(y~x2+x1, data=df))
  
 Call: Call:
-lm(formula = y ~ x2 + x1, data = d)+lm(formula = y ~ x2 + x1, data = df)
  
 Residuals: Residuals:
Line 536: Line 535:
 F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037 F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037
  
-> anova(lm(y~x2+x1, data=d))+> anova(lm(y~x2+x1, data=df))
 Analysis of Variance Table Analysis of Variance Table
  
Line 571: Line 570:
 > # beta coefficient (standardized b) > # beta coefficient (standardized b)
 > # beta <- b * (sd(x)/sd(y)) > # beta <- b * (sd(x)/sd(y))
-> beta1 <- b1 * (sd(d$x1)/sd(d$y)) +> beta1 <- b1 * (sd(df$x1)/sd(df$y)) 
-> beta2 <- b2 * (sd(d$x2)/sd(d$y))+> beta2 <- b2 * (sd(df$x2)/sd(df$y))
 > beta1 > beta1
     x1      x1 
Line 582: Line 581:
 > # install.packages("lm.beta") > # install.packages("lm.beta")
 > library(lm.beta) > library(lm.beta)
-경고메시지(들):  
-패키지 ‘lm.beta’는 R 버전 4.3.3에서 작성되었습니다  
 > lm.beta(lm.y.x1x2) > lm.beta(lm.y.x1x2)
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Standardized Coefficients:: Standardized Coefficients::
Line 599: Line 596:
 > # understand with diagrams first > # understand with diagrams first
 > # then calculate with r > # then calculate with r
-> lm.tmp.1 <- lm(x2~x1, data=d)+> lm.tmp.1 <- lm(x2~x1, data=df)
 > res.x2.x1 <- lm.tmp.1$residuals > res.x2.x1 <- lm.tmp.1$residuals
  
-> lm.tmp.2 <- lm(y~x1, data=d)+> lm.tmp.2 <- lm(y~x1, data=df)
 > res.y.x1 <- lm.tmp.2$residuals > res.y.x1 <- lm.tmp.2$residuals
  
-> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)+> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
 > summary(lm.tmp.3) > summary(lm.tmp.3)
  
 Call: Call:
-lm(formula = res.y.x1 ~ res.x2.x1, data = d)+lm(formula = res.y.x1 ~ res.x2.x1, data = df)
  
 Residuals: Residuals:
Line 632: Line 629:
 > # install.packages("ppcor") > # install.packages("ppcor")
 > library(ppcor) > library(ppcor)
-> partial.r <- pcor.test(d$y, d$x2, d$x1)+> partial.r <- pcor.test(df$y, df$x2, df$x1)
 > str(partial.r) > str(partial.r)
 'data.frame': 1 obs. of  6 variables: 'data.frame': 1 obs. of  6 variables:
Line 646: Line 643:
  
 Call: Call:
-lm(formula = res.y.x1 ~ res.x2.x1, data = d)+lm(formula = res.y.x1 ~ res.x2.x1, data = df)
  
 Residuals: Residuals:
Line 670: Line 667:
  
 > # x1's own explanation? > # x1's own explanation?
-> lm.tmp.4 <- lm(x1~x2, data=d)+> lm.tmp.4 <- lm(x1~x2, data=df)
 > res.x1.x2 <- lm.tmp.4$residuals > res.x1.x2 <- lm.tmp.4$residuals
  
-> lm.tmp.5 <- lm(y~x2, data=d)+> lm.tmp.5 <- lm(y~x2, data=df)
 > res.y.x2 <- lm.tmp.5$residuals > res.y.x2 <- lm.tmp.5$residuals
  
-> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)+> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
 > summary(lm.tmp.6) > summary(lm.tmp.6)
  
 Call: Call:
-lm(formula = res.y.x2 ~ res.x1.x2, data = d)+lm(formula = res.y.x2 ~ res.x1.x2, data = df)
  
 Residuals: Residuals:
Line 698: Line 695:
  
  
-> partial.r <- pcor.test(d$y, d$x1, d$x2)+> partial.r <- pcor.test(df$y, df$x1, df$x2)
 > str(partial.r) > str(partial.r)
 'data.frame': 1 obs. of  6 variables: 'data.frame': 1 obs. of  6 variables:
Line 718: Line 715:
 > # semipartial correlation coefficient and spr2 > # semipartial correlation coefficient and spr2
 > # > #
-> spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) +> spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) 
-> spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2)+> spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
 > spr.y.x2.x1 > spr.y.x2.x1
   estimate p.value statistic  n gp  Method   estimate p.value statistic  n gp  Method
Line 733: Line 730:
 [1] 0.3189 [1] 0.3189
  
-> lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)+> lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
 > summary(lm.tmp.7) > summary(lm.tmp.7)
  
 Call: Call:
-lm(formula = y ~ res.x2.x1, data = d)+lm(formula = y ~ res.x2.x1, data = df)
  
 Residuals: Residuals:
Line 757: Line 754:
 [1] 0.167 [1] 0.167
  
-> lm.tmp.8 <- lm(y~res.x1.x2, data = d)+> lm.tmp.8 <- lm(y~res.x1.x2, data=df)
 > summary(lm.tmp.8) > summary(lm.tmp.8)
  
 Call: Call:
-lm(formula = y ~ res.x1.x2, data = d)+lm(formula = y ~ res.x1.x2, data = df)
  
 Residuals: Residuals:
Line 795: Line 792:
  
 Call: Call:
-lm(formula = y ~ x2, data = d)+lm(formula = y ~ x2, data = df)
  
 Residuals: Residuals:
Line 825: Line 822:
  
 Call: Call:
-lm(formula = y ~ x1, data = d)+lm(formula = y ~ x1, data = df)
  
 Residuals: Residuals:
Line 855: Line 852:
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Residuals: Residuals:
Line 917: Line 914:
 > # y). > # y).
 > ############################################# > #############################################
 +
  
 </code> </code>
c/ms/2025/schedule/w13.lecture.note.1749424134.txt.gz · Last modified: 2025/06/09 08:08 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki