<code>
> #
> rm(list=ls())
> df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")
> df
   bankaccount income famnum
1            6    220      5
10           9    270      3

> colnames(df) <- c("y", "x1", "x2")
> df
    y  x1 x2
1   6 220  5
> # x1 = income
> # x2 = number of dependents (family members supported)
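# Added note (not part of the captured session): a quick look at the
# pairwise correlations among y, x1, and x2 before fitting the models below.
round(cor(df), 3)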
> lm.y.x1 <- lm(y ~ x1, data=df)
> summary(lm.y.x1)

Call:
lm(formula = y ~ x1, data = df)

Residuals:
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x1, df$y)^2
[1] 0.6311
> summary(lm.y.x1)$r.squared


> lm.y.x2 <- lm(y ~ x2, data=df)
> summary(lm.y.x2)

Call:
lm(formula = y ~ x2, data = df)

Residuals:
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> cor(df$x2, df$y)^2
[1] 0.4793
> summary(lm.y.x2)$r.squared


> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
> summary(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
(Intercept)          x1          x2 
    6.39910     0.01184    -0.54473 
> # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 
> a <- lm.y.x1x2$coefficient[1]
> b1 <- lm.y.x1x2$coefficient[2]
-0.5447 

> y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
> y.pred 
 [1]  6.281  5.381  7.844  6.588  9.217 10.023  7.252  9.809  9.644  7.962
TRUE TRUE TRUE TRUE TRUE TRUE 
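# Added check (a sketch; assumes a, b1, b2, y.pred, and lm.y.x1x2 from the
# session above): the hand-computed predictions should match R's fitted values.
all.equal(unname(y.pred), unname(fitted(lm.y.x1x2)))   # expect TRUE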
  
> y.real <- df$y
> y.real
 [1]  6  5  7  7  8 10  8 11  9  9
> y.mean <- mean(df$y)
> y.mean 
[1] 8

> deviation.score <- df$y - y.mean
> ds <- deviation.score
> res <- y.real - y.pred
> # remember y is sum of res + reg + y.mean
> y2 <- res + reg + y.mean
> df$y==y2
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

> ss.reg <- sum(reg^2)

> ss.tot2 <- var(df$y) * (length(df$y)-1)
> ss.tot
[1] 30
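# Added check (a sketch; assumes reg (y.pred - y.mean), res, ss.res <- sum(res^2),
# and ss.tot <- sum(ds^2) from the lines not shown above): the regression and
# residual sums of squares should add up to the total, and R-squared is the
# regression SS over the total SS.
ss.reg + ss.res        # should equal ss.tot (30)
ss.reg / ss.tot        # should match summary(lm.y.x1x2)$r.squared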

> k <- 3 # # of parameters a, b1, b2
> df.tot <- length(df$y)-1
> df.reg <- k - 1
> df.res <- df.tot - df.reg
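# Added check (sketch; r2 and f.from.r2 are names introduced here): the F
# statistic can also be written in terms of R-squared and the degrees of
# freedom computed above.
r2 <- summary(lm.y.x1x2)$r.squared
f.from.r2 <- (r2 / df.reg) / ((1 - r2) / df.res)
f.from.r2                                          # about 13.8, as in summary()
pf(f.from.r2, df.reg, df.res, lower.tail = FALSE)  # about 0.0037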

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> summary(lm(y~x2+x1, data=df))

Call:
lm(formula = y ~ x2 + x1, data = df)

Residuals:
F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037

> anova(lm(y~x2+x1, data=df))
Analysis of Variance Table

> # beta coefficient (standardized b)
> # beta <- b * (sd(x)/sd(y))
> beta1 <- b1 * (sd(df$x1)/sd(df$y)) 
> beta2 <- b2 * (sd(df$x2)/sd(df$y))
> beta1
    x1 
> # install.packages("lm.beta")
> library(lm.beta)
> lm.beta(lm.y.x1x2)

Call:
lm(formula = y ~ x1 + x2, data = df)

Standardized Coefficients::
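# Added check (sketch; df.std is a name introduced here): refitting the model
# on z-scored variables gives the standardized coefficients directly; the
# slopes should match beta1, beta2 and the lm.beta() output above.
df.std <- data.frame(scale(df))
coef(lm(y ~ x1 + x2, data = df.std))   # intercept ~ 0; slopes are the betas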
> # understand with diagrams first
> # then calculate with r
> lm.tmp.1 <- lm(x2~x1, data=df)
> res.x2.x1 <- lm.tmp.1$residuals

> lm.tmp.2 <- lm(y~x1, data=df)
> res.y.x1 <- lm.tmp.2$residuals

> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
> summary(lm.tmp.3)

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:
> # install.packages("ppcor")
> library(ppcor)
> partial.r <- pcor.test(df$y, df$x2, df$x1)
> str(partial.r)
'data.frame': 1 obs. of  6 variables:
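# Added check: the squared partial correlation of y and x2 (controlling for x1)
# should equal the R-squared of the residual-on-residual model above.
partial.r$estimate^2
summary(lm.tmp.3)$r.squared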

Call:
lm(formula = res.y.x1 ~ res.x2.x1, data = df)

Residuals:

> # x1's own explanation?
> lm.tmp.4 <- lm(x1~x2, data=df)
> res.x1.x2 <- lm.tmp.4$residuals

> lm.tmp.5 <- lm(y~x2, data=df)
> res.y.x2 <- lm.tmp.5$residuals

> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
> summary(lm.tmp.6)

Call:
lm(formula = res.y.x2 ~ res.x1.x2, data = df)

Residuals:


> partial.r <- pcor.test(df$y, df$x1, df$x2)
> str(partial.r)
'data.frame': 1 obs. of  6 variables:
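# Added check, parallel to the one for x2: the squared partial correlation of
# y and x1 (controlling for x2) should equal the R-squared of lm.tmp.6.
partial.r$estimate^2
summary(lm.tmp.6)$r.squared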
> # semipartial correlation coefficient and spr2
> #
> spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) 
> spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
> spr.y.x2.x1
  estimate p.value statistic  n gp  Method
[1] 0.3189
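# Added check (sketch): the squared semipartial correlation of a predictor is
# the increase in R-squared from adding that predictor to the model that
# already contains the other one.
spr.y.x2.x1$estimate^2
summary(lm.y.x1x2)$r.squared - summary(lm.y.x1)$r.squared   # about 0.167
spr.y.x1.x2$estimate^2
summary(lm.y.x1x2)$r.squared - summary(lm.y.x2)$r.squared   # about 0.3189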
  
> lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
> summary(lm.tmp.7)

Call:
lm(formula = y ~ res.x2.x1, data = df)

Residuals:
[1] 0.167
  
> lm.tmp.8 <- lm(y~res.x1.x2, data=df)
> summary(lm.tmp.8)

Call:
lm(formula = y ~ res.x1.x2, data = df)

Residuals:

Call:
lm(formula = y ~ x2, data = df)

Residuals:

Call:
lm(formula = y ~ x1, data = df)

Residuals:

Call:
lm(formula = y ~ x1 + x2, data = df)

Residuals:
> # y).
> #############################################
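# Added sketch (r2.full, unique.x1, unique.x2, common are names introduced
# here): putting the pieces together, the model R-squared splits into each
# predictor's unique part (the squared semipartials) plus the part the two
# predictors explain in common.
r2.full   <- summary(lm.y.x1x2)$r.squared
unique.x1 <- spr.y.x1.x2$estimate^2
unique.x2 <- spr.y.x2.x1$estimate^2
common    <- r2.full - unique.x1 - unique.x2
c(unique.x1 = unique.x1, unique.x2 = unique.x2, common = common)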


</code>