User Tools

Site Tools


c:ms:2025:schedule:w13.lecture.note

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
c:ms:2025:schedule:w13.lecture.note [2025/06/09 07:08] – [MR] hkimscilc:ms:2025:schedule:w13.lecture.note [2025/06/09 08:51] (current) – [output] hkimscil
Line 5: Line 5:
 # #
 rm(list=ls()) rm(list=ls())
-<- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  +df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  
-d+df
  
-colnames(d) <- c("y", "x1", "x2"+colnames(df) <- c("y", "x1", "x2"
-d+df
 # y = 통장갯수 # y = 통장갯수
 # x1 = 인컴 # x1 = 인컴
 # x2 = 부양가족수  # x2 = 부양가족수 
-lm.y.x1 <- lm(y ~ x1, data=d)+lm.y.x1 <- lm(y ~ x1, data=df)
 summary(lm.y.x1) summary(lm.y.x1)
 anova(lm.y.x1) anova(lm.y.x1)
-cor(d$x1, d$y)^2+cor(df$x1, df$y)^2
 summary(lm.y.x1)$r.squared summary(lm.y.x1)$r.squared
  
  
-lm.y.x2 <- lm(y ~ x2, data=d)+lm.y.x2 <- lm(y ~ x2, data=df)
 summary(lm.y.x2) summary(lm.y.x2)
 anova(lm.y.x2) anova(lm.y.x2)
-cor(d$x2, d$y)^2+cor(df$x2, df$y)^2
 summary(lm.y.x2)$r.squared summary(lm.y.x2)$r.squared
  
  
-lm.y.x1x2 <- lm(y ~ x1+x2, data=d)+lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
 summary(lm.y.x1x2) summary(lm.y.x1x2)
 anova(lm.y.x1x2) anova(lm.y.x1x2)
Line 38: Line 38:
  
 lm.y.x1x2$coefficient lm.y.x1x2$coefficient
-# y.hat = 6.399103 + (0.01184145)*x1 + (0.54472725)*x2 +# y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2 
 a <- lm.y.x1x2$coefficient[1] a <- lm.y.x1x2$coefficient[1]
 b1 <- lm.y.x1x2$coefficient[2] b1 <- lm.y.x1x2$coefficient[2]
Line 46: Line 46:
 b2  b2 
  
-y.pred <- a + (b1 * d$x1) + (b2 * d$x2)+y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
 y.pred  y.pred 
 # or  # or 
Line 52: Line 52:
 head(y.pred == y.pred2) head(y.pred == y.pred2)
  
-y.real <- d$y+y.real <- df$y
 y.real y.real
-y.mean <- mean(d$y)+y.mean <- mean(df$y)
 y.mean  y.mean 
  
-deviation.score <- d$y - y.mean+deviation.score <- df$y - y.mean
 ds <- deviation.score ds <- deviation.score
 res <- y.real - y.pred res <- y.real - y.pred
Line 64: Line 64:
 # remember y is sum of res + reg + y.mean # remember y is sum of res + reg + y.mean
 y2 <- res + reg + y.mean y2 <- res + reg + y.mean
-d$y==y2+df$y==y2
  
 ss.tot <- sum(ds^2) ss.tot <- sum(ds^2)
Line 70: Line 70:
 ss.reg <- sum(reg^2) ss.reg <- sum(reg^2)
  
-ss.tot2 <- var(d$y) * (length(d$y)-1)+ss.tot2 <- var(df$y) * (length(df$y)-1)
 ss.tot ss.tot
 ss.tot2 ss.tot2
Line 78: Line 78:
  
 k <- 3 # # of parameters a, b1, b2 k <- 3 # # of parameters a, b1, b2
-df.tot <- length(y)-1+df.tot <- length(df$y)-1
 df.reg <- k - 1 df.reg <- k - 1
 df.res <- df.tot - df.reg df.res <- df.tot - df.reg
Line 95: Line 95:
 anova(lm.y.x1x2) anova(lm.y.x1x2)
  
-summary(lm(y~x2+x1, data=d)) +summary(lm(y~x2+x1, data=df)) 
-anova(lm(y~x2+x1, data=d))+anova(lm(y~x2+x1, data=df))
  
 # note on 2 t-tests in summary # note on 2 t-tests in summary
Line 122: Line 122:
 # beta coefficient (standardized b) # beta coefficient (standardized b)
 # beta <- b * (sd(x)/sd(y)) # beta <- b * (sd(x)/sd(y))
-beta1 <- b1 * (sd(x1)/sd(y)) +beta1 <- b1 * (sd(df$x1)/sd(df$y)) 
-beta2 <- b2 * (sd(x2)/sd(y))+beta2 <- b2 * (sd(df$x2)/sd(df$y))
 beta1 beta1
 beta2 beta2
Line 136: Line 136:
 # understand with diagrams first # understand with diagrams first
 # then calculate with r # then calculate with r
-lm.tmp.1 <- lm(x2~x1, data=d)+lm.tmp.1 <- lm(x2~x1, data=df)
 res.x2.x1 <- lm.tmp.1$residuals res.x2.x1 <- lm.tmp.1$residuals
  
-lm.tmp.2 <- lm(y~x1, data=d)+lm.tmp.2 <- lm(y~x1, data=df)
 res.y.x1 <- lm.tmp.2$residuals res.y.x1 <- lm.tmp.2$residuals
  
-lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)+lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
 summary(lm.tmp.3) summary(lm.tmp.3)
 summary(lm.tmp.3)$r.squared summary(lm.tmp.3)$r.squared
Line 148: Line 148:
 # install.packages("ppcor") # install.packages("ppcor")
 library(ppcor) library(ppcor)
-partial.r <- pcor.test(d$y, d$x2, d$x1)+partial.r <- pcor.test(df$y, df$x2, df$x1)
 str(partial.r) str(partial.r)
 partial.r$estimate partial.r$estimate
Line 157: Line 157:
  
 # x1's own explanation? # x1's own explanation?
-lm.tmp.4 <- lm(x1~x2, data=d)+lm.tmp.4 <- lm(x1~x2, data=df)
 res.x1.x2 <- lm.tmp.4$residuals res.x1.x2 <- lm.tmp.4$residuals
  
-lm.tmp.5 <- lm(y~x2, data=d)+lm.tmp.5 <- lm(y~x2, data=df)
 res.y.x2 <- lm.tmp.5$residuals res.y.x2 <- lm.tmp.5$residuals
  
-lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)+lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
 summary(lm.tmp.6) summary(lm.tmp.6)
  
-partial.r <- pcor.test(d$y, d$x1, d$x2)+partial.r <- pcor.test(df$y, df$x1, df$x2)
 str(partial.r) str(partial.r)
 partial.r$estimate # this is partial correlation, not pr^2 partial.r$estimate # this is partial correlation, not pr^2
Line 176: Line 176:
 # semipartial correlation coefficient and spr2 # semipartial correlation coefficient and spr2
 # #
-spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) +spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) 
-spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2)+spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
 spr.y.x2.x1 spr.y.x2.x1
 spr.y.x1.x2 spr.y.x1.x2
Line 185: Line 185:
 spr2.y.x1.x2  spr2.y.x1.x2 
  
-lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)+lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
 summary(lm.tmp.7) summary(lm.tmp.7)
 spr2.y.x2.x1  spr2.y.x2.x1 
  
-lm.tmp.8 <- lm(y~res.x1.x2, data = d)+lm.tmp.8 <- lm(y~res.x1.x2, data=df)
 summary(lm.tmp.8) summary(lm.tmp.8)
 spr2.y.x1.x2  spr2.y.x1.x2 
Line 250: Line 250:
 # y). # y).
 ############################################# #############################################
- 
  
  
Line 260: Line 259:
 > # > #
 > rm(list=ls()) > rm(list=ls())
-<- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  +df <- read.csv("http://commres.net/wiki/_media/regression01-bankaccount.csv")  
-d+df
    bankaccount income famnum    bankaccount income famnum
 1            6    220      5 1            6    220      5
Line 274: Line 273:
 10              270      3 10              270      3
  
-> colnames(d) <- c("y", "x1", "x2"+> colnames(df) <- c("y", "x1", "x2"
-d+df
     y  x1 x2     y  x1 x2
 1   6 220  5 1   6 220  5
Line 290: Line 289:
 > # x1 = 인컴 > # x1 = 인컴
 > # x2 = 부양가족수  > # x2 = 부양가족수 
-> lm.y.x1 <- lm(y ~ x1, data=d)+> lm.y.x1 <- lm(y ~ x1, data=df)
 > summary(lm.y.x1) > summary(lm.y.x1)
  
 Call: Call:
-lm(formula = y ~ x1, data = d)+lm(formula = y ~ x1, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.5189 -0.8969 -0.1297  1.0058  1.5800 +-1.519 -0.897 -0.130  1.006  1.580 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                Estimate Std. Error t value Pr(>|t|)   
-(Intercept) 3.617781   1.241518   2.914  0.01947 *  +(Intercept)  3.61778    1.24152    2.91    0.019 *  
-x1          0.015269   0.004127   3.700  0.00605 **+x1           0.01527    0.00413    3.70    0.006 **
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 1.176 on 8 degrees of freedom +Residual standard error: 1.18 on 8 degrees of freedom 
-Multiple R-squared:  0.6311, Adjusted R-squared:  0.585  +Multiple R-squared:  0.631, Adjusted R-squared:  0.585  
-F-statistic: 13.69 on 1 and 8 DF,  p-value: 0.006046+F-statistic: 13.on 1 and 8 DF,  p-value: 0.00605
  
 > anova(lm.y.x1) > anova(lm.y.x1)
Line 316: Line 314:
  
 Response: y Response: y
-          Df Sum Sq Mean Sq F value   Pr(>F)    +          Df Sum Sq Mean Sq F value Pr(>F)    
-x1         1 18.934 18.9336  13.687 0.006046 ** +x1           18.9   18.93    13.7  0.006 ** 
-Residuals  8 11.066  1.3833                    +Residuals   11.1    1.38                  
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 +> cor(df$x1, df$y)^2 
-r.sq.y.x1 <- cor(x1,y)^2 +[1] 0.6311 
-rsq.y.x1<-summary(lm.y.x1)$r.squared+> summary(lm.y.x1)$r.squared 
 +[1] 0.6311
  
  
-> lm.y.x2 <- lm(y ~ x2, data=d)+> lm.y.x2 <- lm(y ~ x2, data=df)
 > summary(lm.y.x2) > summary(lm.y.x2)
  
 Call: Call:
-lm(formula = y ~ x2, data = d)+lm(formula = y ~ x2, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2537 -0.8881 -0.4851  0.4963  2.5920 +-1.254 -0.888 -0.485  0.496  2.592 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept)  10.7910     1.1195   9.639 1.12e-05 *** +(Intercept)   10.791      1.119    9.64  1.1e-05 *** 
-x2           -0.8458     0.3117  -2.713   0.0265 *  +x2            -0.846      0.312   -2.71    0.027 *  
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 1.397 on 8 degrees of freedom +Residual standard error: 1.on 8 degrees of freedom 
-Multiple R-squared:  0.4793, Adjusted R-squared:  0.4142  +Multiple R-squared:  0.479, Adjusted R-squared:  0.414  
-F-statistic: 7.363 on 1 and 8 DF,  p-value: 0.02651+F-statistic: 7.36 on 1 and 8 DF,  p-value: 0.0265
  
 > anova(lm.y.x2) > anova(lm.y.x2)
Line 352: Line 350:
  
 Response: y Response: y
-          Df Sum Sq Mean Sq F value  Pr(>F)   +          Df Sum Sq Mean Sq F value Pr(>F)   
-x2         1 14.378 14.3781  7.3631 0.02651 +x2           14.4   14.38    7.36  0.027 
-Residuals  8 15.622  1.9527                  +Residuals   15.6    1.95                 
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 +> cor(df$x2, df$y)^2 
-rsq.y.x2 <- cor(x2,y)^2 +[1] 0.4793 
-> rsq.y.x2 <- summary(lm.y.x2)$r.squared+summary(lm.y.x2)$r.squared 
 +[1] 0.4793
  
  
-> lm.y.x1x2 <- lm(y ~ x1+x2, data=d)+> lm.y.x1x2 <- lm(y ~ x1+x2, data=df)
 > summary(lm.y.x1x2) > summary(lm.y.x1x2)
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2173 -0.5779 -0.1515  0.6642  1.1906 +-1.217 -0.578 -0.151  0.664  1.191 
  
 Coefficients: Coefficients:
-             Estimate Std. Error t value Pr(>|t|)    +            Estimate Std. Error t value Pr(>|t|)    
-(Intercept)  6.399103   1.516539   4.220  0.00394 ** +(Intercept)  6.39910    1.51654    4.22   0.0039 ** 
-x1           0.011841   0.003561   3.325  0.01268 *  +x1           0.01184    0.00356    3.33   0.0127 *  
-x2          -0.544727   0.226364  -2.406  0.04702 +x2          -0.54473    0.22636   -2.41   0.0470 
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 0.9301 on 7 degrees of freedom +Residual standard error: 0.93 on 7 degrees of freedom 
-Multiple R-squared:  0.7981, Adjusted R-squared:  0.7404  +Multiple R-squared:  0.798, Adjusted R-squared:  0.74  
-F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696+F-statistic: 13.on 2 and 7 DF,  p-value: 0.0037
  
 > anova(lm.y.x1x2) > anova(lm.y.x1x2)
Line 389: Line 387:
  
 Response: y Response: y
-          Df  Sum Sq Mean Sq F value   Pr(>F)    +          Df Sum Sq Mean Sq F value Pr(>F)    
-x1         1 18.9336 18.9336 21.8841 0.002266 ** +x1          18.93   18.93   21.88 0.0023 ** 
-x2          5.0101  5.0101  5.7909 0.047020 *  +x2           5.01    5.01    5.79 0.0470 *  
-Residuals  6.0562  0.8652                    +Residuals   6.06    0.87                  
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 +bcd <- summary(lm.y.x1x2)$r.squared 
-rsq.y.x1x2<- summary(lm.y.x1x2)$r.squared +bcd 
-rsq.y.x1 + rsq.y.x2 - rsq.y.x1x2 +[1] 0.7981 
-[1] 0.3122658+> bc.cd <- summary(lm.y.x1)$r.squared summary(lm.y.x2)$r.squared 
 +> bc.cd 
 +[1] 1.11 
 +> bc.cd - bcd  # note that this is the amount that shared by the two IVs 
 +[1] 0.3123 
 +
  
 > lm.y.x1x2$coefficient > lm.y.x1x2$coefficient
 (Intercept)          x1          x2  (Intercept)          x1          x2 
- 6.39910298  0.01184145 -0.54472725  +    6.39910     0.01184    -0.54473  
-> # y.hat = 6.399103 + (0.01184145)*x1 + (0.54472725)*x2 +> # y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2 
 > a <- lm.y.x1x2$coefficient[1] > a <- lm.y.x1x2$coefficient[1]
 > b1 <- lm.y.x1x2$coefficient[2] > b1 <- lm.y.x1x2$coefficient[2]
Line 409: Line 412:
 > a > a
 (Intercept)  (Intercept) 
-   6.399103 +      6.399 
 > b1 > b1
-        x1  +     x1  
-0.01184145 +0.01184 
 > b2  > b2 
-        x2  +     x2  
--0.5447272 +-0.5447 
  
-> y.pred <- a + (b1 * x1) + (b2 * x2)+> y.pred <- a + (b1 * df$x1) + (b2 * df$x2)
 > y.pred  > y.pred 
- [1]  6.280586  5.380616  7.843699  6.588485  9.217328 + [1]  6.281  5.381  7.844  6.588  9.217 10.023  7.252  9.809  9.644  7.962
- [6] 10.022506  7.251626  9.809401  9.643641  7.962113+
 > # or  > # or 
 > y.pred2 <- predict(lm.y.x1x2) > y.pred2 <- predict(lm.y.x1x2)
Line 427: Line 429:
 TRUE TRUE TRUE TRUE TRUE TRUE  TRUE TRUE TRUE TRUE TRUE TRUE 
  
-> y.real <- y+> y.real <- df$y
 > y.real > y.real
  [1]  6  5  7  7  8 10  8 11  9  9  [1]  6  5  7  7  8 10  8 11  9  9
-> y.mean <- mean(y)+> y.mean <- mean(df$y)
 > y.mean  > y.mean 
 [1] 8 [1] 8
  
 +> deviation.score <- df$y - y.mean
 +> ds <- deviation.score
 > res <- y.real - y.pred > res <- y.real - y.pred
 > reg <- y.pred - y.mean > reg <- y.pred - y.mean
Line 440: Line 444:
 > # remember y is sum of res + reg + y.mean > # remember y is sum of res + reg + y.mean
 > y2 <- res + reg + y.mean > y2 <- res + reg + y.mean
-> y==y2+df$y==y2
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
  
 +> ss.tot <- sum(ds^2)
 > ss.res <- sum(res^2) > ss.res <- sum(res^2)
 > ss.reg <- sum(reg^2) > ss.reg <- sum(reg^2)
  
-> ss.tot <- var(y) * (length(y)-1)+> ss.tot2 <- var(df$y) * (length(df$y)-1)
 > ss.tot > ss.tot
 +[1] 30
 +> ss.tot2
 [1] 30 [1] 30
 > ss.res > ss.res
-[1] 6.056235+[1] 6.056
 > ss.reg > ss.reg
-[1] 23.94376+[1] 23.94
 > ss.res+ss.reg > ss.res+ss.reg
 [1] 30 [1] 30
  
 > k <- 3 # # of parameters a, b1, b2 > k <- 3 # # of parameters a, b1, b2
-> df.tot <- length(y)-1+> df.tot <- length(df$y)-1
 > df.reg <- k - 1 > df.reg <- k - 1
 > df.res <- df.tot - df.reg > df.res <- df.tot - df.reg
Line 464: Line 471:
 > ms.res <- ss.res/df.res > ms.res <- ss.res/df.res
 > ms.reg > ms.reg
-[1] 11.97188+[1] 11.97
 > ms.res > ms.res
-[1] 0.8651765+[1] 0.8652
 > f.val <- ms.reg/ms.res > f.val <- ms.reg/ms.res
 > f.val > f.val
-[1] 13.8375+[1] 13.84
 > p.val <- pf(f.val, df.reg, df.res, lower.tail = F) > p.val <- pf(f.val, df.reg, df.res, lower.tail = F)
 > p.val > p.val
-[1] 0.003696453+[1] 0.003696
  
 > # double check  > # double check 
Line 478: Line 485:
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2173 -0.5779 -0.1515  0.6642  1.1906 +-1.217 -0.578 -0.151  0.664  1.191 
  
 Coefficients: Coefficients:
-             Estimate Std. Error t value Pr(>|t|)    +            Estimate Std. Error t value Pr(>|t|)    
-(Intercept)  6.399103   1.516539   4.220  0.00394 ** +(Intercept)  6.39910    1.51654    4.22   0.0039 ** 
-x1           0.011841   0.003561   3.325  0.01268 *  +x1           0.01184    0.00356    3.33   0.0127 *  
-x2          -0.544727   0.226364  -2.406  0.04702 +x2          -0.54473    0.22636   -2.41   0.0470 
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 0.9301 on 7 degrees of freedom +Residual standard error: 0.93 on 7 degrees of freedom 
-Multiple R-squared:  0.7981, Adjusted R-squared:  0.7404  +Multiple R-squared:  0.798, Adjusted R-squared:  0.74  
-F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696+F-statistic: 13.on 2 and 7 DF,  p-value: 0.0037
  
 > anova(lm.y.x1x2) > anova(lm.y.x1x2)
Line 501: Line 507:
  
 Response: y Response: y
-          Df  Sum Sq Mean Sq F value   Pr(>F)    +          Df Sum Sq Mean Sq F value Pr(>F)    
-x1         1 18.9336 18.9336 21.8841 0.002266 ** +x1          18.93   18.93   21.88 0.0023 ** 
-x2          5.0101  5.0101  5.7909 0.047020 *  +x2           5.01    5.01    5.79 0.0470 *  
-Residuals  6.0562  0.8652                    +Residuals   6.06    0.87                  
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+>  
 +> summary(lm(y~x2+x1, data=df)) 
 + 
 +Call: 
 +lm(formula = y ~ x2 + x1, data = df) 
 + 
 +Residuals: 
 +   Min     1Q Median     3Q    Max  
 +-1.217 -0.578 -0.151  0.664  1.191  
 + 
 +Coefficients: 
 +            Estimate Std. Error t value Pr(>|t|)    
 +(Intercept)  6.39910    1.51654    4.22   0.0039 ** 
 +x2          -0.54473    0.22636   -2.41   0.0470 *  
 +x1           0.01184    0.00356    3.33   0.0127 *  
 +--- 
 +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
 + 
 +Residual standard error: 0.93 on 7 degrees of freedom 
 +Multiple R-squared:  0.798, Adjusted R-squared:  0.74  
 +F-statistic: 13.8 on 2 and 7 DF,  p-value: 0.0037 
 + 
 +> anova(lm(y~x2+x1, data=df)) 
 +Analysis of Variance Table 
 + 
 +Response: y 
 +          Df Sum Sq Mean Sq F value Pr(>F)    
 +x2          14.38   14.38    16.6 0.0047 ** 
 +x1           9.57    9.57    11.1 0.0127 *  
 +Residuals  7   6.06    0.87                   
 +--- 
 +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
 +
 > # note on 2 t-tests in summary > # note on 2 t-tests in summary
 > # anova에서의 x1, x2에 대한 테스트와  > # anova에서의 x1, x2에 대한 테스트와 
Line 519: Line 557:
 > # 테스트하기 때문에 그러함 > # 테스트하기 때문에 그러함
  
 +> # 또한 anova test에서 두번째 IV의 F값은 
 +> # summary(lm)에서 두번 째 IV의 t값의 제곱값
 +> # 임을 이해. 이는 두번 째 IV의 설명력을 나
 +> # 타내는 부분이 lm과 anova 모두 같기 때문
 +> # 반면에 첫번째 IV의 경우에는 lm 분석 때에는
 +> # 고유의 설명력만을 가지고 (semi-partial cor^2)
 +> # 판단을 하는 반면에, anova는 x2와 공유하는 
 +> # 설명력도 포함해서 분석하기 때문에 t값의
 +> # 제곱이 F값이 되지 못함
 +
 +> # beta에 대한 설명
 > # beta coefficient (standardized b) > # beta coefficient (standardized b)
 > # beta <- b * (sd(x)/sd(y)) > # beta <- b * (sd(x)/sd(y))
-> beta1 <- b1 * (sd(x1)/sd(y)) +> beta1 <- b1 * (sd(df$x1)/sd(df$y)) 
-> beta2 <- b2 * (sd(x2)/sd(y))+> beta2 <- b2 * (sd(df$x2)/sd(df$y))
 > beta1 > beta1
-      x1  +    x1  
-0.616097 +0.6161 
 > beta2 > beta2
-        x2  +     x2  
--0.4458785 +-0.4459 
  
 > # install.packages("lm.beta") > # install.packages("lm.beta")
Line 535: Line 584:
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Standardized Coefficients:: Standardized Coefficients::
 (Intercept)          x1          x2  (Intercept)          x1          x2 
-         NA   0.6160970  -0.4458785 +         NA      0.6161     -0.4459 
  
  
Line 547: Line 596:
 > # understand with diagrams first > # understand with diagrams first
 > # then calculate with r > # then calculate with r
-> lm.tmp.1 <- lm(x2~x1, data=d)+> lm.tmp.1 <- lm(x2~x1, data=df)
 > res.x2.x1 <- lm.tmp.1$residuals > res.x2.x1 <- lm.tmp.1$residuals
  
-> lm.tmp.2 <- lm(y~x1, data=d)+> lm.tmp.2 <- lm(y~x1, data=df)
 > res.y.x1 <- lm.tmp.2$residuals > res.y.x1 <- lm.tmp.2$residuals
  
-> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d)+> lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df)
 > summary(lm.tmp.3) > summary(lm.tmp.3)
  
 Call: Call:
-lm(formula = res.y.x1 ~ res.x2.x1, data = d)+lm(formula = res.y.x1 ~ res.x2.x1, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2173 -0.5779 -0.1515  0.6642  1.1906 +-1.217 -0.578 -0.151  0.664  1.191 
  
 Coefficients: Coefficients:
-              Estimate Std. Error t value Pr(>|t|)   +             Estimate Std. Error t value Pr(>|t|)   
-(Intercept)  6.281e-18  2.751e-01   0.000    1.000   +(Intercept)  6.28e-18   2.75e-01    0.00    1.000   
-res.x2.x1   -5.447e-01  2.117e-01  -2.573    0.033 *+res.x2.x1   -5.45e-01   2.12e-01   -2.57    0.033 *
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 0.8701 on 8 degrees of freedom +Residual standard error: 0.87 on 8 degrees of freedom 
-Multiple R-squared:  0.4527, Adjusted R-squared:  0.3843  +Multiple R-squared:  0.453, Adjusted R-squared:  0.384  
-F-statistic: 6.618 on 1 and 8 DF,  p-value: 0.033+F-statistic: 6.62 on 1 and 8 DF,  p-value: 0.033
  
-+summary(lm.tmp.3)$r.squared 
 +[1] 0.4527 
 +> sqrt(summary(lm.tmp.3)$r.squared) 
 +[1] 0.6729
 > # install.packages("ppcor") > # install.packages("ppcor")
 > library(ppcor) > library(ppcor)
-> partial.r <- pcor.test(y, x2, x1) +> partial.r <- pcor.test(df$y, df$x2, df$x1)
-> partial.r +
-   estimate    p.value statistic  n gp  Method +
-1 -0.672856 0.04702022 -2.406425 10  1 pearson+
 > str(partial.r) > str(partial.r)
 'data.frame': 1 obs. of  6 variables: 'data.frame': 1 obs. of  6 variables:
Line 590: Line 638:
  $ gp       : num 1  $ gp       : num 1
  $ Method   : chr "pearson"  $ Method   : chr "pearson"
 +> partial.r$estimate
 +[1] -0.6729
 > summary(lm.tmp.3) > summary(lm.tmp.3)
  
 Call: Call:
-lm(formula = res.y.x1 ~ res.x2.x1, data = d)+lm(formula = res.y.x1 ~ res.x2.x1, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2173 -0.5779 -0.1515  0.6642  1.1906 +-1.217 -0.578 -0.151  0.664  1.191 
  
 Coefficients: Coefficients:
-              Estimate Std. Error t value Pr(>|t|)   +             Estimate Std. Error t value Pr(>|t|)   
-(Intercept)  6.281e-18  2.751e-01   0.000    1.000   +(Intercept)  6.28e-18   2.75e-01    0.00    1.000   
-res.x2.x1   -5.447e-01  2.117e-01  -2.573    0.033 *+res.x2.x1   -5.45e-01   2.12e-01   -2.57    0.033 *
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 0.8701 on 8 degrees of freedom +Residual standard error: 0.87 on 8 degrees of freedom 
-Multiple R-squared:  0.4527, Adjusted R-squared:  0.3843  +Multiple R-squared:  0.453, Adjusted R-squared:  0.384  
-F-statistic: 6.618 on 1 and 8 DF,  p-value: 0.033+F-statistic: 6.62 on 1 and 8 DF,  p-value: 0.033
  
 > summary(lm.tmp.3)$r.square > summary(lm.tmp.3)$r.square
-[1] 0.4527352+[1] 0.4527
 > partial.r$estimate^2 > partial.r$estimate^2
-[1] 0.4527352+[1] 0.4527
  
  
 > # x1's own explanation? > # x1's own explanation?
-> lm.tmp.4 <- lm(x1~x2, data=d)+> lm.tmp.4 <- lm(x1~x2, data=df)
 > res.x1.x2 <- lm.tmp.4$residuals > res.x1.x2 <- lm.tmp.4$residuals
  
-> lm.tmp.5 <- lm(y~x2, data=d)+> lm.tmp.5 <- lm(y~x2, data=df)
 > res.y.x2 <- lm.tmp.5$residuals > res.y.x2 <- lm.tmp.5$residuals
  
-> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d)+> lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df)
 > summary(lm.tmp.6) > summary(lm.tmp.6)
  
 Call: Call:
-lm(formula = res.y.x2 ~ res.x1.x2, data = d)+lm(formula = res.y.x2 ~ res.x1.x2, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2173 -0.5779 -0.1515  0.6642  1.1906 +-1.217 -0.578 -0.151  0.664  1.191 
  
 Coefficients: Coefficients:
-             Estimate Std. Error t value Pr(>|t|)    +            Estimate Std. Error t value Pr(>|t|)    
-(Intercept) 1.330e-17  2.751e-01   0.000  1.00000    +(Intercept) 1.33e-17   2.75e-01    0.00   1.0000    
-res.x1.x2   1.184e-02  3.331e-03   3.555  0.00746 **+res.x1.x2   1.18e-02   3.33e-03    3.55   0.0075 **
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 0.8701 on 8 degrees of freedom +Residual standard error: 0.87 on 8 degrees of freedom 
-Multiple R-squared:  0.6123, Adjusted R-squared:  0.5639  +Multiple R-squared:  0.612, Adjusted R-squared:  0.564  
-F-statistic: 12.64 on 1 and 8 DF,  p-value: 0.007458+F-statistic: 12.on 1 and 8 DF,  p-value: 0.00746
  
  
-> partial.r <- pcor.test(y, x1, x2)+> partial.r <- pcor.test(df$y, df$x1, df$x2)
 > str(partial.r) > str(partial.r)
 'data.frame': 1 obs. of  6 variables: 'data.frame': 1 obs. of  6 variables:
Line 657: Line 705:
  $ Method   : chr "pearson"  $ Method   : chr "pearson"
 > partial.r$estimate # this is partial correlation, not pr^2 > partial.r$estimate # this is partial correlation, not pr^2
-[1] 0.7825112+[1] 0.7825
 > # in order to get pr2, you should ^2 > # in order to get pr2, you should ^2
 > partial.r$estimate^2 > partial.r$estimate^2
-[1] 0.6123238+[1] 0.6123 
 +> summary(lm.tmp.6)$r.squared 
 +[1] 0.6123
  
 > ####################################################### > #######################################################
-> # 
 > # semipartial correlation coefficient and spr2 > # semipartial correlation coefficient and spr2
 > # > #
-> spr.y.x2.x1 <- spcor.test(y,x2,x1) +> spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) 
-> spr.y.x1.x2 <- spcor.test(y,x1,x2)+> spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2)
 > spr.y.x2.x1 > spr.y.x2.x1
-    estimate   p.value statistic  n gp  Method +  estimate p.value statistic  n gp  Method 
-1 -0.4086619 0.2748117 -1.184655 10  1 pearson+ -0.4087  0.2748    -1.185 10  1 pearson
 > spr.y.x1.x2 > spr.y.x1.x2
-   estimate  p.value statistic  n gp  Method +  estimate p.value statistic  n gp  Method 
-1 0.5646726 0.113182  1.810198 10  1 pearson+  0.5647  0.1132      1.81 10  1 pearson
 > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2 > spr2.y.x2.x1 <- spr.y.x2.x1$estimate^2
 > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2 > spr2.y.x1.x2 <- spr.y.x1.x2$estimate^2
 > spr2.y.x2.x1  > spr2.y.x2.x1 
-[1] 0.1670045+[1] 0.167
 > spr2.y.x1.x2  > spr2.y.x1.x2 
-[1] 0.3188552+[1] 0.3189
  
-> lm.tmp.7 <- lm(y ~ res.x2.x1, data = d)+> lm.tmp.7 <- lm(y ~ res.x2.x1, data=df)
 > summary(lm.tmp.7) > summary(lm.tmp.7)
  
 Call: Call:
-lm(formula = y ~ res.x2.x1, data = d)+lm(formula = y ~ res.x2.x1, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.8617 -1.1712 -0.4940  0.5488  3.0771 +-1.862 -1.171 -0.494  0.549  3.077 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept)   8.0000     0.5589  14.314 5.54e-07 *** +(Intercept)    8.000      0.559   14.31  5.5e-07 *** 
-res.x2.x1    -0.5447     0.4301  -1.266    0.241    +res.x2.x1     -0.545      0.430   -1.27     0.24    
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 1.767 on 8 degrees of freedom +Residual standard error: 1.77 on 8 degrees of freedom 
-Multiple R-squared:  0.167, Adjusted R-squared:  0.06288  +Multiple R-squared:  0.167, Adjusted R-squared:  0.0629  
-F-statistic: 1.604 on 1 and 8 DF,  p-value: 0.241+F-statistic:  1.on 1 and 8 DF,  p-value: 0.241
  
 > spr2.y.x2.x1  > spr2.y.x2.x1 
-[1] 0.1670045+[1] 0.167
  
-> lm.tmp.8 <- lm(y~res.x1.x2, data = d)+> lm.tmp.8 <- lm(y~res.x1.x2, data=df)
 > summary(lm.tmp.8) > summary(lm.tmp.8)
  
 Call: Call:
-lm(formula = y ~ res.x1.x2, data = d)+lm(formula = y ~ res.x1.x2, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--2.6642 -0.6084 -0.1492  1.2192  2.2901 +-2.664 -0.608 -0.149  1.219  2.290 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept) 8.000000   0.505400  15.829 2.54e-07 *** +(Intercept)  8.00000    0.50540   15.83  2.5e-07 *** 
-res.x1.x2   0.011841   0.006119   1.935    0.089 .  +res.x1.x2    0.01184    0.00612    1.94    0.089 .  
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 1.598 on 8 degrees of freedom +Residual standard error: 1.on 8 degrees of freedom 
-Multiple R-squared:  0.3189, Adjusted R-squared:  0.2337  +Multiple R-squared:  0.319, Adjusted R-squared:  0.234  
-F-statistic: 3.745 on 1 and 8 DF,  p-value: 0.08901+F-statistic: 3.74 on 1 and 8 DF,  p-value: 0.089
  
 > spr2.y.x1.x2  > spr2.y.x1.x2 
-[1] 0.3188552+[1] 0.3189
  
 +> bcd # remember bcd in the above?
 +[1] 0.7981
 +> bd <- spr2.y.x2.x1 + spr2.y.x1.x2 
 +> bd
 +[1] 0.4859
 +> bcd - bd
 +[1] 0.3123
  
 > ####################################################### > #######################################################
Line 738: Line 792:
  
 Call: Call:
-lm(formula = y ~ x2, data = d)+lm(formula = y ~ x2, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2537 -0.8881 -0.4851  0.4963  2.5920 +-1.254 -0.888 -0.485  0.496  2.592 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                 Estimate Std. Error t value Pr(>|t|)    
-(Intercept)  10.7910     1.1195   9.639 1.12e-05 *** +(Intercept)   10.791      1.119    9.64  1.1e-05 *** 
-x2           -0.8458     0.3117  -2.713   0.0265 *  +x2            -0.846      0.312   -2.71    0.027 *  
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 1.397 on 8 degrees of freedom +Residual standard error: 1.on 8 degrees of freedom 
-Multiple R-squared:  0.4793, Adjusted R-squared:  0.4142  +Multiple R-squared:  0.479, Adjusted R-squared:  0.414  
-F-statistic: 7.363 on 1 and 8 DF,  p-value: 0.02651+F-statistic: 7.36 on 1 and 8 DF,  p-value: 0.0265
  
 > all.x2 <- summary(lm.y.x2)$r.squared > all.x2 <- summary(lm.y.x2)$r.squared
 > all.x2 > all.x2
-[1] 0.4792703+[1] 0.4793
 > spr2.y.x2.x1 > spr2.y.x2.x1
-[1] 0.1670045+[1] 0.167
 > cma.1 <- all.x2 - spr2.y.x2.x1 > cma.1 <- all.x2 - spr2.y.x2.x1
 > cma.1 > cma.1
-[1] 0.3122658+[1] 0.3123
  
 > # 2. > # 2.
Line 769: Line 822:
  
 Call: Call:
-lm(formula = y ~ x1, data = d)+lm(formula = y ~ x1, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.5189 -0.8969 -0.1297  1.0058  1.5800 +-1.519 -0.897 -0.130  1.006  1.580 
  
 Coefficients: Coefficients:
             Estimate Std. Error t value Pr(>|t|)                Estimate Std. Error t value Pr(>|t|)   
-(Intercept) 3.617781   1.241518   2.914  0.01947 *  +(Intercept)  3.61778    1.24152    2.91    0.019 *  
-x1          0.015269   0.004127   3.700  0.00605 **+x1           0.01527    0.00413    3.70    0.006 **
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 1.176 on 8 degrees of freedom +Residual standard error: 1.18 on 8 degrees of freedom 
-Multiple R-squared:  0.6311, Adjusted R-squared:  0.585  +Multiple R-squared:  0.631, Adjusted R-squared:  0.585  
-F-statistic: 13.69 on 1 and 8 DF,  p-value: 0.006046+F-statistic: 13.on 1 and 8 DF,  p-value: 0.00605
  
 > all.x1 <- summary(lm.y.x1)$r.squared > all.x1 <- summary(lm.y.x1)$r.squared
 > all.x1 > all.x1
-[1] 0.631121+[1] 0.6311
 > spr2.y.x1.x2 > spr2.y.x1.x2
-[1] 0.3188552+[1] 0.3189
 > cma.2 <- all.x1 - spr2.y.x1.x2 > cma.2 <- all.x1 - spr2.y.x1.x2
 > cma.2 > cma.2
-[1] 0.3122658+[1] 0.3123
  
 > # OR 3. > # OR 3.
Line 800: Line 852:
  
 Call: Call:
-lm(formula = y ~ x1 + x2, data = d)+lm(formula = y ~ x1 + x2, data = df)
  
 Residuals: Residuals:
-    Min      1Q  Median      3Q     Max  +   Min     1Q Median     3Q    Max  
--1.2173 -0.5779 -0.1515  0.6642  1.1906 +-1.217 -0.578 -0.151  0.664  1.191 
  
 Coefficients: Coefficients:
-             Estimate Std. Error t value Pr(>|t|)    +            Estimate Std. Error t value Pr(>|t|)    
-(Intercept)  6.399103   1.516539   4.220  0.00394 ** +(Intercept)  6.39910    1.51654    4.22   0.0039 ** 
-x1           0.011841   0.003561   3.325  0.01268 *  +x1           0.01184    0.00356    3.33   0.0127 *  
-x2          -0.544727   0.226364  -2.406  0.04702 +x2          -0.54473    0.22636   -2.41   0.0470 
 --- ---
-Signif. codes:   +Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1+
  
-Residual standard error: 0.9301 on 7 degrees of freedom +Residual standard error: 0.93 on 7 degrees of freedom 
-Multiple R-squared:  0.7981, Adjusted R-squared:  0.7404  +Multiple R-squared:  0.798, Adjusted R-squared:  0.74  
-F-statistic: 13.84 on 2 and 7 DF,  p-value: 0.003696+F-statistic: 13.on 2 and 7 DF,  p-value: 0.0037
  
 > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
 > r2.y.x1x2 > r2.y.x1x2
-[1] 0.7981255+[1] 0.7981
 > spr2.y.x1.x2 > spr2.y.x1.x2
-[1] 0.3188552+[1] 0.3189
 > spr2.y.x2.x1 > spr2.y.x2.x1
-[1] 0.1670045+[1] 0.167
 > cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1) > cma.3 <- r2.y.x1x2 - (spr2.y.x1.x2 + spr2.y.x2.x1)
 +> bcd - bd
 +[1] 0.3123
 > cma.3 > cma.3
-[1] 0.3122658+[1] 0.3123
  
 > cma.1  > cma.1 
-[1] 0.3122658+[1] 0.3123
 > cma.2 > cma.2
-[1] 0.3122658+[1] 0.3123
 > cma.3 > cma.3
-[1] 0.3122658+[1] 0.3123
  
 > # OR 애초에 simple regression과 multiple  > # OR 애초에 simple regression과 multiple 
Line 843: Line 896:
 > r2.y.x2 <- summary(lm.y.x2)$r.square > r2.y.x2 <- summary(lm.y.x2)$r.square
 > r2.y.x1 > r2.y.x1
-[1] 0.631121+[1] 0.6311
 > r2.y.x2 > r2.y.x2
-[1] 0.4792703+[1] 0.4793
 > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square > r2.y.x1x2 <- summary(lm.y.x1x2)$r.square
 > r2.y.x1x2 > r2.y.x1x2
-[1] 0.7981255+[1] 0.7981
 > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2 > cma.4 <- r2.y.x1 + r2.y.x2 - r2.y.x1x2
 > cma.4 > cma.4
-[1] 0.3122658+[1] 0.3123
  
 > # Note that sorting out unique and common > # Note that sorting out unique and common
Line 863: Line 916:
  
  
 +</code>
 +
 +====== explanation. added ======
 +{{:c:ms:2025:schedule:pasted:20250609-074413.png?400}}
 +<code>
 +# ex.
 +# resid(lm(y~x1, data=df)) = bc / delta.y
 +# resid(lm(y~x2, data=df)) = cd / delta.y
 +# resid(lm(y~x1+x2, data=df)) = bcd / delta.y
 +# b / delta.y = ?
 +# ce / delta.x2 = ?
 +</code>
 +
 +<code>
 +# exp.added
 +spcor.test(df$y, df$x1, df$x2)
 +spcor.test(df$y, df$x1, df$x2)$estimate
 +spcor.test(df$y, df$x1, df$x2)$estimate^2
 +spcor.test(df$y, df$x2, df$x1)$estimate^2
 +summary(lm(y~x1+x2, data=df))$r.square
 +
 +b <- spcor.test(df$y, df$x1, df$x2)$estimate^2
 +d <- spcor.test(df$y, df$x2, df$x1)$estimate^2
 +bcd <- summary(lm(y~x1+x2, data=df))$r.square
 +
 +summary(lm(df$y~df$x1+df$x2, data=df))$r.square - 
 +  (spcor.test(df$y, df$x1, df$x2)$estimate^2 + 
 +     spcor.test(df$y, df$x2, df$x1)$estimate^2)
 +bcd - (b + d)
 +
 </code> </code>
  
c/ms/2025/schedule/w13.lecture.note.1749420529.txt.gz · Last modified: 2025/06/09 07:08 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki