c:ms:2025:schedule:w13.lecture.note
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| c:ms:2025:schedule:w13.lecture.note [2025/06/08 22:44] – [output] hkimscil | c:ms:2025:schedule:w13.lecture.note [2025/06/08 23:51] (current) – [output] hkimscil | ||
|---|---|---|---|
| Line 5: | Line 5: | ||
| # | # | ||
| rm(list=ls()) | rm(list=ls()) | ||
| - | d <- read.csv(" | + | df <- read.csv(" |
| - | d | + | df |
| - | colnames(d) <- c(" | + | colnames(df) <- c(" |
| - | d | + | df |
| # y = 통장갯수 | # y = 통장갯수 | ||
| # x1 = 인컴 | # x1 = 인컴 | ||
| # x2 = 부양가족수 | # x2 = 부양가족수 | ||
| - | lm.y.x1 <- lm(y ~ x1, data=d) | + | lm.y.x1 <- lm(y ~ x1, data=df) |
| summary(lm.y.x1) | summary(lm.y.x1) | ||
| anova(lm.y.x1) | anova(lm.y.x1) | ||
| - | cor(d$x1, d$y)^2 | + | cor(df$x1, df$y)^2 |
| summary(lm.y.x1)$r.squared | summary(lm.y.x1)$r.squared | ||
| - | lm.y.x2 <- lm(y ~ x2, data=d) | + | lm.y.x2 <- lm(y ~ x2, data=df) |
| summary(lm.y.x2) | summary(lm.y.x2) | ||
| anova(lm.y.x2) | anova(lm.y.x2) | ||
| - | cor(d$x2, d$y)^2 | + | cor(df$x2, df$y)^2 |
| summary(lm.y.x2)$r.squared | summary(lm.y.x2)$r.squared | ||
| - | lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
| summary(lm.y.x1x2) | summary(lm.y.x1x2) | ||
| anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
| Line 38: | Line 38: | ||
| lm.y.x1x2$coefficient | lm.y.x1x2$coefficient | ||
| - | # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | # y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2 |
| a <- lm.y.x1x2$coefficient[1] | a <- lm.y.x1x2$coefficient[1] | ||
| b1 <- lm.y.x1x2$coefficient[2] | b1 <- lm.y.x1x2$coefficient[2] | ||
| Line 46: | Line 46: | ||
| b2 | b2 | ||
| - | y.pred <- a + (b1 * d$x1) + (b2 * d$x2) | + | y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
| y.pred | y.pred | ||
| # or | # or | ||
| Line 52: | Line 52: | ||
| head(y.pred == y.pred2) | head(y.pred == y.pred2) | ||
| - | y.real <- d$y | + | y.real <- df$y |
| y.real | y.real | ||
| - | y.mean <- mean(d$y) | + | y.mean <- mean(df$y) |
| y.mean | y.mean | ||
| - | deviation.score <- d$y - y.mean | + | deviation.score <- df$y - y.mean |
| ds <- deviation.score | ds <- deviation.score | ||
| res <- y.real - y.pred | res <- y.real - y.pred | ||
| Line 64: | Line 64: | ||
| # remember y is sum of res + reg + y.mean | # remember y is sum of res + reg + y.mean | ||
| y2 <- res + reg + y.mean | y2 <- res + reg + y.mean | ||
| - | d$y==y2 | + | df$y==y2 |
| ss.tot <- sum(ds^2) | ss.tot <- sum(ds^2) | ||
| Line 70: | Line 70: | ||
| ss.reg <- sum(reg^2) | ss.reg <- sum(reg^2) | ||
| - | ss.tot2 <- var(d$y) * (length(d$y)-1) | + | ss.tot2 <- var(df$y) * (length(df$y)-1) |
| ss.tot | ss.tot | ||
| ss.tot2 | ss.tot2 | ||
| Line 78: | Line 78: | ||
| k <- 3 # # of parameters a, b1, b2 | k <- 3 # # of parameters a, b1, b2 | ||
| - | df.tot <- length(y)-1 | + | df.tot <- length(df$y)-1 |
| df.reg <- k - 1 | df.reg <- k - 1 | ||
| df.res <- df.tot - df.reg | df.res <- df.tot - df.reg | ||
| Line 95: | Line 95: | ||
| anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
| - | summary(lm(y~x2+x1, | + | summary(lm(y~x2+x1, |
| - | anova(lm(y~x2+x1, | + | anova(lm(y~x2+x1, |
| # note on 2 t-tests in summary | # note on 2 t-tests in summary | ||
| Line 122: | Line 122: | ||
| # beta coefficient (standardized b) | # beta coefficient (standardized b) | ||
| # beta <- b * (sd(x)/ | # beta <- b * (sd(x)/ | ||
| - | beta1 <- b1 * (sd(x1)/ | + | beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
| - | beta2 <- b2 * (sd(x2)/ | + | beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
| beta1 | beta1 | ||
| beta2 | beta2 | ||
| Line 136: | Line 136: | ||
| # understand with diagrams first | # understand with diagrams first | ||
| # then calculate with r | # then calculate with r | ||
| - | lm.tmp.1 <- lm(x2~x1, data=d) | + | lm.tmp.1 <- lm(x2~x1, data=df) |
| res.x2.x1 <- lm.tmp.1$residuals | res.x2.x1 <- lm.tmp.1$residuals | ||
| - | lm.tmp.2 <- lm(y~x1, data=d) | + | lm.tmp.2 <- lm(y~x1, data=df) |
| res.y.x1 <- lm.tmp.2$residuals | res.y.x1 <- lm.tmp.2$residuals | ||
| - | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
| summary(lm.tmp.3) | summary(lm.tmp.3) | ||
| summary(lm.tmp.3)$r.squared | summary(lm.tmp.3)$r.squared | ||
| Line 148: | Line 148: | ||
| # install.packages(" | # install.packages(" | ||
| library(ppcor) | library(ppcor) | ||
| - | partial.r <- pcor.test(d$y, d$x2, d$x1) | + | partial.r <- pcor.test(df$y, df$x2, df$x1) |
| str(partial.r) | str(partial.r) | ||
| partial.r$estimate | partial.r$estimate | ||
| Line 157: | Line 157: | ||
| # x1's own explanation? | # x1's own explanation? | ||
| - | lm.tmp.4 <- lm(x1~x2, data=d) | + | lm.tmp.4 <- lm(x1~x2, data=df) |
| res.x1.x2 <- lm.tmp.4$residuals | res.x1.x2 <- lm.tmp.4$residuals | ||
| - | lm.tmp.5 <- lm(y~x2, data=d) | + | lm.tmp.5 <- lm(y~x2, data=df) |
| res.y.x2 <- lm.tmp.5$residuals | res.y.x2 <- lm.tmp.5$residuals | ||
| - | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
| summary(lm.tmp.6) | summary(lm.tmp.6) | ||
| - | partial.r <- pcor.test(d$y, d$x1, d$x2) | + | partial.r <- pcor.test(df$y, df$x1, df$x2) |
| str(partial.r) | str(partial.r) | ||
| partial.r$estimate # this is partial correlation, | partial.r$estimate # this is partial correlation, | ||
| Line 176: | Line 176: | ||
| # semipartial correlation coefficient and spr2 | # semipartial correlation coefficient and spr2 | ||
| # | # | ||
| - | spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) | + | spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
| - | spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2) | + | spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
| spr.y.x2.x1 | spr.y.x2.x1 | ||
| spr.y.x1.x2 | spr.y.x1.x2 | ||
| Line 185: | Line 185: | ||
| spr2.y.x1.x2 | spr2.y.x1.x2 | ||
| - | lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
| summary(lm.tmp.7) | summary(lm.tmp.7) | ||
| spr2.y.x2.x1 | spr2.y.x2.x1 | ||
| - | lm.tmp.8 <- lm(y~res.x1.x2, | + | lm.tmp.8 <- lm(y~res.x1.x2, |
| summary(lm.tmp.8) | summary(lm.tmp.8) | ||
| spr2.y.x1.x2 | spr2.y.x1.x2 | ||
| Line 250: | Line 250: | ||
| # y). | # y). | ||
| ############################################# | ############################################# | ||
| - | |||
| Line 260: | Line 259: | ||
| > # | > # | ||
| > rm(list=ls()) | > rm(list=ls()) | ||
| - | > d <- read.csv(" | + | > df <- read.csv(" |
| - | > d | + | > df |
| | | ||
| 1 6 220 5 | 1 6 220 5 | ||
| Line 274: | Line 273: | ||
| 10 | 10 | ||
| > | > | ||
| - | > colnames(d) <- c(" | + | > colnames(df) <- c(" |
| - | > d | + | > df |
| y x1 x2 | y x1 x2 | ||
| 1 6 220 5 | 1 6 220 5 | ||
| Line 290: | Line 289: | ||
| > # x1 = 인컴 | > # x1 = 인컴 | ||
| > # x2 = 부양가족수 | > # x2 = 부양가족수 | ||
| - | > lm.y.x1 <- lm(y ~ x1, data=d) | + | > lm.y.x1 <- lm(y ~ x1, data=df) |
| > summary(lm.y.x1) | > summary(lm.y.x1) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
| Residuals: | Residuals: | ||
| Line 320: | Line 319: | ||
| --- | --- | ||
| Signif. codes: | Signif. codes: | ||
| - | > cor(d$x1, d$y)^2 | + | > cor(df$x1, df$y)^2 |
| [1] 0.6311 | [1] 0.6311 | ||
| > summary(lm.y.x1)$r.squared | > summary(lm.y.x1)$r.squared | ||
| Line 326: | Line 325: | ||
| > | > | ||
| > | > | ||
| - | > lm.y.x2 <- lm(y ~ x2, data=d) | + | > lm.y.x2 <- lm(y ~ x2, data=df) |
| > summary(lm.y.x2) | > summary(lm.y.x2) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
| Residuals: | Residuals: | ||
| Line 356: | Line 355: | ||
| --- | --- | ||
| Signif. codes: | Signif. codes: | ||
| - | > cor(d$x2, d$y)^2 | + | > cor(df$x2, df$y)^2 |
| [1] 0.4793 | [1] 0.4793 | ||
| > summary(lm.y.x2)$r.squared | > summary(lm.y.x2)$r.squared | ||
| Line 362: | Line 361: | ||
| > | > | ||
| > | > | ||
| - | > lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | > lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
| > summary(lm.y.x1x2) | > summary(lm.y.x1x2) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Residuals: | Residuals: | ||
| Line 407: | Line 406: | ||
| (Intercept) | (Intercept) | ||
| 6.39910 | 6.39910 | ||
| - | > # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | > # y.hat = 6.399103 + (0.01184145)*x1 + (?0.54472725)*x2 |
| > a <- lm.y.x1x2$coefficient[1] | > a <- lm.y.x1x2$coefficient[1] | ||
| > b1 <- lm.y.x1x2$coefficient[2] | > b1 <- lm.y.x1x2$coefficient[2] | ||
| Line 421: | Line 420: | ||
| -0.5447 | -0.5447 | ||
| > | > | ||
| - | > y.pred <- a + (b1 * d$x1) + (b2 * d$x2) | + | > y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
| > y.pred | > y.pred | ||
| | | ||
| Line 430: | Line 429: | ||
| TRUE TRUE TRUE TRUE TRUE TRUE | TRUE TRUE TRUE TRUE TRUE TRUE | ||
| > | > | ||
| - | > y.real <- d$y | + | > y.real <- df$y |
| > y.real | > y.real | ||
| | | ||
| - | > y.mean <- mean(d$y) | + | > y.mean <- mean(df$y) |
| > y.mean | > y.mean | ||
| [1] 8 | [1] 8 | ||
| > | > | ||
| - | > deviation.score <- d$y - y.mean | + | > deviation.score <- df$y - y.mean |
| > ds <- deviation.score | > ds <- deviation.score | ||
| > res <- y.real - y.pred | > res <- y.real - y.pred | ||
| Line 445: | Line 444: | ||
| > # remember y is sum of res + reg + y.mean | > # remember y is sum of res + reg + y.mean | ||
| > y2 <- res + reg + y.mean | > y2 <- res + reg + y.mean | ||
| - | > d$y==y2 | + | > df$y==y2 |
| [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | ||
| > | > | ||
| Line 452: | Line 451: | ||
| > ss.reg <- sum(reg^2) | > ss.reg <- sum(reg^2) | ||
| > | > | ||
| - | > ss.tot2 <- var(d$y) * (length(d$y)-1) | + | > ss.tot2 <- var(df$y) * (length(df$y)-1) |
| > ss.tot | > ss.tot | ||
| [1] 30 | [1] 30 | ||
| Line 465: | Line 464: | ||
| > | > | ||
| > k <- 3 # # of parameters a, b1, b2 | > k <- 3 # # of parameters a, b1, b2 | ||
| - | > df.tot <- length(d$y)-1 | + | > df.tot <- length(df$y)-1 |
| > df.reg <- k - 1 | > df.reg <- k - 1 | ||
| > df.res <- df.tot - df.reg | > df.res <- df.tot - df.reg | ||
| Line 486: | Line 485: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Residuals: | Residuals: | ||
| Line 515: | Line 514: | ||
| Signif. codes: | Signif. codes: | ||
| > | > | ||
| - | > summary(lm(y~x2+x1, | + | > summary(lm(y~x2+x1, |
| Call: | Call: | ||
| - | lm(formula = y ~ x2 + x1, data = d) | + | lm(formula = y ~ x2 + x1, data = df) |
| Residuals: | Residuals: | ||
| Line 536: | Line 535: | ||
| F-statistic: | F-statistic: | ||
| - | > anova(lm(y~x2+x1, | + | > anova(lm(y~x2+x1, |
| Analysis of Variance Table | Analysis of Variance Table | ||
| Line 571: | Line 570: | ||
| > # beta coefficient (standardized b) | > # beta coefficient (standardized b) | ||
| > # beta <- b * (sd(x)/ | > # beta <- b * (sd(x)/ | ||
| - | > beta1 <- b1 * (sd(d$x1)/sd(d$y)) | + | > beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
| - | > beta2 <- b2 * (sd(d$x2)/sd(d$y)) | + | > beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
| > beta1 | > beta1 | ||
| x1 | x1 | ||
| Line 582: | Line 581: | ||
| > # install.packages(" | > # install.packages(" | ||
| > library(lm.beta) | > library(lm.beta) | ||
| - | 경고메시지(들): | ||
| - | 패키지 ‘lm.beta’는 R 버전 4.3.3에서 작성되었습니다 | ||
| > lm.beta(lm.y.x1x2) | > lm.beta(lm.y.x1x2) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Standardized Coefficients:: | Standardized Coefficients:: | ||
| Line 599: | Line 596: | ||
| > # understand with diagrams first | > # understand with diagrams first | ||
| > # then calculate with r | > # then calculate with r | ||
| - | > lm.tmp.1 <- lm(x2~x1, data=d) | + | > lm.tmp.1 <- lm(x2~x1, data=df) |
| > res.x2.x1 <- lm.tmp.1$residuals | > res.x2.x1 <- lm.tmp.1$residuals | ||
| > | > | ||
| - | > lm.tmp.2 <- lm(y~x1, data=d) | + | > lm.tmp.2 <- lm(y~x1, data=df) |
| > res.y.x1 <- lm.tmp.2$residuals | > res.y.x1 <- lm.tmp.2$residuals | ||
| > | > | ||
| - | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
| > summary(lm.tmp.3) | > summary(lm.tmp.3) | ||
| Call: | Call: | ||
| - | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
| Residuals: | Residuals: | ||
| Line 632: | Line 629: | ||
| > # install.packages(" | > # install.packages(" | ||
| > library(ppcor) | > library(ppcor) | ||
| - | > partial.r <- pcor.test(d$y, d$x2, d$x1) | + | > partial.r <- pcor.test(df$y, df$x2, df$x1) |
| > str(partial.r) | > str(partial.r) | ||
| ' | ' | ||
| Line 646: | Line 643: | ||
| Call: | Call: | ||
| - | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
| Residuals: | Residuals: | ||
| Line 670: | Line 667: | ||
| > | > | ||
| > # x1's own explanation? | > # x1's own explanation? | ||
| - | > lm.tmp.4 <- lm(x1~x2, data=d) | + | > lm.tmp.4 <- lm(x1~x2, data=df) |
| > res.x1.x2 <- lm.tmp.4$residuals | > res.x1.x2 <- lm.tmp.4$residuals | ||
| > | > | ||
| - | > lm.tmp.5 <- lm(y~x2, data=d) | + | > lm.tmp.5 <- lm(y~x2, data=df) |
| > res.y.x2 <- lm.tmp.5$residuals | > res.y.x2 <- lm.tmp.5$residuals | ||
| > | > | ||
| - | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
| > summary(lm.tmp.6) | > summary(lm.tmp.6) | ||
| Call: | Call: | ||
| - | lm(formula = res.y.x2 ~ res.x1.x2, data = d) | + | lm(formula = res.y.x2 ~ res.x1.x2, data = df) |
| Residuals: | Residuals: | ||
| Line 698: | Line 695: | ||
| > | > | ||
| - | > partial.r <- pcor.test(d$y, d$x1, d$x2) | + | > partial.r <- pcor.test(df$y, df$x1, df$x2) |
| > str(partial.r) | > str(partial.r) | ||
| ' | ' | ||
| Line 718: | Line 715: | ||
| > # semipartial correlation coefficient and spr2 | > # semipartial correlation coefficient and spr2 | ||
| > # | > # | ||
| - | > spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) | + | > spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
| - | > spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2) | + | > spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
| > spr.y.x2.x1 | > spr.y.x2.x1 | ||
| estimate p.value statistic | estimate p.value statistic | ||
| Line 733: | Line 730: | ||
| [1] 0.3189 | [1] 0.3189 | ||
| > | > | ||
| - | > lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
| > summary(lm.tmp.7) | > summary(lm.tmp.7) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ res.x2.x1, data = d) | + | lm(formula = y ~ res.x2.x1, data = df) |
| Residuals: | Residuals: | ||
| Line 757: | Line 754: | ||
| [1] 0.167 | [1] 0.167 | ||
| > | > | ||
| - | > lm.tmp.8 <- lm(y~res.x1.x2, | + | > lm.tmp.8 <- lm(y~res.x1.x2, |
| > summary(lm.tmp.8) | > summary(lm.tmp.8) | ||
| Call: | Call: | ||
| - | lm(formula = y ~ res.x1.x2, data = d) | + | lm(formula = y ~ res.x1.x2, data = df) |
| Residuals: | Residuals: | ||
| Line 795: | Line 792: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
| Residuals: | Residuals: | ||
| Line 825: | Line 822: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
| Residuals: | Residuals: | ||
| Line 855: | Line 852: | ||
| Call: | Call: | ||
| - | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
| Residuals: | Residuals: | ||
| Line 917: | Line 914: | ||
| > # y). | > # y). | ||
| > ############################################# | > ############################################# | ||
| + | > | ||
| > | > | ||
| </ | </ | ||
| ====== explanation. added ====== | ====== explanation. added ====== | ||
| - | {{: | + | {{: |
| + | < | ||
| + | # ex. | ||
| + | # resid(lm(y~x1, | ||
| + | # resid(lm(y~x2, | ||
| + | # resid(lm(y~x1+x2, | ||
| + | # b / delta.y = ? | ||
| + | # ce / delta.x2 = ? | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | # exp.added | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | spcor.test(df$y, | ||
| + | summary(lm(y~x1+x2, | ||
| + | |||
| + | b <- spcor.test(df$y, | ||
| + | d <- spcor.test(df$y, | ||
| + | bcd <- summary(lm(y~x1+x2, | ||
| + | |||
| + | summary(lm(df$y~df$x1+df$x2, | ||
| + | (spcor.test(df$y, | ||
| + | | ||
| + | bcd - (b + d) | ||
| + | |||
| + | </ | ||
| ====== Another one ====== | ====== Another one ====== | ||
| [[:partial and semipartial correlation]] | [[:partial and semipartial correlation]] | ||
c/ms/2025/schedule/w13.lecture.note.1749422662.txt.gz · Last modified: by hkimscil
