c:ms:2025:schedule:w13.lecture.note
Differences
This shows you the differences between two versions of the page.
Both sides previous revision | Previous revision | |||
c:ms:2025:schedule:w13.lecture.note [2025/06/09 08:51] – [MR] hkimscil | c:ms:2025:schedule:w13.lecture.note [2025/06/09 08:51] (current) – [output] hkimscil | ||
---|---|---|---|
Line 259: | Line 259: | ||
> # | > # | ||
> rm(list=ls()) | > rm(list=ls()) | ||
- | > d <- read.csv(" | + | > df <- read.csv(" |
- | > d | + | > df |
| | ||
1 6 220 5 | 1 6 220 5 | ||
Line 273: | Line 273: | ||
10 | 10 | ||
> | > | ||
- | > colnames(d) <- c(" | + | > colnames(df) <- c(" |
- | > d | + | > df |
y x1 x2 | y x1 x2 | ||
1 6 220 5 | 1 6 220 5 | ||
Line 289: | Line 289: | ||
> # x1 = 인컴 | > # x1 = 인컴 | ||
> # x2 = 부양가족수 | > # x2 = 부양가족수 | ||
- | > lm.y.x1 <- lm(y ~ x1, data=d) | + | > lm.y.x1 <- lm(y ~ x1, data=df) |
> summary(lm.y.x1) | > summary(lm.y.x1) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
Residuals: | Residuals: | ||
Line 319: | Line 319: | ||
--- | --- | ||
Signif. codes: | Signif. codes: | ||
- | > cor(d$x1, d$y)^2 | + | > cor(df$x1, df$y)^2 |
[1] 0.6311 | [1] 0.6311 | ||
> summary(lm.y.x1)$r.squared | > summary(lm.y.x1)$r.squared | ||
Line 325: | Line 325: | ||
> | > | ||
> | > | ||
- | > lm.y.x2 <- lm(y ~ x2, data=d) | + | > lm.y.x2 <- lm(y ~ x2, data=df) |
> summary(lm.y.x2) | > summary(lm.y.x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
Residuals: | Residuals: | ||
Line 355: | Line 355: | ||
--- | --- | ||
Signif. codes: | Signif. codes: | ||
- | > cor(d$x2, d$y)^2 | + | > cor(df$x2, df$y)^2 |
[1] 0.4793 | [1] 0.4793 | ||
> summary(lm.y.x2)$r.squared | > summary(lm.y.x2)$r.squared | ||
Line 361: | Line 361: | ||
> | > | ||
> | > | ||
- | > lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | > lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
> summary(lm.y.x1x2) | > summary(lm.y.x1x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
Line 406: | Line 406: | ||
(Intercept) | (Intercept) | ||
6.39910 | 6.39910 | ||
- | > # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | > # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 |
> a <- lm.y.x1x2$coefficient[1] | > a <- lm.y.x1x2$coefficient[1] | ||
> b1 <- lm.y.x1x2$coefficient[2] | > b1 <- lm.y.x1x2$coefficient[2] | ||
Line 420: | Line 420: | ||
-0.5447 | -0.5447 | ||
> | > | ||
- | > y.pred <- a + (b1 * d$x1) + (b2 * d$x2) | + | > y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
> y.pred | > y.pred | ||
| | ||
Line 429: | Line 429: | ||
TRUE TRUE TRUE TRUE TRUE TRUE | TRUE TRUE TRUE TRUE TRUE TRUE | ||
> | > | ||
- | > y.real <- d$y | + | > y.real <- df$y |
> y.real | > y.real | ||
| | ||
- | > y.mean <- mean(d$y) | + | > y.mean <- mean(df$y) |
> y.mean | > y.mean | ||
[1] 8 | [1] 8 | ||
> | > | ||
- | > deviation.score <- d$y - y.mean | + | > deviation.score <- df$y - y.mean |
> ds <- deviation.score | > ds <- deviation.score | ||
> res <- y.real - y.pred | > res <- y.real - y.pred | ||
Line 444: | Line 444: | ||
> # remember y is sum of res + reg + y.mean | > # remember y is sum of res + reg + y.mean | ||
> y2 <- res + reg + y.mean | > y2 <- res + reg + y.mean | ||
- | > d$y==y2 | + | > df$y==y2 |
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | ||
> | > | ||
Line 451: | Line 451: | ||
> ss.reg <- sum(reg^2) | > ss.reg <- sum(reg^2) | ||
> | > | ||
- | > ss.tot2 <- var(d$y) * (length(d$y)-1) | + | > ss.tot2 <- var(df$y) * (length(df$y)-1) |
> ss.tot | > ss.tot | ||
[1] 30 | [1] 30 | ||
Line 464: | Line 464: | ||
> | > | ||
> k <- 3 # # of parameters a, b1, b2 | > k <- 3 # # of parameters a, b1, b2 | ||
- | > df.tot <- length(d$y)-1 | + | > df.tot <- length(df$y)-1 |
> df.reg <- k - 1 | > df.reg <- k - 1 | ||
> df.res <- df.tot - df.reg | > df.res <- df.tot - df.reg | ||
Line 485: | Line 485: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
Line 514: | Line 514: | ||
Signif. codes: | Signif. codes: | ||
> | > | ||
- | > summary(lm(y~x2+x1, | + | > summary(lm(y~x2+x1, |
Call: | Call: | ||
- | lm(formula = y ~ x2 + x1, data = d) | + | lm(formula = y ~ x2 + x1, data = df) |
Residuals: | Residuals: | ||
Line 535: | Line 535: | ||
F-statistic: | F-statistic: | ||
- | > anova(lm(y~x2+x1, | + | > anova(lm(y~x2+x1, |
Analysis of Variance Table | Analysis of Variance Table | ||
Line 570: | Line 570: | ||
> # beta coefficient (standardized b) | > # beta coefficient (standardized b) | ||
> # beta <- b * (sd(x)/ | > # beta <- b * (sd(x)/ | ||
- | > beta1 <- b1 * (sd(d$x1)/sd(d$y)) | + | > beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
- | > beta2 <- b2 * (sd(d$x2)/sd(d$y)) | + | > beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
> beta1 | > beta1 | ||
x1 | x1 | ||
Line 581: | Line 581: | ||
> # install.packages(" | > # install.packages(" | ||
> library(lm.beta) | > library(lm.beta) | ||
- | 경고메시지(들): | ||
- | 패키지 ‘lm.beta’는 R 버전 4.3.3에서 작성되었습니다 | ||
> lm.beta(lm.y.x1x2) | > lm.beta(lm.y.x1x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Standardized Coefficients:: | Standardized Coefficients:: | ||
Line 598: | Line 596: | ||
> # understand with diagrams first | > # understand with diagrams first | ||
> # then calculate with r | > # then calculate with r | ||
- | > lm.tmp.1 <- lm(x2~x1, data=d) | + | > lm.tmp.1 <- lm(x2~x1, data=df) |
> res.x2.x1 <- lm.tmp.1$residuals | > res.x2.x1 <- lm.tmp.1$residuals | ||
> | > | ||
- | > lm.tmp.2 <- lm(y~x1, data=d) | + | > lm.tmp.2 <- lm(y~x1, data=df) |
> res.y.x1 <- lm.tmp.2$residuals | > res.y.x1 <- lm.tmp.2$residuals | ||
> | > | ||
- | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
> summary(lm.tmp.3) | > summary(lm.tmp.3) | ||
Call: | Call: | ||
- | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
Line 631: | Line 629: | ||
> # install.packages(" | > # install.packages(" | ||
> library(ppcor) | > library(ppcor) | ||
- | > partial.r <- pcor.test(d$y, d$x2, d$x1) | + | > partial.r <- pcor.test(df$y, df$x2, df$x1) |
> str(partial.r) | > str(partial.r) | ||
' | ' | ||
Line 645: | Line 643: | ||
Call: | Call: | ||
- | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
Line 669: | Line 667: | ||
> | > | ||
> # x1's own explanation? | > # x1's own explanation? | ||
- | > lm.tmp.4 <- lm(x1~x2, data=d) | + | > lm.tmp.4 <- lm(x1~x2, data=df) |
> res.x1.x2 <- lm.tmp.4$residuals | > res.x1.x2 <- lm.tmp.4$residuals | ||
> | > | ||
- | > lm.tmp.5 <- lm(y~x2, data=d) | + | > lm.tmp.5 <- lm(y~x2, data=df) |
> res.y.x2 <- lm.tmp.5$residuals | > res.y.x2 <- lm.tmp.5$residuals | ||
> | > | ||
- | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
> summary(lm.tmp.6) | > summary(lm.tmp.6) | ||
Call: | Call: | ||
- | lm(formula = res.y.x2 ~ res.x1.x2, data = d) | + | lm(formula = res.y.x2 ~ res.x1.x2, data = df) |
Residuals: | Residuals: | ||
Line 697: | Line 695: | ||
> | > | ||
- | > partial.r <- pcor.test(d$y, d$x1, d$x2) | + | > partial.r <- pcor.test(df$y, df$x1, df$x2) |
> str(partial.r) | > str(partial.r) | ||
' | ' | ||
Line 717: | Line 715: | ||
> # semipartial correlation coefficient and spr2 | > # semipartial correlation coefficient and spr2 | ||
> # | > # | ||
- | > spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) | + | > spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
- | > spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2) | + | > spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
> spr.y.x2.x1 | > spr.y.x2.x1 | ||
estimate p.value statistic | estimate p.value statistic | ||
Line 732: | Line 730: | ||
[1] 0.3189 | [1] 0.3189 | ||
> | > | ||
- | > lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
> summary(lm.tmp.7) | > summary(lm.tmp.7) | ||
Call: | Call: | ||
- | lm(formula = y ~ res.x2.x1, data = d) | + | lm(formula = y ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
Line 756: | Line 754: | ||
[1] 0.167 | [1] 0.167 | ||
> | > | ||
- | > lm.tmp.8 <- lm(y~res.x1.x2, | + | > lm.tmp.8 <- lm(y~res.x1.x2, |
> summary(lm.tmp.8) | > summary(lm.tmp.8) | ||
Call: | Call: | ||
- | lm(formula = y ~ res.x1.x2, data = d) | + | lm(formula = y ~ res.x1.x2, data = df) |
Residuals: | Residuals: | ||
Line 794: | Line 792: | ||
Call: | Call: | ||
- | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
Residuals: | Residuals: | ||
Line 824: | Line 822: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
Residuals: | Residuals: | ||
Line 854: | Line 852: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
Line 916: | Line 914: | ||
> # y). | > # y). | ||
> ############################################# | > ############################################# | ||
+ | > | ||
> | > | ||
</ | </ |
c/ms/2025/schedule/w13.lecture.note.txt · Last modified: 2025/06/09 08:51 by hkimscil