c:ms:2025:schedule:w13.lecture.note
Differences
This shows you the differences between two versions of the page.
Both sides previous revision · Previous revision · Next revision | Previous revision | ||
c:ms:2025:schedule:w13.lecture.note [2025/06/09 07:11] – [output] hkimscil | c:ms:2025:schedule:w13.lecture.note [2025/06/09 08:51] (current) – [output] hkimscil | ||
---|---|---|---|
Line 5: | Line 5: | ||
# | # | ||
rm(list=ls()) | rm(list=ls()) | ||
- | d <- read.csv(" | + | df <- read.csv(" |
- | d | + | df |
- | colnames(d) <- c(" | + | colnames(df) <- c(" |
- | d | + | df |
# y = 통장갯수 | # y = 통장갯수 | ||
# x1 = 인컴 | # x1 = 인컴 | ||
# x2 = 부양가족수 | # x2 = 부양가족수 | ||
- | lm.y.x1 <- lm(y ~ x1, data=d) | + | lm.y.x1 <- lm(y ~ x1, data=df) |
summary(lm.y.x1) | summary(lm.y.x1) | ||
anova(lm.y.x1) | anova(lm.y.x1) | ||
- | cor(d$x1, d$y)^2 | + | cor(df$x1, df$y)^2 |
summary(lm.y.x1)$r.squared | summary(lm.y.x1)$r.squared | ||
- | lm.y.x2 <- lm(y ~ x2, data=d) | + | lm.y.x2 <- lm(y ~ x2, data=df) |
summary(lm.y.x2) | summary(lm.y.x2) | ||
anova(lm.y.x2) | anova(lm.y.x2) | ||
- | cor(d$x2, d$y)^2 | + | cor(df$x2, df$y)^2 |
summary(lm.y.x2)$r.squared | summary(lm.y.x2)$r.squared | ||
- | lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
summary(lm.y.x1x2) | summary(lm.y.x1x2) | ||
anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
Line 38: | Line 38: | ||
lm.y.x1x2$coefficient | lm.y.x1x2$coefficient | ||
- | # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 |
a <- lm.y.x1x2$coefficient[1] | a <- lm.y.x1x2$coefficient[1] | ||
b1 <- lm.y.x1x2$coefficient[2] | b1 <- lm.y.x1x2$coefficient[2] | ||
Line 46: | Line 46: | ||
b2 | b2 | ||
- | y.pred <- a + (b1 * d$x1) + (b2 * d$x2) | + | y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
y.pred | y.pred | ||
# or | # or | ||
Line 52: | Line 52: | ||
head(y.pred == y.pred2) | head(y.pred == y.pred2) | ||
- | y.real <- d$y | + | y.real <- df$y |
y.real | y.real | ||
- | y.mean <- mean(d$y) | + | y.mean <- mean(df$y) |
y.mean | y.mean | ||
- | deviation.score <- d$y - y.mean | + | deviation.score <- df$y - y.mean |
ds <- deviation.score | ds <- deviation.score | ||
res <- y.real - y.pred | res <- y.real - y.pred | ||
Line 64: | Line 64: | ||
# remember y is sum of res + reg + y.mean | # remember y is sum of res + reg + y.mean | ||
y2 <- res + reg + y.mean | y2 <- res + reg + y.mean | ||
- | d$y==y2 | + | df$y==y2 |
ss.tot <- sum(ds^2) | ss.tot <- sum(ds^2) | ||
Line 70: | Line 70: | ||
ss.reg <- sum(reg^2) | ss.reg <- sum(reg^2) | ||
- | ss.tot2 <- var(d$y) * (length(d$y)-1) | + | ss.tot2 <- var(df$y) * (length(df$y)-1) |
ss.tot | ss.tot | ||
ss.tot2 | ss.tot2 | ||
Line 78: | Line 78: | ||
k <- 3 # number of parameters: a, b1, b2 | k <- 3 # number of parameters: a, b1, b2 | ||
- | df.tot <- length(y)-1 | + | df.tot <- length(df$y)-1 |
df.reg <- k - 1 | df.reg <- k - 1 | ||
df.res <- df.tot - df.reg | df.res <- df.tot - df.reg | ||
Line 95: | Line 95: | ||
anova(lm.y.x1x2) | anova(lm.y.x1x2) | ||
- | summary(lm(y~x2+x1, | + | summary(lm(y~x2+x1, |
- | anova(lm(y~x2+x1, | + | anova(lm(y~x2+x1, |
# note on 2 t-tests in summary | # note on 2 t-tests in summary | ||
Line 122: | Line 122: | ||
# beta coefficient (standardized b) | # beta coefficient (standardized b) | ||
# beta <- b * (sd(x)/ | # beta <- b * (sd(x)/ | ||
- | beta1 <- b1 * (sd(x1)/ | + | beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
- | beta2 <- b2 * (sd(x2)/ | + | beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
beta1 | beta1 | ||
beta2 | beta2 | ||
Line 136: | Line 136: | ||
# understand with diagrams first | # understand with diagrams first | ||
# then calculate with r | # then calculate with r | ||
- | lm.tmp.1 <- lm(x2~x1, data=d) | + | lm.tmp.1 <- lm(x2~x1, data=df) |
res.x2.x1 <- lm.tmp.1$residuals | res.x2.x1 <- lm.tmp.1$residuals | ||
- | lm.tmp.2 <- lm(y~x1, data=d) | + | lm.tmp.2 <- lm(y~x1, data=df) |
res.y.x1 <- lm.tmp.2$residuals | res.y.x1 <- lm.tmp.2$residuals | ||
- | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
summary(lm.tmp.3) | summary(lm.tmp.3) | ||
summary(lm.tmp.3)$r.squared | summary(lm.tmp.3)$r.squared | ||
Line 148: | Line 148: | ||
# install.packages(" | # install.packages(" | ||
library(ppcor) | library(ppcor) | ||
- | partial.r <- pcor.test(d$y, d$x2, d$x1) | + | partial.r <- pcor.test(df$y, df$x2, df$x1) |
str(partial.r) | str(partial.r) | ||
partial.r$estimate | partial.r$estimate | ||
Line 157: | Line 157: | ||
# x1's own explanation? | # x1's own explanation? | ||
- | lm.tmp.4 <- lm(x1~x2, data=d) | + | lm.tmp.4 <- lm(x1~x2, data=df) |
res.x1.x2 <- lm.tmp.4$residuals | res.x1.x2 <- lm.tmp.4$residuals | ||
- | lm.tmp.5 <- lm(y~x2, data=d) | + | lm.tmp.5 <- lm(y~x2, data=df) |
res.y.x2 <- lm.tmp.5$residuals | res.y.x2 <- lm.tmp.5$residuals | ||
- | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
summary(lm.tmp.6) | summary(lm.tmp.6) | ||
- | partial.r <- pcor.test(d$y, d$x1, d$x2) | + | partial.r <- pcor.test(df$y, df$x1, df$x2) |
str(partial.r) | str(partial.r) | ||
partial.r$estimate # this is partial correlation, | partial.r$estimate # this is partial correlation, | ||
Line 176: | Line 176: | ||
# semipartial correlation coefficient and spr2 | # semipartial correlation coefficient and spr2 | ||
# | # | ||
- | spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) | + | spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
- | spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2) | + | spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
spr.y.x2.x1 | spr.y.x2.x1 | ||
spr.y.x1.x2 | spr.y.x1.x2 | ||
Line 185: | Line 185: | ||
spr2.y.x1.x2 | spr2.y.x1.x2 | ||
- | lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
summary(lm.tmp.7) | summary(lm.tmp.7) | ||
spr2.y.x2.x1 | spr2.y.x2.x1 | ||
- | lm.tmp.8 <- lm(y~res.x1.x2, | + | lm.tmp.8 <- lm(y~res.x1.x2, |
summary(lm.tmp.8) | summary(lm.tmp.8) | ||
spr2.y.x1.x2 | spr2.y.x1.x2 | ||
Line 250: | Line 250: | ||
# y). | # y). | ||
############################################# | ############################################# | ||
- | |||
Line 260: | Line 259: | ||
> # | > # | ||
> rm(list=ls()) | > rm(list=ls()) | ||
- | > d <- read.csv(" | + | > df <- read.csv(" |
- | > d | + | > df |
| | ||
1 6 220 5 | 1 6 220 5 | ||
Line 274: | Line 273: | ||
10 | 10 | ||
> | > | ||
- | > colnames(d) <- c(" | + | > colnames(df) <- c(" |
- | > d | + | > df |
y x1 x2 | y x1 x2 | ||
1 6 220 5 | 1 6 220 5 | ||
Line 290: | Line 289: | ||
> # x1 = 인컴 | > # x1 = 인컴 | ||
> # x2 = 부양가족수 | > # x2 = 부양가족수 | ||
- | > lm.y.x1 <- lm(y ~ x1, data=d) | + | > lm.y.x1 <- lm(y ~ x1, data=df) |
> summary(lm.y.x1) | > summary(lm.y.x1) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
Residuals: | Residuals: | ||
Line 320: | Line 319: | ||
--- | --- | ||
Signif. codes: | Signif. codes: | ||
- | > cor(d$x1, d$y)^2 | + | > cor(df$x1, df$y)^2 |
[1] 0.6311 | [1] 0.6311 | ||
> summary(lm.y.x1)$r.squared | > summary(lm.y.x1)$r.squared | ||
Line 326: | Line 325: | ||
> | > | ||
> | > | ||
- | > lm.y.x2 <- lm(y ~ x2, data=d) | + | > lm.y.x2 <- lm(y ~ x2, data=df) |
> summary(lm.y.x2) | > summary(lm.y.x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
Residuals: | Residuals: | ||
Line 356: | Line 355: | ||
--- | --- | ||
Signif. codes: | Signif. codes: | ||
- | > cor(d$x2, d$y)^2 | + | > cor(df$x2, df$y)^2 |
[1] 0.4793 | [1] 0.4793 | ||
> summary(lm.y.x2)$r.squared | > summary(lm.y.x2)$r.squared | ||
Line 362: | Line 361: | ||
> | > | ||
> | > | ||
- | > lm.y.x1x2 <- lm(y ~ x1+x2, data=d) | + | > lm.y.x1x2 <- lm(y ~ x1+x2, data=df) |
> summary(lm.y.x1x2) | > summary(lm.y.x1x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
Line 407: | Line 406: | ||
(Intercept) | (Intercept) | ||
6.39910 | 6.39910 | ||
- | > # y.hat = 6.399103 + (0.01184145)*x1 + (−0.54472725)*x2 | + | > # y.hat = 6.399103 + (0.01184145)*x1 + (-0.54472725)*x2 |
> a <- lm.y.x1x2$coefficient[1] | > a <- lm.y.x1x2$coefficient[1] | ||
> b1 <- lm.y.x1x2$coefficient[2] | > b1 <- lm.y.x1x2$coefficient[2] | ||
Line 421: | Line 420: | ||
-0.5447 | -0.5447 | ||
> | > | ||
- | > y.pred <- a + (b1 * d$x1) + (b2 * d$x2) | + | > y.pred <- a + (b1 * df$x1) + (b2 * df$x2) |
> y.pred | > y.pred | ||
| | ||
Line 430: | Line 429: | ||
TRUE TRUE TRUE TRUE TRUE TRUE | TRUE TRUE TRUE TRUE TRUE TRUE | ||
> | > | ||
- | > y.real <- d$y | + | > y.real <- df$y |
> y.real | > y.real | ||
| | ||
- | > y.mean <- mean(d$y) | + | > y.mean <- mean(df$y) |
> y.mean | > y.mean | ||
[1] 8 | [1] 8 | ||
> | > | ||
- | > deviation.score <- d$y - y.mean | + | > deviation.score <- df$y - y.mean |
> ds <- deviation.score | > ds <- deviation.score | ||
> res <- y.real - y.pred | > res <- y.real - y.pred | ||
Line 445: | Line 444: | ||
> # remember y is sum of res + reg + y.mean | > # remember y is sum of res + reg + y.mean | ||
> y2 <- res + reg + y.mean | > y2 <- res + reg + y.mean | ||
- | > d$y==y2 | + | > df$y==y2 |
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE | ||
> | > | ||
Line 452: | Line 451: | ||
> ss.reg <- sum(reg^2) | > ss.reg <- sum(reg^2) | ||
> | > | ||
- | > ss.tot2 <- var(d$y) * (length(d$y)-1) | + | > ss.tot2 <- var(df$y) * (length(df$y)-1) |
> ss.tot | > ss.tot | ||
[1] 30 | [1] 30 | ||
Line 465: | Line 464: | ||
> | > | ||
> k <- 3 # number of parameters: a, b1, b2 | > k <- 3 # number of parameters: a, b1, b2 | ||
- | > df.tot <- length(d$y)-1 | + | > df.tot <- length(df$y)-1 |
> df.reg <- k - 1 | > df.reg <- k - 1 | ||
> df.res <- df.tot - df.reg | > df.res <- df.tot - df.reg | ||
Line 486: | Line 485: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
Line 515: | Line 514: | ||
Signif. codes: | Signif. codes: | ||
> | > | ||
- | > summary(lm(y~x2+x1, | + | > summary(lm(y~x2+x1, |
Call: | Call: | ||
- | lm(formula = y ~ x2 + x1, data = d) | + | lm(formula = y ~ x2 + x1, data = df) |
Residuals: | Residuals: | ||
Line 536: | Line 535: | ||
F-statistic: | F-statistic: | ||
- | > anova(lm(y~x2+x1, | + | > anova(lm(y~x2+x1, |
Analysis of Variance Table | Analysis of Variance Table | ||
Line 571: | Line 570: | ||
> # beta coefficient (standardized b) | > # beta coefficient (standardized b) | ||
> # beta <- b * (sd(x)/ | > # beta <- b * (sd(x)/ | ||
- | > beta1 <- b1 * (sd(d$x1)/sd(d$y)) | + | > beta1 <- b1 * (sd(df$x1)/sd(df$y)) |
- | > beta2 <- b2 * (sd(d$x2)/sd(d$y)) | + | > beta2 <- b2 * (sd(df$x2)/sd(df$y)) |
> beta1 | > beta1 | ||
x1 | x1 | ||
Line 582: | Line 581: | ||
> # install.packages(" | > # install.packages(" | ||
> library(lm.beta) | > library(lm.beta) | ||
- | 경고메시지(들): | ||
- | 패키지 ‘lm.beta’는 R 버전 4.3.3에서 작성되었습니다 | ||
> lm.beta(lm.y.x1x2) | > lm.beta(lm.y.x1x2) | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Standardized Coefficients:: | Standardized Coefficients:: | ||
Line 599: | Line 596: | ||
> # understand with diagrams first | > # understand with diagrams first | ||
> # then calculate with r | > # then calculate with r | ||
- | > lm.tmp.1 <- lm(x2~x1, data=d) | + | > lm.tmp.1 <- lm(x2~x1, data=df) |
> res.x2.x1 <- lm.tmp.1$residuals | > res.x2.x1 <- lm.tmp.1$residuals | ||
> | > | ||
- | > lm.tmp.2 <- lm(y~x1, data=d) | + | > lm.tmp.2 <- lm(y~x1, data=df) |
> res.y.x1 <- lm.tmp.2$residuals | > res.y.x1 <- lm.tmp.2$residuals | ||
> | > | ||
- | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=d) | + | > lm.tmp.3 <- lm(res.y.x1 ~ res.x2.x1, data=df) |
> summary(lm.tmp.3) | > summary(lm.tmp.3) | ||
Call: | Call: | ||
- | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
Line 632: | Line 629: | ||
> # install.packages(" | > # install.packages(" | ||
> library(ppcor) | > library(ppcor) | ||
- | > partial.r <- pcor.test(d$y, d$x2, d$x1) | + | > partial.r <- pcor.test(df$y, df$x2, df$x1) |
> str(partial.r) | > str(partial.r) | ||
' | ' | ||
Line 646: | Line 643: | ||
Call: | Call: | ||
- | lm(formula = res.y.x1 ~ res.x2.x1, data = d) | + | lm(formula = res.y.x1 ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
Line 670: | Line 667: | ||
> | > | ||
> # x1's own explanation? | > # x1's own explanation? | ||
- | > lm.tmp.4 <- lm(x1~x2, data=d) | + | > lm.tmp.4 <- lm(x1~x2, data=df) |
> res.x1.x2 <- lm.tmp.4$residuals | > res.x1.x2 <- lm.tmp.4$residuals | ||
> | > | ||
- | > lm.tmp.5 <- lm(y~x2, data=d) | + | > lm.tmp.5 <- lm(y~x2, data=df) |
> res.y.x2 <- lm.tmp.5$residuals | > res.y.x2 <- lm.tmp.5$residuals | ||
> | > | ||
- | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=d) | + | > lm.tmp.6 <- lm(res.y.x2 ~ res.x1.x2, data=df) |
> summary(lm.tmp.6) | > summary(lm.tmp.6) | ||
Call: | Call: | ||
- | lm(formula = res.y.x2 ~ res.x1.x2, data = d) | + | lm(formula = res.y.x2 ~ res.x1.x2, data = df) |
Residuals: | Residuals: | ||
Line 698: | Line 695: | ||
> | > | ||
- | > partial.r <- pcor.test(d$y, d$x1, d$x2) | + | > partial.r <- pcor.test(df$y, df$x1, df$x2) |
> str(partial.r) | > str(partial.r) | ||
' | ' | ||
Line 718: | Line 715: | ||
> # semipartial correlation coefficient and spr2 | > # semipartial correlation coefficient and spr2 | ||
> # | > # | ||
- | > spr.y.x2.x1 <- spcor.test(d$y,d$x2,d$x1) | + | > spr.y.x2.x1 <- spcor.test(df$y,df$x2,df$x1) |
- | > spr.y.x1.x2 <- spcor.test(d$y,d$x1,d$x2) | + | > spr.y.x1.x2 <- spcor.test(df$y,df$x1,df$x2) |
> spr.y.x2.x1 | > spr.y.x2.x1 | ||
estimate p.value statistic | estimate p.value statistic | ||
Line 733: | Line 730: | ||
[1] 0.3189 | [1] 0.3189 | ||
> | > | ||
- | > lm.tmp.7 <- lm(y ~ res.x2.x1, data = d) | + | > lm.tmp.7 <- lm(y ~ res.x2.x1, data=df) |
> summary(lm.tmp.7) | > summary(lm.tmp.7) | ||
Call: | Call: | ||
- | lm(formula = y ~ res.x2.x1, data = d) | + | lm(formula = y ~ res.x2.x1, data = df) |
Residuals: | Residuals: | ||
Line 757: | Line 754: | ||
[1] 0.167 | [1] 0.167 | ||
> | > | ||
- | > lm.tmp.8 <- lm(y~res.x1.x2, | + | > lm.tmp.8 <- lm(y~res.x1.x2, |
> summary(lm.tmp.8) | > summary(lm.tmp.8) | ||
Call: | Call: | ||
- | lm(formula = y ~ res.x1.x2, data = d) | + | lm(formula = y ~ res.x1.x2, data = df) |
Residuals: | Residuals: | ||
Line 795: | Line 792: | ||
Call: | Call: | ||
- | lm(formula = y ~ x2, data = d) | + | lm(formula = y ~ x2, data = df) |
Residuals: | Residuals: | ||
Line 825: | Line 822: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1, data = d) | + | lm(formula = y ~ x1, data = df) |
Residuals: | Residuals: | ||
Line 855: | Line 852: | ||
Call: | Call: | ||
- | lm(formula = y ~ x1 + x2, data = d) | + | lm(formula = y ~ x1 + x2, data = df) |
Residuals: | Residuals: | ||
Line 918: | Line 915: | ||
> ############################################# | > ############################################# | ||
> | > | ||
+ | > | ||
+ | </ | ||
+ | |||
+ | ====== explanation. added ====== | ||
+ | {{: | ||
+ | < | ||
+ | # ex. | ||
+ | # resid(lm(y~x1, | ||
+ | # resid(lm(y~x2, | ||
+ | # resid(lm(y~x1+x2, | ||
+ | # b / delta.y = ? | ||
+ | # ce / delta.x2 = ? | ||
+ | </ | ||
+ | |||
+ | < | ||
+ | # exp.added | ||
+ | spcor.test(df$y, | ||
+ | spcor.test(df$y, | ||
+ | spcor.test(df$y, | ||
+ | spcor.test(df$y, | ||
+ | summary(lm(y~x1+x2, | ||
+ | |||
+ | b <- spcor.test(df$y, | ||
+ | d <- spcor.test(df$y, | ||
+ | bcd <- summary(lm(y~x1+x2, | ||
+ | |||
+ | summary(lm(df$y~df$x1+df$x2, | ||
+ | (spcor.test(df$y, | ||
+ | | ||
+ | bcd - (b + d) | ||
+ | |||
</ | </ | ||
c/ms/2025/schedule/w13.lecture.note.1749420697.txt.gz · Last modified: 2025/06/09 07:11 by hkimscil