c:ms:2025:schedule:w11.lecture.note
Differences
This shows you the differences between two versions of the page.
Both sides previous revision / Previous revision / Next revision | Previous revision | ||
c:ms:2025:schedule:w11.lecture.note [2025/05/21 08:57] – [code output] hkimscil | c:ms:2025:schedule:w11.lecture.note [2025/05/21 08:59] (current) – [code] hkimscil | ||
---|---|---|---|
Line 233: | Line 233: | ||
b <- -10 | b <- -10 | ||
n.y <- n.x | n.y <- n.x | ||
- | re <- rnorm(n.y, 0, 12) | + | re <- rnorm(n.y, 0, 20) |
y <- (a*x) + b + re | y <- (a*x) + b + re | ||
y | y | ||
Line 487: | Line 487: | ||
> b <- -10 | > b <- -10 | ||
> n.y <- n.x | > n.y <- n.x | ||
- | > re <- rnorm(n.y, 0, 12) | + | > re <- rnorm(n.y, 0, 20) |
> y <- (a*x) + b + re | > y <- (a*x) + b + re | ||
> y | > y | ||
[,1] | [,1] | ||
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | | + | |
- | [10,] 180.9157 | + | [10,] 182.5276 |
- | [11,] 190.5796 | + | [11,] 194.5656 |
- | [12,] 223.6552 | + | [12,] 232.4421 |
- | [13,] 240.9688 | + | [13,] 255.3070 |
- | [14,] 203.2454 | + | [14,] 209.0313 |
- | [15,] 217.8105 | + | [15,] 219.1602 |
- | [16,] 181.9690 | + | [16,] 181.7819 |
- | [17,] 204.7766 | + | [17,] 214.3124 |
- | [18,] 178.9262 | + | [18,] 187.2961 |
- | [19,] 196.2171 | + | [19,] 207.3581 |
- | [20,] 225.5945 | + | [20,] 235.0318 |
- | [21,] 200.1858 | + | [21,] 199.2244 |
- | [22,] 200.7760 | + | [22,] 208.1031 |
- | [23,] 188.6682 | + | [23,] 182.2575 |
- | [24,] 199.9381 | + | [24,] 201.4683 |
- | [25,] 176.0522 | + | [25,] 169.6124 |
- | [26,] 183.6903 | + | [26,] 173.7941 |
- | [27,] 165.2536 | + | [27,] 161.0332 |
- | [28,] 156.5911 | + | [28,] 152.1292 |
- | [29,] 160.9614 | + | [29,] 153.4874 |
- | [30,] 146.8763 | + | [30,] 147.7652 |
- | [31,] 202.7808 | + | [31,] 189.2685 |
- | [32,] 188.7577 | + | [32,] 188.5691 |
- | [33,] 157.6145 | + | [33,] 155.5620 |
- | [34,] 176.6427 | + | [34,] 172.8731 |
- | [35,] 187.3785 | + | [35,] 184.9119 |
- | [36,] 182.3525 | + | [36,] 183.3182 |
- | [37,] 129.7807 | + | [37,] 119.8266 |
- | [38,] 173.6276 | + | [38,] 162.6349 |
- | [39,] 171.7040 | + | [39,] 170.0872 |
- | [40,] 160.5982 | + | [40,] 150.7684 |
attr(," | attr(," | ||
[1] -0.08680858 | [1] -0.08680858 | ||
Line 540: | Line 540: | ||
> ss.y <- sum((y-m.y)^2) | > ss.y <- sum((y-m.y)^2) | ||
> ss.y/df.y | > ss.y/df.y | ||
- | [1] 567.0313 | + | [1] 804.2779 |
> var(y) | > var(y) | ||
[,1] | [,1] | ||
- | [1,] 567.0313 | + | [1,] 804.2779 |
> | > | ||
> # | > # | ||
Line 549: | Line 549: | ||
> sp <- sum(prod) | > sp <- sum(prod) | ||
> sp/df.y # covariance 값 | > sp/df.y # covariance 값 | ||
- | [1] 213.432 | + | [1] 222.3867 |
> cov(x,y) # covariance 펑션 | > cov(x,y) # covariance 펑션 | ||
- | | + | [,1] |
- | [1,] 213.432 | + | [1,] 222.3867 |
> | > | ||
> s.x <- sd(x) # x변인에 대한 standard deviation 값 | > s.x <- sd(x) # x변인에 대한 standard deviation 값 | ||
> s.y <- sd(y) # y | > s.y <- sd(y) # y | ||
> (sp/ | > (sp/ | ||
- | [1] 0.8963055 | + | [1] 0.7841619 |
> cor(x,y) # correlation 펑션 | > cor(x,y) # correlation 펑션 | ||
[,1] | [,1] | ||
- | [1,] 0.8963055 | + | [1,] 0.7841619 |
> | > | ||
> df.y | > df.y | ||
Line 569: | Line 569: | ||
> cov(x, | > cov(x, | ||
[,1] | [,1] | ||
- | [1,] 0.8963055 | + | [1,] 0.7841619 |
> # covariance를 각각의 sd값을 곱하여 나눠 준 값이 r | > # covariance를 각각의 sd값을 곱하여 나눠 준 값이 r | ||
> (sp/ | > (sp/ | ||
- | [1] 0.8963055 | + | [1] 0.7841619 |
> # 분모 분자에서 n-1을 제거하고 보면 | > # 분모 분자에서 n-1을 제거하고 보면 | ||
> sp/ | > sp/ | ||
- | [1] 0.8963055 | + | [1] 0.7841619 |
> cor(x,y) | > cor(x,y) | ||
[,1] | [,1] | ||
- | [1,] 0.8963055 | + | [1,] 0.7841619 |
> | > | ||
> # graph with ggplot2 | > # graph with ggplot2 | ||
Line 600: | Line 600: | ||
Residuals: | Residuals: | ||
- | Min | + | |
- | -21.5280 | + | -35.880 -11.932 -0.458 12.199 34.148 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) -24.3920 17.2128 -1.417 0.165 | + | (Intercept) -33.9867 28.6879 -1.185 0.243 |
- | x 2.1343 0.1713 12.460 5.42e-15 *** | + | x 2.2239 0.2855 7.790 2.16e-09 *** |
--- | --- | ||
Signif. codes: | Signif. codes: | ||
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
- | Residual standard error: | + | Residual standard error: |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> a <- summary(lm.df)$coefficients[1] | > a <- summary(lm.df)$coefficients[1] | ||
> b <- summary(lm.df)$coefficients[2] | > b <- summary(lm.df)$coefficients[2] | ||
> a | > a | ||
- | [1] -24.392 | + | [1] -33.98667 |
> b | > b | ||
- | [1] 2.13432 | + | [1] 2.223867 |
> # how to derive a and b | > # how to derive a and b | ||
> # how to derive coefficient values | > # how to derive coefficient values | ||
Line 631: | Line 631: | ||
> | > | ||
> b3 | > b3 | ||
- | | + | [,1] |
- | [1,] 2.13432 | + | [1,] 2.223867 |
> b2 | > b2 | ||
- | [1] 2.13432 | + | [1] 2.223867 |
> b | > b | ||
- | [1] 2.13432 | + | [1] 2.223867 |
> | > | ||
> a2 | > a2 | ||
- | [1] -24.392 | + | [1] -33.98667 |
> a | > a | ||
- | [1] -24.392 | + | [1] -33.98667 |
> | > | ||
> # | > # | ||
Line 650: | Line 650: | ||
> head(y) | > head(y) | ||
[,1] | [,1] | ||
- | [1,] 180.9286 | + | [1,] 175.7367 |
- | [2,] 202.1895 | + | [2,] 200.0202 |
- | [3,] 229.4630 | + | [3,] 241.0439 |
- | [4,] 176.1518 | + | [4,] 166.0401 |
- | [5,] 196.4237 | + | [5,] 195.5682 |
- | [6,] 220.8350 | + | [6,] 210.8411 |
> head(y.hat) | > head(y.hat) | ||
[,1] | [,1] | ||
- | [1,] 187.6701 | + | [1,] 186.9727 |
- | [2,] 205.5206 | + | [2,] 205.5720 |
- | [3,] 212.6152 | + | [3,] 212.9643 |
- | [4,] 190.4480 | + | [4,] 189.8671 |
- | [5,] 197.2646 | + | [5,] 196.9697 |
- | [6,] 237.9436 | + | [6,] 239.3554 |
> m.y | > m.y | ||
- | [1] 189.04 | + | [1] 188.4 |
> | > | ||
> tmp <- data.frame(x, | > tmp <- data.frame(x, | ||
> head(tmp) | > head(tmp) | ||
x y y.hat | x y y.hat | ||
- | 1 99.35817 | + | 1 99.35817 |
- | 2 107.72169 | + | 2 107.72169 |
- | 3 111.04574 | + | 3 111.04574 |
- | 4 100.65968 | + | 4 100.65968 |
- | 5 103.85350 | + | 5 103.85350 |
- | 6 122.91296 | + | 6 122.91296 |
> plot(x,y) # 원래 데이터 | > plot(x,y) # 원래 데이터 | ||
> plot(x, | > plot(x, | ||
Line 684: | Line 684: | ||
> tmp2 <- data.frame(tmp, | > tmp2 <- data.frame(tmp, | ||
> head(tmp2) | > head(tmp2) | ||
- | x y y.hat explained | + | x y y.hat explained |
- | 1 99.35817 | + | 1 99.35817 |
- | 2 107.72169 | + | 2 107.72169 |
- | 3 111.04574 | + | 3 111.04574 |
- | 4 100.65968 | + | 4 100.65968 |
- | 5 103.85350 196.4237 197.2646 | + | 5 103.85350 |
- | 6 122.91296 | + | 6 122.91296 |
> | > | ||
> head(y.hat == explained + m.y) | > head(y.hat == explained + m.y) | ||
Line 715: | Line 715: | ||
> cor(x, | > cor(x, | ||
[,1] | [,1] | ||
- | [1,] 3.695762e-16 | + | [1,] 7.981058e-17 |
> ################ | > ################ | ||
> plot(x, | > plot(x, | ||
Line 728: | Line 728: | ||
> head(explained+error) | > head(explained+error) | ||
[,1] | [,1] | ||
- | [1,] -8.111452 | + | [1,] -12.663308 |
- | [2, | + | [2, |
- | [3, | + | [3, |
- | [4,] -12.888227 | + | [4,] -22.359948 |
- | [5,] 7.383718 | + | [5,] 7.168197 |
- | [6, | + | [6, |
> plot(x, | > plot(x, | ||
> head(explained+error+m.y == y) | > head(explained+error+m.y == y) | ||
Line 747: | Line 747: | ||
> cor(x, | > cor(x, | ||
[,1] | [,1] | ||
- | [1,] 0.8963055 | + | [1,] 0.7841619 |
> cor(x, | > cor(x, | ||
[,1] | [,1] | ||
- | [1,] 0.8963055 | + | [1,] 0.7841619 |
> cor(x,y) | > cor(x,y) | ||
[,1] | [,1] | ||
- | [1,] 0.8963055 | + | [1,] 0.7841619 |
> # see this also | > # see this also | ||
> round(cor(tmp2), | > round(cor(tmp2), | ||
x y y.hat explained error | x y y.hat explained error | ||
- | x 1.000 0.896 1.000 1.000 0.000 | + | x 1.000 0.784 1.000 1.000 0.000 |
- | y 0.896 1.000 0.896 0.896 0.443 | + | y 0.784 1.000 0.784 0.784 0.621 |
- | y.hat 1.000 0.896 1.000 1.000 0.000 | + | y.hat 1.000 0.784 1.000 1.000 0.000 |
- | explained 1.000 0.896 1.000 1.000 0.000 | + | explained 1.000 0.784 1.000 1.000 0.000 |
- | error 0.000 0.443 0.000 0.000 1.000 | + | error 0.000 0.621 0.000 0.000 1.000 |
> ############### | > ############### | ||
> | > | ||
Line 768: | Line 768: | ||
> ss.reg <- sum(explained^2) | > ss.reg <- sum(explained^2) | ||
> ss.y | > ss.y | ||
- | [1] 22114.22 | + | [1] 31366.84 |
> ss.res | > ss.res | ||
- | [1] 4348.462 | + | [1] 12079.06 |
> ss.reg | > ss.reg | ||
- | [1] 17765.76 | + | [1] 19287.78 |
> ss.res + ss.reg | > ss.res + ss.reg | ||
- | [1] 22114.22 | + | [1] 31366.84 |
> | > | ||
> # degrees of freedom values | > # degrees of freedom values | ||
Line 784: | Line 784: | ||
> | > | ||
> ss.reg | > ss.reg | ||
- | [1] 17765.76 | + | [1] 19287.78 |
> df.reg | > df.reg | ||
[1] 1 | [1] 1 | ||
> ms.reg | > ms.reg | ||
- | [1] 17765.76 | + | [1] 19287.78 |
> | > | ||
> ss.res | > ss.res | ||
- | [1] 4348.462 | + | [1] 12079.06 |
> df.res | > df.res | ||
[1] 38 | [1] 38 | ||
> ms.res | > ms.res | ||
- | [1] 114.4332 | + | [1] 317.87 |
> | > | ||
> # r square | > # r square | ||
> ss.y | > ss.y | ||
- | [1] 22114.22 | + | [1] 31366.84 |
> ss.reg | > ss.reg | ||
- | [1] 17765.76 | + | [1] 19287.78 |
> r.sq <- ss.reg/ss.y | > r.sq <- ss.reg/ss.y | ||
> r.sq | > r.sq | ||
- | [1] 0.8033635 | + | [1] 0.6149098 |
> # 위의 r.sq 값이 충분히 컸는지를 알아보는 것은 | > # 위의 r.sq 값이 충분히 컸는지를 알아보는 것은 | ||
> # ss.reg가 충분히 컸는지를 알아보는 것 | > # ss.reg가 충분히 컸는지를 알아보는 것 | ||
Line 813: | Line 813: | ||
> f.cal <- ms.reg/ | > f.cal <- ms.reg/ | ||
> f.cal | > f.cal | ||
- | [1] 155.25 | + | [1] 60.67819 |
> pf(f.cal, df.reg, df.res, lower.tail = F) | > pf(f.cal, df.reg, df.res, lower.tail = F) | ||
- | [1] 5.41667e-15 | + | [1] 2.157284e-09 |
> # check anova test | > # check anova test | ||
> anova(lm.df) | > anova(lm.df) | ||
Line 821: | Line 821: | ||
Response: y | Response: y | ||
- | Df Sum Sq Mean Sq F value Pr(> | + | Df Sum Sq Mean Sq F value Pr(> |
- | x 1 17765.8 17765.8 | + | x 1 19288 19287.8 |
- | Residuals 38 | + | Residuals 38 |
--- | --- | ||
Signif. codes: | Signif. codes: | ||
Line 833: | Line 833: | ||
Residuals: | Residuals: | ||
- | Min | + | |
- | -21.5280 | + | -35.880 -11.932 -0.458 12.199 34.148 |
Coefficients: | Coefficients: | ||
Estimate Std. Error t value Pr(> | Estimate Std. Error t value Pr(> | ||
- | (Intercept) -24.3920 17.2128 -1.417 0.165 | + | (Intercept) -33.9867 28.6879 -1.185 0.243 |
- | x 2.1343 0.1713 12.460 5.42e-15 *** | + | x 2.2239 0.2855 7.790 2.16e-09 *** |
--- | --- | ||
Signif. codes: | Signif. codes: | ||
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
- | Residual standard error: | + | Residual standard error: |
- | Multiple R-squared: | + | Multiple R-squared: |
- | F-statistic: | + | F-statistic: |
> | > | ||
Line 858: | Line 858: | ||
> res <- resid(lm.df) | > res <- resid(lm.df) | ||
> data.frame(head(res)) | > data.frame(head(res)) | ||
- | | + | head.res. |
- | 1 -6.7415738 | + | 1 -11.235956 |
- | 2 -3.3310912 | + | 2 -5.551819 |
- | 3 | + | 3 |
- | 4 -14.2961895 | + | 4 -23.826982 |
- | 5 -0.8408845 | + | 5 -1.401474 |
- | 6 -17.1085500 | + | 6 -28.514250 |
> data.frame(head(error)) | > data.frame(head(error)) | ||
head.error. | head.error. | ||
- | 1 -6.7415738 | + | 1 -11.235956 |
- | 2 -3.3310912 | + | 2 |
- | 3 16.8477873 | + | 3 28.079646 |
- | 4 -14.2961895 | + | 4 -23.826982 |
- | 5 -0.8408845 | + | 5 |
- | 6 -17.1085500 | + | 6 -28.514250 |
> se.b <- sqrt(ms.res/ | > se.b <- sqrt(ms.res/ | ||
> se.b | > se.b | ||
- | [1] 0.1712946 | + | [1] 0.285491 |
> # as x increases by 1 unit, y would increase by b +- 2*se.b | > # as x increases by 1 unit, y would increase by b +- 2*se.b | ||
> b + c(-2*se.b, 2*se.b) | > b + c(-2*se.b, 2*se.b) | ||
- | [1] 1.791731 | + | [1] 1.652885 |
> b | > b | ||
- | [1] 2.13432 | + | [1] 2.223867 |
> | > | ||
> # to be exact | > # to be exact | ||
Line 887: | Line 887: | ||
[1] 2.024394 | [1] 2.024394 | ||
> b + c(-t.crit*se.b, | > b + c(-t.crit*se.b, | ||
- | [1] 1.787552 | + | [1] 1.645921 |
> b | > b | ||
- | [1] 2.13432 | + | [1] 2.223867 |
> | > | ||
> t.cal <- lm.df$coefficients[2]/ | > t.cal <- lm.df$coefficients[2]/ | ||
> t.cal | > t.cal | ||
| | ||
- | 12.45994 | + | 7.789621 |
> | > | ||
> | > | ||
> pt(t.cal, df.res, lower.tail = F)*2 | > pt(t.cal, df.res, lower.tail = F)*2 | ||
- | | + | x |
- | 5.41667e-15 | + | 2.157284e-09 |
> pf(f.cal, df.reg, df.res, lower.tail = F) | > pf(f.cal, df.reg, df.res, lower.tail = F) | ||
- | [1] 5.41667e-15 | + | [1] 2.157284e-09 |
> | > | ||
> # see also | > # see also | ||
> t.cal | > t.cal | ||
| | ||
- | 12.45994 | + | 7.789621 |
> t.cal^2 | > t.cal^2 | ||
- | x | + | x |
- | 155.25 | + | 60.67819 |
> f.cal | > f.cal | ||
- | [1] 155.25 | + | [1] 60.67819 |
- | > | + | |
> | > | ||
> | > |
c/ms/2025/schedule/w11.lecture.note.1747785479.txt.gz · Last modified: 2025/05/21 08:57 by hkimscil