r:regression
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
r:regression [2025/10/13 07:31] – [1] hkimscil | r:regression [2025/10/13 07:57] (current) – [10] hkimscil | ||
---|---|---|---|
Line 219: | Line 219: | ||
> ########################### | > ########################### | ||
> rm(list = ls()) | > rm(list = ls()) | ||
- | > rnorm2 <- function(n, | + | > rnorm2 <- function(n, |
+ | > + | ||
+ | > + } | ||
> set.seed(101) | > set.seed(101) | ||
> n.s <- 36 | > n.s <- 36 | ||
Line 236: | Line 238: | ||
</ | </ | ||
===== 2 ===== | ===== 2 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> mod.r <- lm(y ~ x, data = df) | > mod.r <- lm(y ~ x, data = df) | ||
Line 259: | Line 263: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
===== 3 ===== | ===== 3 ===== | ||
- | < | + | <WRAP group> |
+ | <WRAP column half>< | ||
> sp.xy <- sum((x-mean(x))*(y-mean(y))) | > sp.xy <- sum((x-mean(x))*(y-mean(y))) | ||
> df.tot <- length(y) - 1 | > df.tot <- length(y) - 1 | ||
Line 279: | Line 288: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
===== 4 ===== | ===== 4 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> ss.x <- sum((x-mean(x))^2) | > ss.x <- sum((x-mean(x))^2) | ||
Line 294: | Line 309: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
===== 5 ===== | ===== 5 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> y.pred <- a + b*x | > y.pred <- a + b*x | ||
Line 343: | Line 364: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | {{: | ||
+ | </ | ||
+ | </ | ||
===== 6 ===== | ===== 6 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> y.hat <- mod.r$fitted.values | > y.hat <- mod.r$fitted.values | ||
Line 507: | Line 535: | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | 아래를 실제로 구해서 | ||
+ | * y.obs 실제 y 값 | ||
+ | * y.hat prediction 값 | ||
+ | * y.mean y 평균 | ||
+ | 그 다음 residual값, | ||
+ | * res = y.obs - y.hat | ||
+ | * reg = y.hat - y.mean | ||
+ | * tot = y.obs - y.mean | ||
+ | 각각의 Sum of Square값을 구한다 | ||
+ | * ss.res <- sum(res^2) | ||
+ | * ss.reg <- sum(reg^2) | ||
+ | * ss.tot <- sum(tot^2) | ||
+ | 그 후에 각각의 df는 | ||
+ | * df.res <- n - (# of parameters (a and b) = 2) = 36 - 2 = 34 | ||
+ | * df.reg <- # of parameters - 1 = 2 - 1 = 1 | ||
+ | * df.tot <- # of observations (36) - 1 = 35 | ||
+ | </ | ||
+ | </ | ||
===== 7 ===== | ===== 7 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> ms.tot <- ss.tot / df.tot | > ms.tot <- ss.tot / df.tot | ||
Line 521: | Line 572: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | <fc # | ||
+ | MS total = y의 분산값 | ||
+ | MS res = residual의 분산값 = 독립변인이 있음에도 불구하고 랜덤하게 나타난 분산 | ||
+ | MS reg = regression의 분산값 = 독립변인의 영향력으로 생긴 분산 | ||
+ | F 값은 독립변인 때문에 생긴 차이값 / 랜덤 차이값 = MS reg / MS res | ||
+ | |||
+ | |||
+ | </ | ||
+ | </ | ||
===== 8 ===== | ===== 8 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> f.cal <- ms.reg/ | > f.cal <- ms.reg/ | ||
Line 560: | Line 625: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
===== 9 ===== | ===== 9 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> se.res <- sqrt(ss.res/ | > se.res <- sqrt(ss.res/ | ||
Line 570: | Line 642: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | residuals의 (잔차들의) standard deviation (표준편차) | ||
+ | = <fc # | ||
+ | </ | ||
+ | </ | ||
===== 10 ===== | ===== 10 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> ss.x <- sum((x-mean(x))^2) # ss for x | > ss.x <- sum((x-mean(x))^2) # ss for x | ||
Line 609: | Line 690: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | * b에 대한 standard error (standard deviation of the estimate of b) | ||
+ | * '' | ||
+ | |||
+ | * b값에 대한 significance test = | ||
+ | * b값이 (기울기가) y를 설명하는 데 기여했는가? | ||
+ | * 기여했다는 가설을 테스트하는 것 | ||
+ | * 기여를 하지 않았다면 | ||
+ | * 기울기가 평균값과 같은 역할밖에 하지 못했다는 뜻이므로 | ||
+ | * '' | ||
+ | * 이 값이 t 값 (t.cal) | ||
+ | |||
+ | </ | ||
+ | </ | ||
+ | |||
===== 11 ===== | ===== 11 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> ################################## | > ################################## | ||
Line 636: | Line 736: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
+ | |||
===== 12 ===== | ===== 12 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> # Train the model with scaled features | > # Train the model with scaled features | ||
Line 643: | Line 751: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
+ | |||
===== 13 ===== | ===== 13 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> a <- rnorm(1) | > a <- rnorm(1) | ||
Line 654: | Line 770: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
+ | |||
===== 14 ===== | ===== 14 ===== | ||
+ | <WRAP group> | ||
+ | <WRAP column half> | ||
< | < | ||
> nlen <- 75 | > nlen <- 75 | ||
Line 696: | Line 820: | ||
> | > | ||
</ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | ++++++++++++++++++++ | ||
+ | </ | ||
+ | </ | ||
+ | |||
r/regression.1760308267.txt.gz · Last modified: by hkimscil