====== Simple Regression in R ======
###########################
# regression sum up
###########################
rm(list = ls())
rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) }   # draws n values with exactly the given sample mean and sd
set.seed(101)
n.s <- 36
m.x <- 100
df.x <- m.x - 1
ssq.x <- 100
x <- rnorm2(n.s, m.x, sqrt(9900/df.x))   # sqrt(9900/99) = 10, so x has mean 100 and sd exactly 10
y <- 5*x + rnorm2(n.s, 0, 36)            # y is linear in x plus noise with mean 0 and sd 36
df <- data.frame(x,y)
mod.r <- lm(y ~ x, data = df)
summary(mod.r)
sp.xy <- sum((x-mean(x))*(y-mean(y)))
df.tot <- length(y) - 1
cov.xy <- sp.xy / df.tot
cov.xy
cov(x,y)
cor.xy <- cov.xy / ((sd(x)*sd(y)))
cor.xy
cor(x,y)
ss.x <- sum((x-mean(x))^2)
ss.x
b <- sp.xy / ss.x
b
a <- mean(y)- b*mean(x)
a
y.pred <- a + b*x
head(y.pred)
data.frame(head(mod.r$fitted.values))
mt <- paste("a = ", round(a,2),
"\n", "b = ", round(b,3))
plot(x,y, main=mt)
abline(h=mean(y), col="green", lwd=2)
# abline(v=mean(x), col="blue", lwd=2)
abline(mod.r, col="red", lwd=2)
x[35]
y[35]
y.obs.35 <- y[35]
y.hat.35 <- mod.r$fitted.values[35]
y.hat.35
y.mean <- mean(y)
points(x = x[35], y = y.obs.35, col = "blue", pch = 16)
points(x = x[35], y = y.hat.35, col = "red", pch = 16)
points(x = x[35], y = y.mean, col = "black", pch = 16)
abline(v=x[35])
text(x[35], y.obs.35, labels="y.obs", pos = 2)
text(x[35], y.hat.35, labels="y.hat", pos = 2)
text(x[35], y.mean, labels="y.mean", pos = 2)
y.hat <- mod.r$fitted.values
y.hat <- y.pred
y.obs <- y
y.mean <- y.mean
res <- y.obs - y.hat
reg <- y.hat - y.mean
df.a <- data.frame(y.obs, y.hat, y.mean, res, reg)
df.a
y.obs.35 - y.hat.35
y.hat.35 - y.mean
res.sq <- res^2
reg.sq <- reg^2
df.b <- data.frame(df.a, res.sq, reg.sq)
df.b
ss.res <- sum(res.sq)
ss.reg <- sum(reg.sq)
ss.tot <- sum((y.obs-y.mean)^2)
ss.res
ss.reg
ss.tot
ss.res + ss.reg
df.tot <- length(y)-1
df.res <- length(y)-ncol(df)
df.reg <- ncol(df)-1
df.tot
df.res
df.reg
df.res+df.reg
ms.tot <- ss.tot / df.tot
ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
ms.tot
ms.reg
ms.res
f.cal <- ms.reg/ms.res
f.cal
df.reg
df.res
pf(f.cal, df.reg, df.res, lower.tail = F)
summary(mod.r)
explained.area.in.y <- ss.reg / ss.tot
explained.area.in.y
r.sq <- explained.area.in.y
se.res <- sqrt(ss.res/(df.res)) # standard deviation of res
se.res
sqrt(ms.res)
ss.x <- sum((x-mean(x))^2) # ss for x
se.b.x <- sqrt(ms.res/ss.x) # se for b
se.b.x
b.x <- mod.r$coefficients[2]
b.x
t.cal <- b.x/se.b.x
t.cal
df.res
pv.t.cal <- pt(t.cal, df.res, lower.tail = F)*2
pv.t.cal
t.cal
df.res
mod.f <- aov(mod.r)
summary(mod.f)
##################################
# gradient descent
##################################
a <- rnorm(1)   # random initial values for the intercept and slope
b <- rnorm(1)
a.init <- a
b.init <- b
predict <- function(x, a, b){
  return (a + b * x)
}
residuals <- function(predictions, y) {
  return(y - predictions)
}
# partial derivatives of the MSE loss with respect to b and a
gradient <- function(x, y, predictions){
  error = y - predictions
  db = -2 * mean(x * error)   # d(MSE)/db
  da = -2 * mean(error)       # d(MSE)/da
  return(list("b" = db, "a" = da))
  # return(c(db, da))
}
# mean squared error loss
mseloss <- function(predictions, y) {
  residuals <- (y - predictions)
  return(mean(residuals^2))
}
# Train the model with scaled features
learning.rate = 1e-1
# Record Loss for each epoch:
# as = c()
# bs = c()
mses = c()
# sses = c()
# mres = c()
zx <- (x-mean(x))/sd(x)   # z-score x; scaling helps gradient descent converge (see the links below)
nlen <- 75
for (epoch in 1:nlen) {
  predictions <- predict(zx, a, b)
  grad <- gradient(zx, y, predictions)
  step.b <- grad$b * learning.rate
  step.a <- grad$a * learning.rate
  b <- b - step.b
  a <- a - step.a
}
# mses
# mres
# as
# bs
# scaled
a
b
# unscale coefficients to make them comprehensible
# see http://commres.net/wiki/gradient_descent#why_normalize_scale_or_make_z-score_xi
# and
# http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
#
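# A sketch of the algebra (writing a.z and b.z for the coefficients fitted on zx):
#   y ~ a.z + b.z*zx = a.z + b.z*(x - mean(x))/sd(x)
#     = (a.z - b.z*mean(x)/sd(x)) + (b.z/sd(x))*x
# so the unscaled slope is b/sd(x) and the unscaled intercept is a - (mean(x)/sd(x))*b;
# note that a is updated first, while b still holds the scaled slope.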
a = a - (mean(x) / sd(x)) * b
b = b / sd(x)
a
b
cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
summary(lm(y~x))$coefficients
====== output ======
===== 1 =====
> ###########################
> # regression sum up
> ###########################
> rm(list = ls())
> rnorm2 <- function(n,mean,sd) {
> + mean+sd*scale(rnorm(n))
> + }
> set.seed(101)
> n.s <- 36
> m.x <- 100
> df.x <- m.x - 1
> ssq.x <- 100
> x <- rnorm2(n.s, m.x, sqrt(9900/df.x))
> y <- 5*x + rnorm2(n.s, 0, 36)
> df <- data.frame(x,y)
>
++++++++++++++++++++
===== 2 =====
> mod.r <- lm(y ~ x, data = df)
> summary(mod.r)
Call:
lm(formula = y ~ x, data = df)
Residuals:
Min 1Q Median 3Q Max
-91.412 -22.328 7.718 20.489 72.508
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -85.7661 60.2526 -1.423 0.164
x 5.8577 0.5996 9.769 2.12e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 35.47 on 34 degrees of freedom
Multiple R-squared: 0.7373, Adjusted R-squared: 0.7296
F-statistic: 95.43 on 1 and 34 DF, p-value: 2.115e-11
>
++++++++++++++++++++
===== 3 =====
> sp.xy <- sum((x-mean(x))*(y-mean(y)))
> df.tot <- length(y) - 1
> cov.xy <- sp.xy / df.tot
> cov.xy
[1] 585.7661
> cov(x,y)
[,1]
[1,] 585.7661
>
> cor.xy <- cov.xy / ((sd(x)*sd(y)))
> cor.xy
[1] 0.8586711
> cor(x,y)
[,1]
[1,] 0.8586711
>
++++++++++++++++++++
===== 4 =====
> ss.x <- sum((x-mean(x))^2)
> ss.x
[1] 3500
> b <- sp.xy / ss.x
> b
[1] 5.857661
> a <- mean(y)- b*mean(x)
> a
[1] -85.76606
>
>
++++++++++++++++++++
===== 5 =====
> y.pred <- a + b*x
> head(y.pred)
[,1]
[1,] 482.2777
[2,] 539.3226
[3,] 459.6216
[4,] 517.3681
[5,] 523.6284
[6,] 579.6797
>
> data.frame(head(mod.r$fitted.values))
head.mod.r.fitted.values.
1 482.2777
2 539.3226
3 459.6216
4 517.3681
5 523.6284
6 579.6797
>
> mt <- paste("a = ", round(a,2),
+ "\n", "b = ", round(b,3))
> plot(x,y, main=mt)
> abline(h=mean(y), col="green", lwd=2)
> # abline(v=mean(x), col="blue", lwd=2)
> abline(mod.r, col="red", lwd=2)
> x[35]
[1] 113.7788
> y[35]
[1] 630.7971
> y.obs.35 <- y[35]
> y.hat.35 <- mod.r$fitted.values[35]
> y.hat.35
35
580.7113
> y.mean <- mean(y)
>
> points(x = x[35], y = y.obs.35, col = "blue", pch = 16)
> points(x = x[35], y = y.hat.35, col = "red", pch = 16)
> points(x = x[35], y = y.mean, col = "black", pch = 16)
> abline(v=x[35])
> text(x[35], y.obs.35, labels="y.obs", pos = 2)
> text(x[35], y.hat.35, labels="y.hat", pos = 2)
> text(x[35], y.mean, labels="y.mean", pos = 2)
>
>
++++++++++++++++++++
{{:r:pasted:20251013-073857.png}}
===== 6 =====
> y.hat <- mod.r$fitted.values
> y.hat <- y.pred
>
> y.obs <- y
> y.mean <- y.mean
>
> res <- y.obs - y.hat
> reg <- y.hat - y.mean
>
> df.a <- data.frame(y.obs, y.hat, y.mean, res, reg)
> df.a
y.obs y.hat y.mean res reg
1 495.2865 482.2777 500 13.0088479 -17.722299
2 572.8469 539.3226 500 33.5242925 39.322571
3 405.3625 459.6216 500 -54.2590625 -40.378431
4 536.0013 517.3681 500 18.6332683 17.368054
5 542.6492 523.6284 500 19.0208401 23.628375
6 601.0744 579.6797 500 21.3947218 79.679665
7 452.2171 543.6295 500 -91.4124153 43.629548
8 483.0325 496.1284 500 -13.0959080 -3.871624
9 515.3267 562.9955 500 -47.6688104 62.995518
10 509.9980 488.9515 500 21.0465311 -11.048516
11 557.9217 537.6334 500 20.2883651 37.633380
12 435.7639 451.8359 500 -16.0719728 -48.164119
13 574.9176 596.1593 500 -21.2417014 96.159349
14 368.6743 408.2015 500 -39.5272035 -91.798511
15 449.6582 488.0798 500 -38.4216090 -11.920198
16 485.6646 490.8944 500 -5.2298139 -9.105584
17 481.9870 448.2703 500 33.7166607 -51.729694
18 456.5535 507.2452 500 -50.6916186 7.245154
19 490.3326 450.3537 500 39.9789139 -49.646309
20 352.9357 370.3130 500 -17.3772823 -129.687034
21 504.1814 492.8153 500 11.3660873 -7.184671
22 589.5343 549.4563 500 40.0780792 49.456269
23 537.0947 486.0475 500 51.0471491 -13.952464
24 409.3543 408.3897 500 0.9646127 -91.610336
25 538.2055 551.7883 500 -13.5828576 51.788309
26 374.5917 411.8657 500 -37.2740105 -88.134288
27 508.1914 533.7775 500 -25.5860867 33.777536
28 504.6137 495.7007 500 8.9129424 -4.299270
29 549.0252 533.7887 500 15.2365812 33.788663
30 549.3653 535.7949 500 13.5703706 35.794919
31 530.1250 561.5610 500 -31.4359983 61.561044
32 528.0982 521.5753 500 6.5228603 21.575326
33 560.5632 568.8940 500 -8.3308199 68.894006
34 389.1352 368.8325 500 20.3026105 -131.167455
35 630.7971 580.7113 500 50.0858331 80.711286
36 528.9194 456.4118 500 72.5076032 -43.588169
>
> y.obs.35 - y.hat.35
35
50.08583
> y.hat.35 - y.mean
35
80.71129
>
> res.sq <- res^2
> reg.sq <- reg^2
> df.b <- data.frame(df.a, res.sq, reg.sq)
> df.b
y.obs y.hat y.mean res reg res.sq
1 495.2865 482.2777 500 13.0088479 -17.722299 169.2301237
2 572.8469 539.3226 500 33.5242925 39.322571 1123.8781868
3 405.3625 459.6216 500 -54.2590625 -40.378431 2944.0458598
4 536.0013 517.3681 500 18.6332683 17.368054 347.1986863
5 542.6492 523.6284 500 19.0208401 23.628375 361.7923589
6 601.0744 579.6797 500 21.3947218 79.679665 457.7341189
7 452.2171 543.6295 500 -91.4124153 43.629548 8356.2296621
8 483.0325 496.1284 500 -13.0959080 -3.871624 171.5028055
9 515.3267 562.9955 500 -47.6688104 62.995518 2272.3154845
10 509.9980 488.9515 500 21.0465311 -11.048516 442.9564724
11 557.9217 537.6334 500 20.2883651 37.633380 411.6177585
12 435.7639 451.8359 500 -16.0719728 -48.164119 258.3083084
13 574.9176 596.1593 500 -21.2417014 96.159349 451.2098789
14 368.6743 408.2015 500 -39.5272035 -91.798511 1562.3998194
15 449.6582 488.0798 500 -38.4216090 -11.920198 1476.2200395
16 485.6646 490.8944 500 -5.2298139 -9.105584 27.3509539
17 481.9870 448.2703 500 33.7166607 -51.729694 1136.8132091
18 456.5535 507.2452 500 -50.6916186 7.245154 2569.6401982
19 490.3326 450.3537 500 39.9789139 -49.646309 1598.3135561
20 352.9357 370.3130 500 -17.3772823 -129.687034 301.9699398
21 504.1814 492.8153 500 11.3660873 -7.184671 129.1879413
22 589.5343 549.4563 500 40.0780792 49.456269 1606.2524356
23 537.0947 486.0475 500 51.0471491 -13.952464 2605.8114284
24 409.3543 408.3897 500 0.9646127 -91.610336 0.9304776
25 538.2055 551.7883 500 -13.5828576 51.788309 184.4940217
26 374.5917 411.8657 500 -37.2740105 -88.134288 1389.3518589
27 508.1914 533.7775 500 -25.5860867 33.777536 654.6478348
28 504.6137 495.7007 500 8.9129424 -4.299270 79.4405414
29 549.0252 533.7887 500 15.2365812 33.788663 232.1534061
30 549.3653 535.7949 500 13.5703706 35.794919 184.1549582
31 530.1250 561.5610 500 -31.4359983 61.561044 988.2219908
32 528.0982 521.5753 500 6.5228603 21.575326 42.5477061
33 560.5632 568.8940 500 -8.3308199 68.894006 69.4025607
34 389.1352 368.8325 500 20.3026105 -131.167455 412.1959914
35 630.7971 580.7113 500 50.0858331 80.711286 2508.5906729
36 528.9194 456.4118 500 72.5076032 -43.588169 5257.3525227
reg.sq
1 314.07989
2 1546.26461
3 1630.41773
4 301.64929
5 558.30011
6 6348.84894
7 1903.53748
8 14.98947
9 3968.43534
10 122.06970
11 1416.27132
12 2319.78237
13 9246.62035
14 8426.96671
15 142.09111
16 82.91166
17 2675.96119
18 52.49225
19 2464.75603
20 16818.72676
21 51.61950
22 2445.92253
23 194.67126
24 8392.45357
25 2682.02895
26 7767.65273
27 1140.92197
28 18.48372
29 1141.67377
30 1281.27626
31 3789.76215
32 465.49470
33 4746.38400
34 17204.90124
35 6514.31162
36 1899.92850
> ss.res <- sum(res.sq)
> ss.reg <- sum(reg.sq)
> ss.tot <- sum((y.obs-y.mean)^2)
> ss.res
[1] 42785.46
> ss.reg
[1] 120092.7
> ss.tot
[1] 162878.1
> ss.res + ss.reg
[1] 162878.1
>
> df.tot <- length(y)-1
> df.res <- length(y)-ncol(df)
> df.reg <- ncol(df)-1
> df.tot
[1] 35
> df.res
[1] 34
> df.reg
[1] 1
> df.res+df.reg
[1] 35
>
>
++++++++++++++++++++
Compute the following explicitly:
* y.obs: the observed y values
* y.hat: the predicted values
* y.mean: the mean of y
Then obtain the residual part, the regression part (the explained value), and the total part that goes into SS total (the SS used when computing the variance of y):
* res = y.obs - y.hat
* reg = y.hat - y.mean
* tot = y.obs - y.mean
Take the sum of squares of each:
* ss.res <- sum(res^2)
* ss.reg <- sum(reg^2)
* ss.tot <- sum(tot^2)
The corresponding degrees of freedom are (a quick check follows below):
* df.res <- n - (# of parameters (a and b) = 2) = 36 - 2 = 34
* df.reg <- # of parameters - 1 = 2 - 1 = 1
* df.tot <- # of observations (36) - 1 = 35
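A quick check of the decomposition (a minimal sketch, assuming ss.res, ss.reg, ss.tot, x, y, and mod.r from the code above are still in the workspace):

  all.equal(ss.res + ss.reg, ss.tot)   # SS total = SS regression + SS residual
  ss.reg / ss.tot                      # proportion of variation in y that is explained
  cor(x, y)^2                          # the same value: r squared
  summary(mod.r)$r.squared             # lm agrees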
===== 7 =====
> ms.tot <- ss.tot / df.tot
> ms.reg <- ss.reg / df.reg
> ms.res <- ss.res / df.res
> ms.tot
[1] 4653.661
> ms.reg
[1] 120092.7
> ms.res
[1] 1258.396
>
++++++++++++++++++++
**MS = variance**
MS total = the variance of y
MS res = the variance of the residuals = the variation that remains random even with the independent variable in the model
MS reg = the variance due to regression = the variation produced by the influence of the independent variable
The F value is the variation caused by the independent variable divided by the random variation = MS reg / MS res (a cross-check against R's ANOVA table follows below).
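The same table can be pulled straight from the fitted model; a minimal sketch, assuming mod.r, ms.reg, ms.res, and f.cal from the code above:

  anova(mod.r)              # Df, Sum Sq, Mean Sq, F value, Pr(>F) for x and Residuals
  c(ms.reg, ms.res, f.cal)  # should match the Mean Sq column and the F value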
===== 8 =====
> f.cal <- ms.reg/ms.res
> f.cal
[1] 95.43312
> df.reg
[1] 1
> df.res
[1] 34
> pf(f.cal, df.reg, df.res, lower.tail = F)
[1] 2.115182e-11
> summary(mod.r)
Call:
lm(formula = y ~ x, data = df)
Residuals:
Min 1Q Median 3Q Max
-91.412 -22.328 7.718 20.489 72.508
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -85.7661 60.2526 -1.423 0.164
x 5.8577 0.5996 9.769 2.12e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 35.47 on 34 degrees of freedom
Multiple R-squared: 0.7373, Adjusted R-squared: 0.7296
F-statistic: 95.43 on 1 and 34 DF, p-value: 2.115e-11
>
> explained.area.in.y <- ss.reg / ss.tot
> explained.area.in.y
[1] 0.7373161
> r.sq <- explained.area.in.y
>
++++++++++++++++++++
===== 9 =====
> se.res <- sqrt(ss.res/(df.res)) # standard deviation of res
> se.res
[1] 35.47388
> sqrt(ms.res)
[1] 35.47388
>
++++++++++++++++++++
The standard deviation of the residuals
= sqrt(the variance of the residuals); lm reports the same number, as checked below.
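A minimal cross-check, assuming mod.r and se.res from the code above; lm stores the same quantity as the "Residual standard error":

  summary(mod.r)$sigma   # residual standard error kept by lm
  sigma(mod.r)           # stats::sigma() returns it directly
  se.res                 # the hand-computed value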
===== 10 =====
> ss.x <- sum((x-mean(x))^2) # ss for x
> se.b.x <- sqrt(ms.res/ss.x) # se for b
> se.b.x
[1] 0.599618
> b.x <- mod.r$coefficients[2]
> b.x
x
5.857661
> t.cal <- b.x/se.b.x
> t.cal
x
9.768988
> df.res
[1] 34
> pv.t.cal <- pt(t.cal, df.res, lower.tail = F)*2
> pv.t.cal
x
2.115182e-11
> t.cal
x
9.768988
> df.res
[1] 34
> t.cal^2
x
95.43312
>
> mod.f <- aov(mod.r)
> summary(mod.f)
Df Sum Sq Mean Sq F value Pr(>F)
x 1 120093 120093 95.43 2.12e-11 ***
Residuals 34 42785 1258
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
++++++++++++++++++++
* The standard error of b (the standard deviation of the b estimate)
* ''sqrt(ms.res/ss.x)''
* The significance test for b =
* has b (the slope) contributed to explaining y? =
* it tests the hypothesis that the slope does contribute
* if the slope contributed nothing,
* it would do no better than simply using the mean of y, so
* test with ''(b - 0)/se.b''
* this value is the t value (t.cal); a confidence-interval sketch follows below
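A minimal sketch of the matching confidence interval, assuming b.x, se.b.x, df.res, and mod.r from the code above (t.crit is just a local name introduced here):

  t.crit <- qt(0.975, df.res)                       # critical t for a two-sided 95% CI
  c(b.x - t.crit * se.b.x, b.x + t.crit * se.b.x)   # CI built from the hand-computed SE
  confint(mod.r, "x", level = 0.95)                 # lm's interval agrees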
===== 11 =====
> ##################################
> # gradient descent
> ##################################
> predict <- function(x, a, b){
+ return (a + b * x)
+ }
>
> residuals <- function(predictions, y) {
+ return(y - predictions)
+ }
>
> gradient <- function(x, y, predictions){
+ error = y - predictions
+ db = -2 * mean(x * error)
+ da = -2 * mean(error)
+ return(list("b" = db, "a" = da))
+ }
>
> mseloss <- function(predictions, y) {
+ residuals <- (y - predictions)
+ return(mean(residuals^2))
+ }
>
++++++++++++++++++++
===== 12 =====
> # Train the model with scaled features
> learning.rate = 1e-1
>
++++++++++++++++++++
===== 13 =====
> a <- rnorm(1)
> b <- rnorm(1)
> a.init <- a
> b.init <- b
>
> zx <- (x-mean(x))/sd(x)
>
++++++++++++++++++++
===== 14 =====
> nlen <- 75
> for (epoch in 1:nlen) {
+ predictions <- predict(zx, a, b)
+
+ grad <- gradient(zx, y, predictions)
+
+ step.b <- grad$b * learning.rate
+ step.a <- grad$a * learning.rate
+ b <- b-step.b
+ a <- a-step.a
+ }
>
> # scaled a and b
> a
[1] 500
> b
[1] 58.5766
>
> # unscale coefficients to make them comprehensible
> # see http://commres.net/wiki/gradient_descent#why_normalize_scale_or_make_z-score_xi
> # and
> # http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
> #
> a = a - (mean(x) / sd(x)) * b
> b = b / sd(x)
> a
[1] -85.76604
> b
[1] 5.85766
>
> cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
Intercept: -85.7660381466164
Slope: 5.85766011248848
> summary(lm(y~x))$coefficients
Estimate Std. Error t value Pr(>|t|)
(Intercept) -85.766064 60.252573 -1.423442 1.637203e-01
x 5.857661 0.599618 9.768988 2.115182e-11
>
++++++++++++++++++++