====== Simple Regression in R ======
###########################
# regression sum up
###########################
rm(list = ls())
rnorm2 <- function(n,mean,sd){ mean+sd*scale(rnorm(n)) }   # draws n values with exactly the given sample mean and sd
set.seed(101)
n.s <- 36
m.x <- 100
df.x <- m.x - 1
ssq.x <- 100
x <- rnorm2(n.s, m.x, sqrt(9900/df.x))   # sqrt(9900/99) = 10, so x has mean 100 and sd exactly 10
y <- 5*x + rnorm2(n.s, 0, 36)            # y is linear in x plus noise with mean 0 and sd 36
df <- data.frame(x,y)
mod.r <- lm(y ~ x, data = df)
summary(mod.r)
sp.xy <- sum((x-mean(x))*(y-mean(y)))
df.tot <- length(y) - 1
cov.xy <- sp.xy / df.tot
cov.xy
cov(x,y)
cor.xy <- cov.xy / ((sd(x)*sd(y)))
cor.xy
cor(x,y)
ss.x <- sum((x-mean(x))^2)
ss.x
b <- sp.xy / ss.x
b
a <- mean(y)- b*mean(x)
a
y.pred <- a + b*x
head(y.pred)
data.frame(head(mod.r$fitted.values))
mt <- paste("a = ", round(a,2),
"\n", "b = ", round(b,3))
plot(x,y, main=mt)
abline(h=mean(y), col="green", lwd=2)
# abline(v=mean(x), col="blue", lwd=2)
abline(mod.r, col="red", lwd=2)
x[35]
y[35]
y.obs.35 <- y[35]
y.hat.35 <- mod.r$fitted.values[35]
y.hat.35
y.mean <- mean(y)
points(x = x[35], y = y.obs.35, col = "blue", pch = 16)
points(x = x[35], y = y.hat.35, col = "red", pch = 16)
points(x = x[35], y = y.mean, col = "black", pch = 16)
abline(v=x[35])
text(x[35], y.obs.35, labels="y.obs", pos = 2)
text(x[35], y.hat.35, labels="y.hat", pos = 2)
text(x[35], y.mean, labels="y.mean", pos = 2)
y.hat <- mod.r$fitted.values
y.hat <- y.pred
y.obs <- y
y.mean <- y.mean
res <- y.obs - y.hat
reg <- y.hat - y.mean
df.a <- data.frame(y.obs, y.hat, y.mean, res, reg)
df.a
y.obs.35 - y.hat.35
y.hat.35 - y.mean
res.sq <- res^2
reg.sq <- reg^2
df.b <- data.frame(df.a, res.sq, reg.sq)
df.b
ss.res <- sum(res.sq)
ss.reg <- sum(reg.sq)
ss.tot <- sum((y.obs-y.mean)^2)
ss.res
ss.reg
ss.tot
ss.res + ss.reg
df.tot <- length(y)-1
df.res <- length(y)-ncol(df)
df.reg <- ncol(df)-1
df.tot
df.res
df.reg
df.res+df.reg
ms.tot <- ss.tot / df.tot
ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
ms.tot
ms.reg
ms.res
f.cal <- ms.reg/ms.res
f.cal
df.reg
df.res
pf(f.cal, df.reg, df.res, lower.tail = F)
summary(mod.r)
explained.area.in.y <- ss.reg / ss.tot
explained.area.in.y
r.sq <- explained.area.in.y
se.res <- sqrt(ss.res/(df.res)) # standard deviation of res
se.res
sqrt(ms.res)
ss.x <- sum((x-mean(x))^2) # ss for x
se.b.x <- sqrt(ms.res/ss.x) # se for b
se.b.x
b.x <- mod.r$coefficients[2]
b.x
t.cal <- b.x/se.b.x
t.cal
df.res
pv.t.cal <- pt(t.cal, df.res, lower.tail = F)*2
pv.t.cal
t.cal
df.res
mod.f <- aov(mod.r)
summary(mod.f)
##################################
# gradient descent
##################################
a <- rnorm(1)   # random initial values for the intercept and slope
b <- rnorm(1)
a.init <- a
b.init <- b
predict <- function(x, a, b){
  return (a + b * x)
}
residuals <- function(predictions, y) {
  return(y - predictions)
}
# partial derivatives of the MSE loss with respect to b and a
gradient <- function(x, y, predictions){
  error = y - predictions
  db = -2 * mean(x * error)   # d(MSE)/db
  da = -2 * mean(error)       # d(MSE)/da
  return(list("b" = db, "a" = da))
  # return(c(db, da))
}
# mean squared error loss
mseloss <- function(predictions, y) {
  residuals <- (y - predictions)
  return(mean(residuals^2))
}
# Train the model with scaled features
learning.rate = 1e-1
# Record Loss for each epoch:
# as = c()
# bs = c()
mses = c()
# sses = c()
# mres = c()
zx <- (x-mean(x))/sd(x)   # z-score x; scaling helps gradient descent converge (see the links below)
nlen <- 75
for (epoch in 1:nlen) {
  predictions <- predict(zx, a, b)
  grad <- gradient(zx, y, predictions)
  step.b <- grad$b * learning.rate
  step.a <- grad$a * learning.rate
  b <- b - step.b
  a <- a - step.a
}
# mses
# mres
# as
# bs
# scaled
a
b
# unscale coefficients to make them comprehensible
# see http://commres.net/wiki/gradient_descent#why_normalize_scale_or_make_z-score_xi
# and
# http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
#
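# A sketch of the algebra (writing a.z and b.z for the coefficients fitted on zx):
#   y ~ a.z + b.z*zx = a.z + b.z*(x - mean(x))/sd(x)
#     = (a.z - b.z*mean(x)/sd(x)) + (b.z/sd(x))*x
# so the unscaled slope is b/sd(x) and the unscaled intercept is a - (mean(x)/sd(x))*b;
# note that a is updated first, while b still holds the scaled slope.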
a = a - (mean(x) / sd(x)) * b
b = b / sd(x)
a
b
cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
summary(lm(y~x))$coefficients
====== output ======
===== 1 =====
> ###########################
> # regression sum up
> ###########################
> rm(list = ls())
> rnorm2 <- function(n,mean,sd) {
> + mean+sd*scale(rnorm(n))
> + }
> set.seed(101)
> n.s <- 36
> m.x <- 100
> df.x <- m.x - 1
> ssq.x <- 100
> x <- rnorm2(n.s, m.x, sqrt(9900/df.x))
> y <- 5*x + rnorm2(n.s, 0, 36)
> df <- data.frame(x,y)
>
++++++++++++++++++++
===== 2 =====
> mod.r <- lm(y ~ x, data = df)
> summary(mod.r)
Call:
lm(formula = y ~ x, data = df)
Residuals:
Min 1Q Median 3Q Max
-91.412 -22.328 7.718 20.489 72.508
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -85.7661 60.2526 -1.423 0.164
x 5.8577 0.5996 9.769 2.12e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 35.47 on 34 degrees of freedom
Multiple R-squared: 0.7373, Adjusted R-squared: 0.7296
F-statistic: 95.43 on 1 and 34 DF, p-value: 2.115e-11
>
++++++++++++++++++++
===== 3 =====
> sp.xy <- sum((x-mean(x))*(y-mean(y)))
> df.tot <- length(y) - 1
> cov.xy <- sp.xy / df.tot
> cov.xy
[1] 585.7661
> cov(x,y)
[,1]
[1,] 585.7661
>
> cor.xy <- cov.xy / ((sd(x)*sd(y)))
> cor.xy
[1] 0.8586711
> cor(x,y)
[,1]
[1,] 0.8586711
>
++++++++++++++++++++
===== 4 =====
> ss.x <- sum((x-mean(x))^2)
> ss.x
[1] 3500
> b <- sp.xy / ss.x
> b
[1] 5.857661
> a <- mean(y)- b*mean(x)
> a
[1] -85.76606
>
>
++++++++++++++++++++
===== 5 =====
> y.pred <- a + b*x
> head(y.pred)
[,1]
[1,] 482.2777
[2,] 539.3226
[3,] 459.6216
[4,] 517.3681
[5,] 523.6284
[6,] 579.6797
>
> data.frame(head(mod.r$fitted.values))
head.mod.r.fitted.values.
1 482.2777
2 539.3226
3 459.6216
4 517.3681
5 523.6284
6 579.6797
>
> mt <- paste("a = ", round(a,2),
+ "\n", "b = ", round(b,3))
> plot(x,y, main=mt)
> abline(h=mean(y), col="green", lwd=2)
> # abline(v=mean(x), col="blue", lwd=2)
> abline(mod.r, col="red", lwd=2)
> x[35]
[1] 113.7788
> y[35]
[1] 630.7971
> y.obs.35 <- y[35]
> y.hat.35 <- mod.r$fitted.values[35]
> y.hat.35
35
580.7113
> y.mean <- mean(y)
>
> points(x = x[35], y = y.obs.35, col = "blue", pch = 16)
> points(x = x[35], y = y.hat.35, col = "red", pch = 16)
> points(x = x[35], y = y.mean, col = "black", pch = 16)
> abline(v=x[35])
> text(x[35], y.obs.35, labels="y.obs", pos = 2)
> text(x[35], y.hat.35, labels="y.hat", pos = 2)
> text(x[35], y.mean, labels="y.mean", pos = 2)
>
>
++++++++++++++++++++
{{:r:pasted:20251013-073857.png}}
===== 6 =====
> y.hat <- mod.r$fitted.values
> y.hat <- y.pred
>
> y.obs <- y
> y.mean <- y.mean
>
> res <- y.obs - y.hat
> reg <- y.hat - y.mean
>
> df.a <- data.frame(y.obs, y.hat, y.mean, res, reg)
> df.a
y.obs y.hat y.mean res reg
1 495.2865 482.2777 500 13.0088479 -17.722299
2 572.8469 539.3226 500 33.5242925 39.322571
3 405.3625 459.6216 500 -54.2590625 -40.378431
4 536.0013 517.3681 500 18.6332683 17.368054
5 542.6492 523.6284 500 19.0208401 23.628375
6 601.0744 579.6797 500 21.3947218 79.679665
7 452.2171 543.6295 500 -91.4124153 43.629548
8 483.0325 496.1284 500 -13.0959080 -3.871624
9 515.3267 562.9955 500 -47.6688104 62.995518
10 509.9980 488.9515 500 21.0465311 -11.048516
11 557.9217 537.6334 500 20.2883651 37.633380
12 435.7639 451.8359 500 -16.0719728 -48.164119
13 574.9176 596.1593 500 -21.2417014 96.159349
14 368.6743 408.2015 500 -39.5272035 -91.798511
15 449.6582 488.0798 500 -38.4216090 -11.920198
16 485.6646 490.8944 500 -5.2298139 -9.105584
17 481.9870 448.2703 500 33.7166607 -51.729694
18 456.5535 507.2452 500 -50.6916186 7.245154
19 490.3326 450.3537 500 39.9789139 -49.646309
20 352.9357 370.3130 500 -17.3772823 -129.687034
21 504.1814 492.8153 500 11.3660873 -7.184671
22 589.5343 549.4563 500 40.0780792 49.456269
23 537.0947 486.0475 500 51.0471491 -13.952464
24 409.3543 408.3897 500 0.9646127 -91.610336
25 538.2055 551.7883 500 -13.5828576 51.788309
26 374.5917 411.8657 500 -37.2740105 -88.134288
27 508.1914 533.7775 500 -25.5860867 33.777536
28 504.6137 495.7007 500 8.9129424 -4.299270
29 549.0252 533.7887 500 15.2365812 33.788663
30 549.3653 535.7949 500 13.5703706 35.794919
31 530.1250 561.5610 500 -31.4359983 61.561044
32 528.0982 521.5753 500 6.5228603 21.575326
33 560.5632 568.8940 500 -8.3308199 68.894006
34 389.1352 368.8325 500 20.3026105 -131.167455
35 630.7971 580.7113 500 50.0858331 80.711286
36 528.9194 456.4118 500 72.5076032 -43.588169
>
> y.obs.35 - y.hat.35
35
50.08583
> y.hat.35 - y.mean
35
80.71129
>
> res.sq <- res^2
> reg.sq <- reg^2
> df.b <- data.frame(df.a, res.sq, reg.sq)
> df.b
y.obs y.hat y.mean res reg res.sq
1 495.2865 482.2777 500 13.0088479 -17.722299 169.2301237
2 572.8469 539.3226 500 33.5242925 39.322571 1123.8781868
3 405.3625 459.6216 500 -54.2590625 -40.378431 2944.0458598
4 536.0013 517.3681 500 18.6332683 17.368054 347.1986863
5 542.6492 523.6284 500 19.0208401 23.628375 361.7923589
6 601.0744 579.6797 500 21.3947218 79.679665 457.7341189
7 452.2171 543.6295 500 -91.4124153 43.629548 8356.2296621
8 483.0325 496.1284 500 -13.0959080 -3.871624 171.5028055
9 515.3267 562.9955 500 -47.6688104 62.995518 2272.3154845
10 509.9980 488.9515 500 21.0465311 -11.048516 442.9564724
11 557.9217 537.6334 500 20.2883651 37.633380 411.6177585
12 435.7639 451.8359 500 -16.0719728 -48.164119 258.3083084
13 574.9176 596.1593 500 -21.2417014 96.159349 451.2098789
14 368.6743 408.2015 500 -39.5272035 -91.798511 1562.3998194
15 449.6582 488.0798 500 -38.4216090 -11.920198 1476.2200395
16 485.6646 490.8944 500 -5.2298139 -9.105584 27.3509539
17 481.9870 448.2703 500 33.7166607 -51.729694 1136.8132091
18 456.5535 507.2452 500 -50.6916186 7.245154 2569.6401982
19 490.3326 450.3537 500 39.9789139 -49.646309 1598.3135561
20 352.9357 370.3130 500 -17.3772823 -129.687034 301.9699398
21 504.1814 492.8153 500 11.3660873 -7.184671 129.1879413
22 589.5343 549.4563 500 40.0780792 49.456269 1606.2524356
23 537.0947 486.0475 500 51.0471491 -13.952464 2605.8114284
24 409.3543 408.3897 500 0.9646127 -91.610336 0.9304776
25 538.2055 551.7883 500 -13.5828576 51.788309 184.4940217
26 374.5917 411.8657 500 -37.2740105 -88.134288 1389.3518589
27 508.1914 533.7775 500 -25.5860867 33.777536 654.6478348
28 504.6137 495.7007 500 8.9129424 -4.299270 79.4405414
29 549.0252 533.7887 500 15.2365812 33.788663 232.1534061
30 549.3653 535.7949 500 13.5703706 35.794919 184.1549582
31 530.1250 561.5610 500 -31.4359983 61.561044 988.2219908
32 528.0982 521.5753 500 6.5228603 21.575326 42.5477061
33 560.5632 568.8940 500 -8.3308199 68.894006 69.4025607
34 389.1352 368.8325 500 20.3026105 -131.167455 412.1959914
35 630.7971 580.7113 500 50.0858331 80.711286 2508.5906729
36 528.9194 456.4118 500 72.5076032 -43.588169 5257.3525227
reg.sq
1 314.07989
2 1546.26461
3 1630.41773
4 301.64929
5 558.30011
6 6348.84894
7 1903.53748
8 14.98947
9 3968.43534
10 122.06970
11 1416.27132
12 2319.78237
13 9246.62035
14 8426.96671
15 142.09111
16 82.91166
17 2675.96119
18 52.49225
19 2464.75603
20 16818.72676
21 51.61950
22 2445.92253
23 194.67126
24 8392.45357
25 2682.02895
26 7767.65273
27 1140.92197
28 18.48372
29 1141.67377
30 1281.27626
31 3789.76215
32 465.49470
33 4746.38400
34 17204.90124
35 6514.31162
36 1899.92850
> ss.res <- sum(res.sq)
> ss.reg <- sum(reg.sq)
> ss.tot <- sum((y.obs-y.mean)^2)
> ss.res
[1] 42785.46
> ss.reg
[1] 120092.7
> ss.tot
[1] 162878.1
> ss.res + ss.reg
[1] 162878.1
>
> df.tot <- length(y)-1
> df.res <- length(y)-ncol(df)
> df.reg <- ncol(df)-1
> df.tot
[1] 35
> df.res
[1] 34
> df.reg
[1] 1
> df.res+df.reg
[1] 35
>
>
++++++++++++++++++++
Compute the following explicitly:
* y.obs: the observed y values
* y.hat: the predicted values
* y.mean: the mean of y
Then obtain the residual part, the regression part (the explained value), and the total part that goes into SS total (the SS used when computing the variance of y):
* res = y.obs - y.hat
* reg = y.hat - y.mean
* tot = y.obs - y.mean
Take the sum of squares of each:
* ss.res <- sum(res^2)
* ss.reg <- sum(reg^2)
* ss.tot <- sum(tot^2)
The corresponding degrees of freedom are (a quick check follows below):
* df.res <- n - (# of parameters (a and b) = 2) = 36 - 2 = 34
* df.reg <- # of parameters - 1 = 2 - 1 = 1
* df.tot <- # of observations (36) - 1 = 35
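A quick check of the decomposition (a minimal sketch, assuming ss.res, ss.reg, ss.tot, x, y, and mod.r from the code above are still in the workspace):

  all.equal(ss.res + ss.reg, ss.tot)   # SS total = SS regression + SS residual
  ss.reg / ss.tot                      # proportion of variation in y that is explained
  cor(x, y)^2                          # the same value: r squared
  summary(mod.r)$r.squared             # lm agrees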
===== 7 =====
> ms.tot <- ss.tot / df.tot
> ms.reg <- ss.reg / df.reg
> ms.res <- ss.res / df.res
> ms.tot
[1] 4653.661
> ms.reg
[1] 120092.7
> ms.res
[1] 1258.396
>
++++++++++++++++++++
**MS = variance**
MS total = the variance of y
MS res = the variance of the residuals = the variation that remains random even with the independent variable in the model
MS reg = the variance due to regression = the variation produced by the influence of the independent variable
The F value is the variation caused by the independent variable divided by the random variation = MS reg / MS res (a cross-check against R's ANOVA table follows below).
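The same table can be pulled straight from the fitted model; a minimal sketch, assuming mod.r, ms.reg, ms.res, and f.cal from the code above:

  anova(mod.r)              # Df, Sum Sq, Mean Sq, F value, Pr(>F) for x and Residuals
  c(ms.reg, ms.res, f.cal)  # should match the Mean Sq column and the F value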
===== 8 =====
> f.cal <- ms.reg/ms.res
> f.cal
[1] 95.43312
> df.reg
[1] 1
> df.res
[1] 34
> pf(f.cal, df.reg, df.res, lower.tail = F)
[1] 2.115182e-11
> summary(mod.r)
Call:
lm(formula = y ~ x, data = df)
Residuals:
Min 1Q Median 3Q Max
-91.412 -22.328 7.718 20.489 72.508
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -85.7661 60.2526 -1.423 0.164
x 5.8577 0.5996 9.769 2.12e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 35.47 on 34 degrees of freedom
Multiple R-squared: 0.7373, Adjusted R-squared: 0.7296
F-statistic: 95.43 on 1 and 34 DF, p-value: 2.115e-11
>
> explained.area.in.y <- ss.reg / ss.tot
> explained.area.in.y
[1] 0.7373161
> r.sq <- explained.area.in.y
>
++++++++++++++++++++
===== 9 =====
> se.res <- sqrt(ss.res/(df.res)) # standard deviation of res
> se.res
[1] 35.47388
> sqrt(ms.res)
[1] 35.47388
>
++++++++++++++++++++
The standard deviation of the residuals
= sqrt(the variance of the residuals); lm reports the same number, as checked below.
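A minimal cross-check, assuming mod.r and se.res from the code above; lm stores the same quantity as the "Residual standard error":

  summary(mod.r)$sigma   # residual standard error kept by lm
  sigma(mod.r)           # stats::sigma() returns it directly
  se.res                 # the hand-computed value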
===== 10 =====
> ss.x <- sum((x-mean(x))^2) # ss for x
> se.b.x <- sqrt(ms.res/ss.x) # se for b
> se.b.x
[1] 0.599618
> b.x <- mod.r$coefficients[2]
> b.x
x
5.857661
> t.cal <- b.x/se.b.x
> t.cal
x
9.768988
> df.res
[1] 34
> pv.t.cal <- pt(t.cal, df.res, lower.tail = F)*2
> pv.t.cal
x
2.115182e-11
> t.cal
x
9.768988
> df.res
[1] 34
> t.cal^2
x
95.43312
>
> mod.f <- aov(mod.r)
> summary(mod.f)
Df Sum Sq Mean Sq F value Pr(>F)
x 1 120093 120093 95.43 2.12e-11 ***
Residuals 34 42785 1258
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
++++++++++++++++++++
* The standard error of b (the standard deviation of the b estimate)
* ''sqrt(ms.res/ss.x)''
* The significance test for b =
* has b (the slope) contributed to explaining y? =
* it tests the hypothesis that the slope does contribute
* if the slope contributed nothing,
* it would do no better than simply using the mean of y, so
* test with ''(b - 0)/se.b''
* this value is the t value (t.cal); a confidence-interval sketch follows below
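A minimal sketch of the matching confidence interval, assuming b.x, se.b.x, df.res, and mod.r from the code above (t.crit is just a local name introduced here):

  t.crit <- qt(0.975, df.res)                       # critical t for a two-sided 95% CI
  c(b.x - t.crit * se.b.x, b.x + t.crit * se.b.x)   # CI built from the hand-computed SE
  confint(mod.r, "x", level = 0.95)                 # lm's interval agrees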
===== 11 =====
> ##################################
> # gradient descent
> ##################################
> predict <- function(x, a, b){
+ return (a + b * x)
+ }
>
> residuals <- function(predictions, y) {
+ return(y - predictions)
+ }
>
> gradient <- function(x, y, predictions){
+ error = y - predictions
+ db = -2 * mean(x * error)
+ da = -2 * mean(error)
+ return(list("b" = db, "a" = da))
+ }
>
> mseloss <- function(predictions, y) {
+ residuals <- (y - predictions)
+ return(mean(residuals^2))
+ }
>
++++++++++++++++++++
===== 12 =====
> # Train the model with scaled features
> learning.rate = 1e-1
>
++++++++++++++++++++
===== 13 =====
> a <- rnorm(1)
> b <- rnorm(1)
> a.init <- a
> b.init <- b
>
> zx <- (x-mean(x))/sd(x)
>
++++++++++++++++++++
===== 14 =====
> nlen <- 75
> for (epoch in 1:nlen) {
+ predictions <- predict(zx, a, b)
+
+ grad <- gradient(zx, y, predictions)
+
+ step.b <- grad$b * learning.rate
+ step.a <- grad$a * learning.rate
+ b <- b-step.b
+ a <- a-step.a
+ }
>
> # scaled a and b
> a
[1] 500
> b
[1] 58.5766
>
> # unscale coefficients to make them comprehensible
> # see http://commres.net/wiki/gradient_descent#why_normalize_scale_or_make_z-score_xi
> # and
> # http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
> #
> a = a - (mean(x) / sd(x)) * b
> b = b / sd(x)
> a
[1] -85.76604
> b
[1] 5.85766
>
> cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
Intercept: -85.7660381466164
Slope: 5.85766011248848
> summary(lm(y~x))$coefficients
Estimate Std. Error t value Pr(>|t|)
(Intercept) -85.766064 60.252573 -1.423442 1.637203e-01
x 5.857661 0.599618 9.768988 2.115182e-11
>
++++++++++++++++++++