gradient_descent:code01
Differences
This shows you the differences between two versions of the page.
Lines prefixed with - were removed from the previous revision, lines prefixed with + were added in the current revision, and unprefixed lines are unchanged context. Some long lines are truncated in the diff view and appear here cut short.

Previous revision: gradient_descent:code01 [2025/12/18 02:12] – created hkimscil
Current revision: gradient_descent:code01 [2025/12/18 19:04] – hkimscil
Line 1 (previous) / Line 1 (current):

  <code>
+ library(tidyverse)
+ library(data.table)
  library(ggplot2)
  library(ggpmisc)
- library(tidyverse)
- library(data.table)
-
- # settle down
- rm(list=ls())
-
- ss <- function(x) {
-   return(sum((x-mean(x))^2))
- }
  # data preparation
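The removed ss() helper returned the sum of squared deviations around the mean, SS(x) = sum((x - mean(x))^2), which the rest of the removed code uses for both the slope and the error terms. As a quick check of what it computes (the vector here is illustrative, not the page's data):

<code>
ss <- function(x) {
  return(sum((x - mean(x))^2))
}
x <- c(2, 4, 6, 8)          # illustrative vector, not the page's dataset
ss(x)                       # 20
var(x) * (length(x) - 1)    # also 20: SS equals the variance times (n - 1)
</code>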
Line 36 (previous) / Line 29 (current):

  theme_classic()
- # from what we know
- # get covariance value
- sp.yx <- sum((x-mean(x))*(y-mean(y)))
- df.yx <- length(y)-1
- sp.yx/df.yx
- # check with cov function
- cov(x,y)
- # correlation value
- cov(x,y)/(sd(x)*sd(y))
- cor(x,y)
- # regression by hand
- # b and a
- b <- sp.yx / ss(x) # b2 <- cov(x,y)/var(x)
- a <- mean(y) - b*mean(x)
- a
- b
-
- # check a and b value from the lm
- summary(mo)$coefficient[1]
- summary(mo)$coefficient[2]
- summary(mo)
-
- fit.yx <- a + b*x # predicted value of y from x data
- res <- y - fit.yx # error residuals
- reg <- fit.yx - mean(y) # error regressions
- ss.res <- sum(res^2)
- ss.reg <- sum(reg^2)
- ss.res+ss.reg
- ss.tot <- ss(y)
- ss.tot
-
- plot(x,y)
- abline(a, b, col="
- plot(x, fit.yx)
- plot(x, res)
-
- df.y <- length(y)-1
- df.reg <- 2-1
- df.res <- df.y - df.reg
- df.res
-
- r.sq <- ss.reg / ss.tot
- r.sq
- summary(mo)$r.square
- ms.reg <- ss.reg / df.reg
- ms.res <- ss.res / df.res
-
- f.cal <- ms.reg / ms.res
- f.cal
- pf(f.cal, df.reg, df.res, lower.tail = F)
- t.cal <- sqrt(f.cal)
- t.cal
- se.b <- sqrt(ms.res/ss(x))
- se.b
- t.cal <- (b-0)/se.b
- t.cal
- pt(t.cal, df=df.res, lower.tail = F)*2
- summary(mo)
-
- # getting a and b from
- # gradient descent
- a <- rnorm(1)
- b <- rnorm(1)
- a.start <- a
- b.start <- b
- a.start
- b.start
+ # Initialize random betas
+ # fix b first and change only a
+ # to understand what happens
+ b <- summary(mo)$coefficients[2]
+ a <- 0
+
+ b.init <- b
+ a.init <- a
  # Predict function:
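Several removed lines above are cut off in the diff view (the abline() color, the pf() tail argument, and the se.b formula, for example). For reference, a minimal self-contained sketch of the same regression-by-hand computation; the simulated x and y are illustrative stand-ins, since the page's data-preparation block sits outside this diff:

<code>
set.seed(1)                        # illustrative data, not the page's dataset
x <- rnorm(30, 50, 10)
y <- 2 + 0.8 * x + rnorm(30, 0, 5)
mo <- lm(y ~ x)

ss <- function(v) sum((v - mean(v))^2)
sp.yx <- sum((x - mean(x)) * (y - mean(y)))
b <- sp.yx / ss(x)                 # slope: SP(x,y) / SS(x)
a <- mean(y) - b * mean(x)         # intercept
c(a, b)
summary(mo)$coefficients[, 1]      # should match a and b

fit.yx <- a + b * x
res <- y - fit.yx                  # residual (error) part
reg <- fit.yx - mean(y)            # regression (explained) part
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
df.res <- length(y) - 2

f.cal <- (ss.reg / 1) / (ss.res / df.res)
pf(f.cal, 1, df.res, lower.tail = FALSE)      # p value of the F test

se.b <- sqrt((ss.res / df.res) / ss(x))       # standard error of the slope
t.cal <- (b - 0) / se.b                       # equals sqrt(f.cal) here
pt(t.cal, df = df.res, lower.tail = FALSE) * 2
</code>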
Line 113 (previous) / Line 44 (current):

  # And loss function is:
- residuals <- function(fit, y) {
-   return(y - fit)
+ residuals <- function(predictions, y) {
+   return(y - predictions)
  }
- gradient
-   da = -2 * mean(res)
-   return(list("
+ # we use the sum of squared errors, which often becomes large
+ ssrloss <- function(residuals) {
+   return(sum(residuals^2))
  }
- # to check ms.residual
- msrloss <- function(res) {
-   return(mean(res^2))
- }
- # Train the model with scaled features
- learning.rate = 1e-1 # 0.1
-
- # Record Loss for each epoch:
- as = c()
- bs = c()
- msrs = c()
- ssrs = c()
- mres = c()
- zx <- (x-mean(x))/sd(x)
-
- nlen <- 75
- for (epoch in 1:nlen) {
-   grad <- gradient(zx, res)
-   step.b <- grad$b * learning.rate
-   step.a <- grad$a * learning.rate
-   b <- b-step.b
-   a <- a-step.a
-
-   as <- append(as, a)
-   bs <- append(bs, b)
+ ssrs <- c() # for sum of square residuals
+ srs <- c() # sum of residuals
+ as <- c() # for as (intercepts)
+
+ for (i in seq(from = -50, to = 50, by = 0.01)) {
+   as <- append(as, i)
  }
- msrs
- mres
- as
- bs
- # scaled
- a
- b
+ length(ssrs)
+ length(srs)
+ length(as)
+
+ min(ssrs)
+ min.pos.ssrs <- which(ssrs == min(ssrs))
+ min.pos.ssrs
+ print(as[min.pos.ssrs])
-
- # unscale coefficients to make them comprehensible
- # see http://
- # and
- # http://
- #
- a = a - (mean(x) / sd(x)) * b
- b = b / sd(x)
- a
- b
-
- # changes of estimators
- as <- as - (mean(x) /sd(x)) * bs
- bs <- bs / sd(x)
-
- as
- bs
- mres
- msrs
-
- parameters <- data.frame(as, bs, mres, msrs)
-
- cat(paste0("
-
- summary(mo)$coefficients
-
- msrs <- data.frame(msrs)
- msrs.log <- data.table(epoch = 1:nlen, msrs)
- ggplot(msrs.log, aes(epoch, msrs)) +
-   geom_line(color="
-   theme_classic()
-
- mres <- data.frame(mres)
- mres.log <- data.table(epoch = 1:nlen, mres)
- ggplot(mres.log, aes(epoch, mres)) +
-   geom_line(color="
-   theme_classic()
-
- ch <- data.frame(mres, msrs)
- ch
- max(y)
- ggplot(data,
-   geom_point(size = 2) +
-   geom_abline(aes(intercept = as, slope = bs),
-     data = parameters, linewidth = 0.5,
-     color = '
-   stat_poly_line() +
-   stat_poly_eq(use_label(c("
-   theme_classic() +
-   geom_abline(aes(intercept = as, slope = bs),
-     data = parameters %>% slice_head(),
-     linewidth = 1, color = '
-   geom_abline(aes(intercept = as, slope = bs),
-     data = parameters %>% slice_tail(),
-     linewidth = 1, color = '
-   labs(title = '
  summary(mo)
- a.start
- b.start
- a
- b
- summary(mo)$coefficient[1]
- summary(mo)$coefficient[2]
+ plot(seq(1, length(ssrs)),
+ plot(seq(1, length(ssrs)),
+ tail(ssrs)
+ max(ssrs)
+ min(ssrs)
+ tail(srs)
+ max(srs)
+ min(srs)
  </code>
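In the removed gradient-descent block, the gradient() function and the first lines of the training loop are cut off. A minimal runnable sketch of what that block plausibly did, assuming the loop computed predictions and residuals on the scaled predictor zx and logged the mean-squared-residual loss; the db line, the predict() body, and the simulated data are assumptions, not recovered text:

<code>
set.seed(1)                         # illustrative data, not the page's dataset
x <- rnorm(30, 50, 10)
y <- 2 + 0.8 * x + rnorm(30, 0, 5)

predict <- function(x, a, b) a + b * x        # assumed predict function
residuals <- function(fit, y) y - fit
msrloss <- function(res) mean(res^2)
gradient <- function(x, res) {
  db <- -2 * mean(x * res)          # assumed: d(MSE)/db, cut off in the diff
  da <- -2 * mean(res)              # recovered from the diff
  list("a" = da, "b" = db)          # names match the grad$a / grad$b usage
}

zx <- (x - mean(x)) / sd(x)         # scaled feature, as in the removed code
a <- rnorm(1)
b <- rnorm(1)
learning.rate <- 1e-1
msrs <- c(); as <- c(); bs <- c()
for (epoch in 1:75) {
  fit <- predict(zx, a, b)          # assumed loop body
  res <- residuals(fit, y)
  msrs <- append(msrs, msrloss(res))
  grad <- gradient(zx, res)
  b <- b - grad$b * learning.rate
  a <- a - grad$a * learning.rate
  as <- append(as, a)
  bs <- append(bs, b)
}
a <- a - (mean(x) / sd(x)) * b      # unscale, as in the removed code
b <- b / sd(x)
c(a, b)                             # should approach coef(lm(y ~ x))
</code>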
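The current revision replaces gradient descent with a brute-force scan: b is fixed at the lm() estimate and candidate intercepts run from -50 to 50 in steps of 0.01. The loop body is cut off above; a sketch assuming it records the sum of squared residuals (ssrs) and the plain residual sums (srs) for each candidate intercept:

<code>
set.seed(1)                         # illustrative data, not the page's dataset
x <- rnorm(30, 50, 10)
y <- 2 + 0.8 * x + rnorm(30, 0, 5)
mo <- lm(y ~ x)

residuals <- function(predictions, y) y - predictions
ssrloss <- function(residuals) sum(residuals^2)

b <- summary(mo)$coefficients[2]    # fix b, vary only a
ssrs <- c(); srs <- c(); as <- c()
for (i in seq(from = -50, to = 50, by = 0.01)) {
  preds <- i + b * x                # assumed loop body
  res <- residuals(preds, y)
  ssrs <- append(ssrs, ssrloss(res))
  srs <- append(srs, sum(res))
  as <- append(as, i)
}
min.pos.ssrs <- which(ssrs == min(ssrs))
as[min.pos.ssrs]                    # should be near coef(mo)[1]
plot(seq(1, length(ssrs)), ssrs)    # U-shaped loss curve over the intercepts
</code>

As a function of the intercept, srs falls linearly and crosses zero at the least-squares estimate, while ssrs is a parabola with its minimum at the same point, which is presumably why the revision tracks and plots both.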