gradient_descent:code01

Differences

This shows you the differences between two versions of the page.


Previous revision: gradient_descent:code01 [2025/12/18 02:12] – created hkimscil
Next revision: gradient_descent:code01 [2025/12/18 19:04] (current) hkimscil
Line 1: Line 1:
 <code>
+library(tidyverse)
+library(data.table)
 library(ggplot2)
 library(ggpmisc)
-library(tidyverse)
-library(data.table)
-
-# settle down
-rm(list=ls())
-
-ss <- function(x) {
-  return(sum((x-mean(x))^2))
-}
 
 # data preparation
Line 36: Line 29:
   theme_classic()
 
-# from what we know,
-# get the covariance value
-sp.yx <- sum((x-mean(x))*(y-mean(y)))
-df.yx <- length(y)-1
-sp.yx/df.yx
-# check with the cov function
-cov(x,y)
-# correlation value
-cov(x,y)/(sd(x)*sd(y))
-cor(x,y)
+# initialize betas
+# first, fix b and vary only a
+# to understand what happens
+b <- summary(mo)$coefficients[2]
+a <- 0
 
-# regression by hand:
-# b and a
-b <- sp.yx / ss(x) # b2 <- cov(x,y)/var(x)
-a <- mean(y) - b*(mean(x))
-a
-b
-
-# check the a and b values from the lm
-summary(mo)$coefficients[1]
-summary(mo)$coefficients[2]
-summary(mo)
-
-fit.yx <- a + b*x # predicted values of y from the x data
-res <- y - fit.yx # residuals (errors)
-reg <- fit.yx - mean(y) # regression (explained) part
-ss.res <- sum(res^2)
-ss.reg <- sum(reg^2)
-ss.res+ss.reg
-ss.tot <- ss(y)
-ss.tot
-
-plot(x,y)
-abline(a, b, col="red", lwd=2)
-plot(x, fit.yx)
-plot(x, res)
-
-df.y <- length(y)-1
-df.reg <- 2-1
-df.res <- df.y - df.reg
-df.res
-
-r.sq <- ss.reg / ss.tot
-r.sq
-summary(mo)$r.squared
-ms.reg <- ss.reg / df.reg
-ms.res <- ss.res / df.res
-
-f.cal <- ms.reg / ms.res
-f.cal
-pf(f.cal, df.reg, df.res, lower.tail = F)
-t.cal <- sqrt(f.cal)
-t.cal
-se.b <- sqrt(ms.res/ss(x))
-se.b
-t.cal <- (b-0)/se.b
-t.cal
-pt(t.cal, df=df.res, lower.tail = F)*2
-summary(mo)
-
-# getting a and b from
-# gradient descent
-a <- rnorm(1)
-b <- rnorm(1)
-a.start <- a
-b.start <- b
-a.start
-b.start
+b.init <- b
+a.init <- a
  
 # Predict function:
Line 113: Line 44:
  
 # And the loss function is:
-residuals <- function(fit, y) {
-  return(y - fit)
+residuals <- function(predictions, y) {
+  return(y - predictions)
 }
  
-gradient <- function(x, res){
-  db = -2 * mean(x * res)
-  da = -2 * mean(res)
-  return(list("b" = db, "a" = da))
+# we use the sum of squared errors, which can become big
+ssrloss <- function(predictions, y) {
+  residuals <- (predictions - y)
+  return(sum(residuals^2))
 }
 
-# to check ms.residual
-msrloss <- function(fit, y) {
-  res <- residuals(fit, y)
-  return(mean(res^2))
-}
+ssrs <- c() # for the sums of squared residuals
+srs <- c()  # for the sums of residuals
+as <- c()   # for the as (intercepts)
  
-# Train the model with scaled features
-learning.rate = 1e-1 # 0.1
-
-# Record loss for each epoch:
-as = c()
-bs = c()
-msrs = c()
-ssrs = c()
-mres = c()
-zx <- (x-mean(x))/sd(x)
-
-nlen <- 75
-for (epoch in 1:nlen) {
-  fit.val <- predict(zx, a, b)
-  residual <- residuals(fit.val, y)
-  loss <- msrloss(fit.val, y)
-  mres <- append(mres, mean(residual))
-  msrs <- append(msrs, loss)
-
-  grad <- gradient(zx, residual)
-  step.b <- grad$b * learning.rate
-  step.a <- grad$a * learning.rate
-  b <- b - step.b
-  a <- a - step.a
-
-  as <- append(as, a)
-  bs <- append(bs, b)
+for (i in seq(from = -50, to = 50, by = 0.01)) {
+  pred <- predict(x, i, b)
+  res <- residuals(pred, y)
+  ssr <- ssrloss(pred, y)
+  ssrs <- append(ssrs, ssr)
+  srs <- append(srs, sum(res))
+  as <- append(as, i)
 }
-msrs
-mres
-as
-bs
+length(ssrs)
+length(srs)
+length(as)
  
-# scaled a and b
-a
-b
+min(ssrs)
+min.pos.ssrs <- which(ssrs == min(ssrs))
+min.pos.ssrs
+print(as[min.pos.ssrs])
-# unscale coefficients to make them comprehensible
-# see http://commres.net/wiki/gradient_descent#why_normalize_scale_or_make_z-score_xi
-# and
-# http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
-#
-a = a - (mean(x) / sd(x)) * b
-b = b / sd(x)
-a
-b
-
-# changes of the estimators
-as <- as - (mean(x) / sd(x)) * bs
-bs <- bs / sd(x)
-
-as
-bs
-mres
-msrs
-
-parameters <- data.frame(as, bs, mres, msrs)
-
-cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
-
-summary(mo)$coefficients
-
-msrs <- data.frame(msrs)
-msrs.log <- data.table(epoch = 1:nlen, msrs)
-ggplot(msrs.log, aes(epoch, msrs)) +
-  geom_line(color="blue") +
-  theme_classic()
-
-mres <- data.frame(mres)
-mres.log <- data.table(epoch = 1:nlen, mres)
-ggplot(mres.log, aes(epoch, mres)) +
-  geom_line(color="red") +
-  theme_classic()
-
-ch <- data.frame(mres, msrs)
-ch
-max(y)
-ggplot(data, aes(x = x, y = y)) +
-  geom_point(size = 2) +
-  geom_abline(aes(intercept = as, slope = bs),
-              data = parameters, linewidth = 0.5,
-              color = 'green') +
-  stat_poly_line() +
-  stat_poly_eq(use_label(c("eq", "R2"))) +
-  theme_classic() +
-  geom_abline(aes(intercept = as, slope = bs),
-              data = parameters %>% slice_head(),
-              linewidth = 1, color = 'blue') +
-  geom_abline(aes(intercept = as, slope = bs),
-              data = parameters %>% slice_tail(),
-              linewidth = 1, color = 'red') +
-  labs(title = 'Gradient descent; blue: start, red: end, green: gradients')
 summary(mo)
-a.start
-b.start
-a
-b
-summary(mo)$coefficients[1]
-summary(mo)$coefficients[2]
+plot(seq(1, length(ssrs)), ssrs)
+plot(seq(1, length(ssrs)), srs)
+tail(ssrs)
+max(ssrs)
+min(ssrs)
+tail(srs)
+max(srs)
+min(srs)
 </code>
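
What the current revision computes, in one piece: it fixes the slope b at the lm() estimate, sweeps the intercept a over a grid, records the sum of squared residuals (SSR) for each candidate, and reads off the a that minimizes SSR. The diff hides the page's "# data preparation" and "# Predict function:" sections, so the x, y, and mo below are hypothetical stand-ins, and predict.ab is a stand-in for the page's predict function; a minimal sketch under those assumptions:

<code>
# a minimal, self-contained sketch; x, y, and mo replace the page's
# hidden "# data preparation" section with toy data
set.seed(1)
x <- 1:30
y <- 2.2 + 1.3 * x + rnorm(30, sd = 3)
mo <- lm(y ~ x)

b <- summary(mo)$coefficients[2]  # slope fixed at the lm estimate

# named predict.ab here to avoid masking stats::predict
predict.ab <- function(x, a, b) a + b * x
ssrloss <- function(predictions, y) sum((predictions - y)^2)

as <- seq(from = -50, to = 50, by = 0.01)   # candidate intercepts
ssrs <- sapply(as, function(a) ssrloss(predict.ab(x, a, b), y))

as[which.min(ssrs)]          # grid intercept that minimizes SSR
summary(mo)$coefficients[1]  # compare with lm()'s intercept
</code>

Plotting ssrs against as reproduces the U-shaped curve the revision's plot() calls display: with b fixed, SSR is a quadratic in a, so its minimum coincides with lm()'s intercept up to the 0.01 grid spacing.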
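For comparison, the block the new revision removed estimated both coefficients by gradient descent on standardized x. A compact sketch of that technique, under the same toy-data assumptions (redefined here so the snippet runs on its own):

<code>
# gradient descent on the mean squared residual, with x standardized
# (z-scored) so a single learning rate works for both parameters
set.seed(1)
x <- 1:30
y <- 2.2 + 1.3 * x + rnorm(30, sd = 3)
mo <- lm(y ~ x)

zx <- (x - mean(x)) / sd(x)
a <- rnorm(1)   # random starting values, as in the removed code
b <- rnorm(1)
learning.rate <- 0.1

for (epoch in 1:75) {
  res <- y - (a + b * zx)                         # current residuals
  a <- a - learning.rate * (-2 * mean(res))       # step along d(MSE)/da
  b <- b - learning.rate * (-2 * mean(zx * res))  # step along d(MSE)/db
}

# unscale so a and b refer to the original x (same formulas as the
# removed code; see the commres.net links it cites)
a <- a - (mean(x) / sd(x)) * b
b <- b / sd(x)
c(a, b)
summary(mo)$coefficients[, 1]  # lm()'s intercept and slope, for comparison
</code>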