gradient_descent:code01
Differences
This shows you the differences between two versions of the page.
Lines prefixed with - were removed from the previous revision, lines prefixed with + were added in the current revision, and unprefixed lines are unchanged context. Some long lines are truncated in the diff view and appear here cut short.

Previous revision: gradient_descent:code01 [2025/12/18 02:12] – created hkimscil
Current revision: gradient_descent:code01 [2025/12/18 19:04] – hkimscil
Line 1 (previous) / Line 1 (current):

  <code>
+ library(tidyverse)
+ library(data.table)
  library(ggplot2)
  library(ggpmisc)
- library(tidyverse)
- library(data.table)
-
- # settle down
- rm(list=ls())
-
- ss <- function(x) {
-   return(sum((x-mean(x))^2))
- }
  # data preparation
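The removed ss() helper returned the sum of squared deviations around the mean, SS(x) = sum((x - mean(x))^2), which the rest of the removed code uses for both the slope and the error terms. As a quick check of what it computes (the vector here is illustrative, not the page's data):

<code>
ss <- function(x) {
  return(sum((x - mean(x))^2))
}
x <- c(2, 4, 6, 8)          # illustrative vector, not the page's dataset
ss(x)                       # 20
var(x) * (length(x) - 1)    # also 20: SS equals the variance times (n - 1)
</code>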
Line 36 (previous) / Line 29 (current):

  theme_classic()
- # from what we know
- # get covariance value
- sp.yx <- sum((x-mean(x))*(y-mean(y)))
- df.yx <- length(y)-1
- sp.yx/df.yx
- # check with cov function
- cov(x,y)
- # correlation value
- cov(x,y)/(sd(x)*sd(y))
- cor(x,y)
- # regression by hand
- # b and a
- b <- sp.yx / ss(x) # b2 <- cov(x,y)/var(x)
- a <- mean(y) - b*mean(x)
- a
- b
-
- # check a and b value from the lm
- summary(mo)$coefficient[1]
- summary(mo)$coefficient[2]
- summary(mo)
-
- fit.yx <- a + b*x # predicted value of y from x data
- res <- y - fit.yx # error residuals
- reg <- fit.yx - mean(y) # error regressions
- ss.res <- sum(res^2)
- ss.reg <- sum(reg^2)
- ss.res+ss.reg
- ss.tot <- ss(y)
- ss.tot
-
- plot(x,y)
- abline(a, b, col="
- plot(x, fit.yx)
- plot(x, res)
-
- df.y <- length(y)-1
- df.reg <- 2-1
- df.res <- df.y - df.reg
- df.res
-
- r.sq <- ss.reg / ss.tot
- r.sq
- summary(mo)$r.square
- ms.reg <- ss.reg / df.reg
- ms.res <- ss.res / df.res
-
- f.cal <- ms.reg / ms.res
- f.cal
- pf(f.cal, df.reg, df.res, lower.tail = F)
- t.cal <- sqrt(f.cal)
- t.cal
- se.b <- sqrt(ms.res/ss(x))
- se.b
- t.cal <- (b-0)/se.b
- t.cal
- pt(t.cal, df=df.res, lower.tail = F)*2
- summary(mo)
-
- # getting a and b from
- # gradient descent
- a <- rnorm(1)
- b <- rnorm(1)
- a.start <- a
- b.start <- b
- a.start
- b.start
+ # Initialize random betas
+ # fix b first and change only a
+ # to understand what happens
+ b <- summary(mo)$coefficients[2]
+ a <- 0
+
+ b.init <- b
+ a.init <- a
  # Predict function:
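Several removed lines above are cut off in the diff view (the abline() color, the pf() tail argument, and the se.b formula, for example). For reference, a minimal self-contained sketch of the same regression-by-hand computation; the simulated x and y are illustrative stand-ins, since the page's data-preparation block sits outside this diff:

<code>
set.seed(1)                        # illustrative data, not the page's dataset
x <- rnorm(30, 50, 10)
y <- 2 + 0.8 * x + rnorm(30, 0, 5)
mo <- lm(y ~ x)

ss <- function(v) sum((v - mean(v))^2)
sp.yx <- sum((x - mean(x)) * (y - mean(y)))
b <- sp.yx / ss(x)                 # slope: SP(x,y) / SS(x)
a <- mean(y) - b * mean(x)         # intercept
c(a, b)
summary(mo)$coefficients[, 1]      # should match a and b

fit.yx <- a + b * x
res <- y - fit.yx                  # residual (error) part
reg <- fit.yx - mean(y)            # regression (explained) part
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
df.res <- length(y) - 2

f.cal <- (ss.reg / 1) / (ss.res / df.res)
pf(f.cal, 1, df.res, lower.tail = FALSE)      # p value of the F test

se.b <- sqrt((ss.res / df.res) / ss(x))       # standard error of the slope
t.cal <- (b - 0) / se.b                       # equals sqrt(f.cal) here
pt(t.cal, df = df.res, lower.tail = FALSE) * 2
</code>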
Line 113 (previous) / Line 44 (current):

  # And loss function is:
- residuals <- function(fit, y) {
-   return(y - fit)
+ residuals <- function(predictions, y) {
+   return(y - predictions)
  }
- gradient
-   da = -2 * mean(res)
-   return(list("
+ # we use the sum of squared errors, which often becomes large
+ ssrloss <- function(residuals) {
+   return(sum(residuals^2))
  }
- # to check ms.residual
- msrloss <- function(res) {
-   return(mean(res^2))
- }
- # Train the model with scaled features
- learning.rate = 1e-1 # 0.1
-
- # Record Loss for each epoch:
- as = c()
- bs = c()
- msrs = c()
- ssrs = c()
- mres = c()
- zx <- (x-mean(x))/sd(x)
-
- nlen <- 75
- for (epoch in 1:nlen) {
-   grad <- gradient(zx, res)
-   step.b <- grad$b * learning.rate
-   step.a <- grad$a * learning.rate
-   b <- b-step.b
-   a <- a-step.a
-
-   as <- append(as, a)
-   bs <- append(bs, b)
+ ssrs <- c() # for sum of square residuals
+ srs <- c() # sum of residuals
+ as <- c() # for as (intercepts)
+
+ for (i in seq(from = -50, to = 50, by = 0.01)) {
+   as <- append(as, i)
  }
- msrs
- mres
- as
- bs
- # scaled
- a
- b
+ length(ssrs)
+ length(srs)
+ length(as)
+
+ min(ssrs)
+ min.pos.ssrs <- which(ssrs == min(ssrs))
+ min.pos.ssrs
+ print(as[min.pos.ssrs])
-
- # unscale coefficients to make them comprehensible
- # see http://
- # and
- # http://
- #
- a = a - (mean(x) / sd(x)) * b
- b = b / sd(x)
- a
- b
-
- # changes of estimators
- as <- as - (mean(x) /sd(x)) * bs
- bs <- bs / sd(x)
-
- as
- bs
- mres
- msrs
-
- parameters <- data.frame(as, bs, mres, msrs)
-
- cat(paste0("
-
- summary(mo)$coefficients
-
- msrs <- data.frame(msrs)
- msrs.log <- data.table(epoch = 1:nlen, msrs)
- ggplot(msrs.log, aes(epoch, msrs)) +
-   geom_line(color="
-   theme_classic()
-
- mres <- data.frame(mres)
- mres.log <- data.table(epoch = 1:nlen, mres)
- ggplot(mres.log, aes(epoch, mres)) +
-   geom_line(color="
-   theme_classic()
-
- ch <- data.frame(mres, msrs)
- ch
- max(y)
- ggplot(data,
-   geom_point(size = 2) +
-   geom_abline(aes(intercept = as, slope = bs),
-     data = parameters, linewidth = 0.5,
-     color = '
-   stat_poly_line() +
-   stat_poly_eq(use_label(c("
-   theme_classic() +
-   geom_abline(aes(intercept = as, slope = bs),
-     data = parameters %>% slice_head(),
-     linewidth = 1, color = '
-   geom_abline(aes(intercept = as, slope = bs),
-     data = parameters %>% slice_tail(),
-     linewidth = 1, color = '
-   labs(title = '
  summary(mo)
- a.start
- b.start
- a
- b
- summary(mo)$coefficient[1]
- summary(mo)$coefficient[2]
+ plot(seq(1, length(ssrs)),
+ plot(seq(1, length(ssrs)),
+ tail(ssrs)
+ max(ssrs)
+ min(ssrs)
+ tail(srs)
+ max(srs)
+ min(srs)
  </code>
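In the removed gradient-descent block, the gradient() function and the first lines of the training loop are cut off. A minimal runnable sketch of what that block plausibly did, assuming the loop computed predictions and residuals on the scaled predictor zx and logged the mean-squared-residual loss; the db line, the predict() body, and the simulated data are assumptions, not recovered text:

<code>
set.seed(1)                         # illustrative data, not the page's dataset
x <- rnorm(30, 50, 10)
y <- 2 + 0.8 * x + rnorm(30, 0, 5)

predict <- function(x, a, b) a + b * x        # assumed predict function
residuals <- function(fit, y) y - fit
msrloss <- function(res) mean(res^2)
gradient <- function(x, res) {
  db <- -2 * mean(x * res)          # assumed: d(MSE)/db, cut off in the diff
  da <- -2 * mean(res)              # recovered from the diff
  list("a" = da, "b" = db)          # names match the grad$a / grad$b usage
}

zx <- (x - mean(x)) / sd(x)         # scaled feature, as in the removed code
a <- rnorm(1)
b <- rnorm(1)
learning.rate <- 1e-1
msrs <- c(); as <- c(); bs <- c()
for (epoch in 1:75) {
  fit <- predict(zx, a, b)          # assumed loop body
  res <- residuals(fit, y)
  msrs <- append(msrs, msrloss(res))
  grad <- gradient(zx, res)
  b <- b - grad$b * learning.rate
  a <- a - grad$a * learning.rate
  as <- append(as, a)
  bs <- append(bs, b)
}
a <- a - (mean(x) / sd(x)) * b      # unscale, as in the removed code
b <- b / sd(x)
c(a, b)                             # should approach coef(lm(y ~ x))
</code>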
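The current revision replaces gradient descent with a brute-force scan: b is fixed at the lm() estimate and candidate intercepts run from -50 to 50 in steps of 0.01. The loop body is cut off above; a sketch assuming it records the sum of squared residuals (ssrs) and the plain residual sums (srs) for each candidate intercept:

<code>
set.seed(1)                         # illustrative data, not the page's dataset
x <- rnorm(30, 50, 10)
y <- 2 + 0.8 * x + rnorm(30, 0, 5)
mo <- lm(y ~ x)

residuals <- function(predictions, y) y - predictions
ssrloss <- function(residuals) sum(residuals^2)

b <- summary(mo)$coefficients[2]    # fix b, vary only a
ssrs <- c(); srs <- c(); as <- c()
for (i in seq(from = -50, to = 50, by = 0.01)) {
  preds <- i + b * x                # assumed loop body
  res <- residuals(preds, y)
  ssrs <- append(ssrs, ssrloss(res))
  srs <- append(srs, sum(res))
  as <- append(as, i)
}
min.pos.ssrs <- which(ssrs == min(ssrs))
as[min.pos.ssrs]                    # should be near coef(mo)[1]
plot(seq(1, length(ssrs)), ssrs)    # U-shaped loss curve over the intercepts
</code>

As a function of the intercept, srs falls linearly and crosses zero at the least-squares estimate, while ssrs is a parabola with its minimum at the same point, which is presumably why the revision tracks and plots both.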