why_n-1_gradient_explanation
This is an old revision of the document!
# Demonstration for "why n-1": scan candidate values v to show the (mean)
# squared residual is minimised at the sample mean, then recover the same
# minimiser by gradient descent on standardised data.
# library(ggplot2)
# library(ggpmisc)
# rm(list = ls())  # NOTE(review): disabled -- wiping the workspace inside
#                  # a script is an anti-pattern; run in a fresh session

# Draw n values rescaled to have *exactly* the requested mean and sd.
# Note: scale() returns a 1-column matrix, so x is a matrix, not a vector.
rnorm2 <- function(n, mean, sd) {
  mean + sd * scale(rnorm(n))
}

# set.seed(191)
nx <- 1000
mx <- 50
sdx <- mx * 0.15
x <- rnorm2(nx, mx, sdx)
mean(x)
sd(x)
length(x)
hist(x)

# Candidate values of v spanning mean(x) +/- 4 sd.
x.span <- seq(from = mean(x) - 4 * sd(x), to = mean(x) + 4 * sd(x), by = 0.1)

# Residuals of x around a candidate value v.
residuals <- function(x, v) {
  x - v
}

# Sum of squared residuals.
ssr <- function(x, v) {
  res <- x - v
  sum(res^2)
}

# Mean squared residual; here the plain mean (divide by n).
msr <- function(x, v) {
  res <- x - v
  # sum(res^2) / (length(x) - 1)
  mean(res^2)
}

# Preallocate the result vectors instead of growing them with append()
# inside the loop (growing copies the whole vector on every iteration).
n.span <- length(x.span)
srs <- numeric(n.span)   # sum of residuals
ssrs <- numeric(n.span)  # sum of square residuals
msrs <- numeric(n.span)  # mean square residuals = variance
vs <- numeric(n.span)    # the value of v in (x - v)
for (i in seq_along(x.span)) {
  v.i <- x.span[i]
  srs[i] <- sum(residuals(x, v.i))
  ssrs[i] <- ssr(x, v.i)
  msrs[i] <- msr(x, v.i)
  vs[i] <- v.i
}
plot(ssrs)
plot(msrs)
plot(srs)
min(msrs)
min.pos.msrs <- which(msrs == min(msrs))
min.pos.msrs
print(vs[min.pos.msrs])
mean(x)
plot(vs, msrs)
plot(vs, ssrs)
# The scan above uses no gradient. Below, compute with the MSE value
# rather than the SSE, since the latter becomes too large.

# Analytic gradient of (x - v)^2 with respect to v, averaged over x:
#   y = (x - v)^2
#   dy/dv = 2 (x - v) * (-1)   (chain rule)
#   dy/dv = -2 (x - v)
gradient <- function(x, v) {
  res <- x - v
  ds <- -2 * mean(res)
  list("ds" = ds)  # function returns the ds value
}

# Redefinitions as in the original document; msr now divides by (n - 1),
# which is exactly the "why n-1" comparison this page is about.
residuals <- function(x, v) {
  x - v
}
ssr <- function(x, v) {
  res <- x - v
  sum(res^2)
}
msr <- function(x, v) {
  res <- x - v
  sum(res^2) / (length(x) - 1)
  # mean(res^2)
}

# Pick one random starting v in (x - v).
v <- rnorm(1)

# Train the model with scaled (standardised) features.
learning.rate <- 1e-1
nlen <- 75
# Record loss and related quantities for each epoch. Preallocated; in the
# original the append() calls were commented out, which left these vectors
# empty and made the later tail()/plot() calls fail.
grads <- numeric(nlen)
ssrs <- numeric(nlen)
msrs <- numeric(nlen)
steps <- numeric(nlen)
vs <- numeric(nlen)
zx <- (x - mean(x)) / sd(x)
for (epoch in 1:nlen) {
  ssrs[epoch] <- ssr(zx, v)
  msrs[epoch] <- msr(zx, v)
  grad <- gradient(zx, v)
  grads[epoch] <- grad$ds
  step.v <- grad$ds * learning.rate
  steps[epoch] <- step.v
  v <- v - step.v
  vs[epoch] <- v
}
tail(grads)
tail(srs)
tail(msrs)
tail(ssrs)
tail(vs)
plot(srs)
plot(msrs)
plot(ssrs)
plot(vs)
plot(grads)
# Map the scaled v back to the original scale:
#   zx <- (x - mean(x)) / sd(x)
#   v.scaled <- (v.original - mean(x)) / sd(x)
v.orig <- (v * sd(x)) + mean(x)
v.orig
steps
vs.orig <- (vs * sd(x)) + mean(x)
vs.orig
grads
why_n-1_gradient_explanation.1756949607.txt.gz · Last modified: 2025/09/04 10:33 by hkimscil