User Tools

Site Tools


why_n-1_gradient_explanation

This is an old revision of the document!


#library(ggplot2)
#library(ggpmisc)

# NOTE: the workspace is deliberately not wiped here. rm(list = ls()) would
# delete every object in the caller's global environment whenever this script
# is sourced; restart R instead if a clean session is needed. Everything the
# script uses is defined below.
# Draw n standard-normal deviates and rescale them so that the sample mean
# and sample standard deviation match the requested values (up to
# floating-point error), not merely in expectation.
#   n    : number of values to draw
#   mean : sample mean of the result
#   sd   : sample standard deviation of the result
# Returns the n x 1 matrix produced by scale(), stretched by sd and shifted
# by mean.
rnorm2 <- function(n, mean, sd) {
  z <- scale(rnorm(n))  # centred and scaled draws
  mean + sd * z
}

# Simulated sample: nx values with mean mx and a standard deviation equal to
# 15% of the mean.
# set.seed(191)
nx <- 1000
mx <- 50
sdx <- 0.15 * mx
x <- rnorm2(nx, mx, sdx)

# Quick look at the generated sample.
mean(x)
sd(x)
length(x)
hist(x)

# Candidate values of v to try: from 4 sd below the mean to 4 sd above it,
# in steps of 0.1.
x.span <- seq(mean(x) - 4 * sd(x), mean(x) + 4 * sd(x), by = 0.1)

# Deviation of every observation in x from the candidate value v.
# Note: defining this in the global environment masks stats::residuals().
#   x : numeric data
#   v : value subtracted from x
# Returns x - v, element-wise.
residuals <- function(x, v) {
  x - v
}

# Sum of squared residuals of x around the candidate value v.
#   x : numeric data
#   v : value subtracted from x
# Returns a single number: sum((x - v)^2).
ssr <- function(x, v) {
  sum((x - v)^2)
}

# Mean squared residual of x around v, dividing by n.
# Dividing the same sum of squares by length(x) - 1 instead would give the
# sample variance when v is the sample mean.
#   x : numeric data
#   v : value subtracted from x
# Returns a single number: mean((x - v)^2).
msr <- function(x, v) {
  mean((x - v)^2)
}

# Brute-force search: evaluate the residual summaries at every candidate v in
# x.span and look at where the mean squared residual is smallest.
# The series are built in one pass with vapply() rather than grown
# element-by-element inside a loop.
vs <- x.span # the value of v in (x - v)
srs <- vapply(vs, function(v) sum(residuals(x, v)), numeric(1)) # sum of residuals
ssrs <- vapply(vs, function(v) ssr(x, v), numeric(1)) # sum of square residuals
msrs <- vapply(vs, function(v) msr(x, v), numeric(1)) # mean square residuals = variance

plot(ssrs)
plot(msrs)
plot(srs)

# Locate the candidate v with the smallest mean squared residual and compare
# it with the sample mean. The exact == is safe here because min() returns one
# of the elements of msrs unchanged.
min(msrs)
min.pos.msrs <- which(msrs == min(msrs))
min.pos.msrs
print(vs[min.pos.msrs])
mean(x)
plot(vs, msrs)
plot(vs, ssrs)

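# The search above uses no gradient; it simply tries every candidate v.
# Below, compute with the MSE rather than the SSE,
# because the latter's values become too large.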

# Gradient of the mean squared residual with respect to v.
#   For a single observation y = (x - v)^2, the chain rule gives
#   dy/dv = 2 * (x - v) * -1 = -2 * (x - v); the function averages this
#   over all observations in x.
#   x : numeric data
#   v : current candidate value
# Returns a one-element list whose "ds" entry holds the gradient.
gradient <- function(x, v) {
  dev <- x - v
  slope <- -2 * mean(dev)
  list("ds" = slope)
}

# Deviation of every observation in x from the candidate value v.
# Note: defining this in the global environment masks stats::residuals().
#   x : numeric data
#   v : value subtracted from x
# Returns x - v, element-wise.
residuals <- function(x, v) {
  x - v
}

# Sum of squared residuals of x around the candidate value v.
#   x : numeric data
#   v : value subtracted from x
# Returns a single number: sum((x - v)^2).
ssr <- function(x, v) {
  sum((x - v)^2)
}

# Mean squared residual of x around v using the n - 1 divisor, i.e. the
# sample-variance form when v is the sample mean. (Using mean() on the
# squared residuals would divide by n instead.)
#   x : numeric data
#   v : value subtracted from x
# Returns a single number: sum((x - v)^2) / (length(x) - 1).
msr <- function(x, v) {
  sq.dev <- (x - v)^2
  sum(sq.dev) / (length(x) - 1)
}

# Gradient descent: start from a random v and repeatedly step against the
# gradient of the mean squared residual.

# pick one random v in (x - v)
v <- rnorm(1)
learning.rate <- 1e-1
nlen <- 75 # number of epochs

# Train on the standardized (z-score) version of x.
zx <- (x - mean(x)) / sd(x)

# Per-epoch history, preallocated instead of grown with append().
# Every series that is inspected or plotted below is reset and recorded here,
# so none of them is empty and none carries values from an earlier section.
grads <- numeric(nlen) # gradient at the v used in the epoch
srs <- numeric(nlen)   # sum of residuals at that v
mres <- numeric(nlen)  # mean residual at that v
ssrs <- numeric(nlen)  # sum of squared residuals at that v
msrs <- numeric(nlen)  # mean squared residual at that v
steps <- numeric(nlen) # size of the update applied to v
vs <- numeric(nlen)    # v after the update

for (epoch in seq_len(nlen)) {
  # Loss summaries are taken at the current v, before it is updated.
  residual <- residuals(zx, v)
  srs[epoch] <- sum(residual)
  mres[epoch] <- mean(residual)
  ssrs[epoch] <- ssr(zx, v)
  msrs[epoch] <- msr(zx, v)

  grad <- gradient(zx, v)
  grads[epoch] <- grad$ds
  step.v <- grad$ds * learning.rate
  steps[epoch] <- step.v
  v <- v - step.v # move against the gradient
  vs[epoch] <- v
}

tail(grads)
tail(srs)
tail(msrs)
tail(ssrs)
tail(vs)

plot(srs)
plot(msrs)
plot(ssrs)
plot(vs)
plot(grads)
# scaled
v
# zx <- (x - mean(x)) / sd(x)
# v.standardized <- (v.original - mean(x)) / sd(x)
# so the estimate on the original scale is recovered by inverting that:
v.orig <- (v * sd(x)) + mean(x)
v.orig

steps

vs.orig <- (vs * sd(x)) + mean(x)
vs.orig
grads

why_n-1_gradient_explanation.1756949607.txt.gz · Last modified: 2025/09/04 10:33 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki