why_n-1_gradient_explanation
#library(ggplot2)
#library(ggpmisc)
rm(list=ls())
# A variant of the rnorm function that draws
# n sample elements with the given mean and sd
# (the sample mean and standard deviation are
# forced to be exactly the requested values)
rnorm2 <- function(n, mean, sd) {
  mean + sd * scale(rnorm(n))
}
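# Quick check (a minimal sketch): scale() centers to
# mean 0 and rescales to sd 1, so rnorm2 hits the
# requested moments exactly. 'chk' is an illustrative name.
chk <- rnorm2(10, 100, 15)
mean(chk) # exactly 100 (up to floating point)
sd(chk)   # exactly 15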
# set.seed(191)
nx <- 1000
mx <- 50
sdx <- mx * 0.1
sdx # 5
x <- rnorm2(nx, mx, sdx)
# identical to x <- rnorm2(1000, 50, 5)
mean(x)
sd(x)
length(x)
hist(x)
x.span <- seq(from = min(x),
to = max(x),
by = 1)
residuals <- function(x, v) {
  return(x - v)
}
# function computing the sum of
# squared residuals (SS)
ssr <- function(x, v) {
  residuals <- (x - v)
  return(sum(residuals^2))
}
# function computing the mean squared
# residual (mean squared
# residual = variance)
msr <- function(x, v) {
  residuals <- (x - v)
  # return((sum(residuals^2)) / (length(x) - 1))
  return(mean(residuals^2))
}
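# Side check (a minimal sketch): at v = mean(x), dividing
# the SS by n gives the biased variance, while dividing by
# n - 1 reproduces R's var(x); that is exactly what the
# commented-out line above would change. 'res2' is an
# illustrative name.
res2 <- (x - mean(x))^2
sum(res2) / length(x)       # n denominator (biased)
sum(res2) / (length(x) - 1) # n - 1 denominator
var(x)                      # matches the n - 1 version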
srs <- c()  # sums of residuals
ssrs <- c() # sums of squared residuals
msrs <- c() # mean squared residuals = variance
vs <- c()   # the value of v in (x - v)
x.span
# Using each value of x.span as the v in (x - v)
# and computing sum((x - v)^2) gives an SS value.
# The SS we learned in class is the one obtained
# when v is the sample mean.
for (i in x.span) {
  res.x <- residuals(x, i)
  srs.x <- sum(res.x)
  ssr.x <- ssr(x, i)
  msr.x <- msr(x, i)
  srs <- append(srs, srs.x)
  ssrs <- append(ssrs, ssr.x)
  msrs <- append(msrs, msr.x)
  vs <- append(vs, i)
}
plot(ssrs)
plot(msrs)
plot(srs)
min(msrs)
min.pos.msrs <- which(msrs == min(msrs))
min.pos.msrs
print(vs[min.pos.msrs])
mean(x)
plot(vs, msrs)
plot(vs, ssrs)
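# Cross-check (a minimal sketch): base R's optimize()
# should find the same minimizer as the grid search
# above, namely the sample mean.
optimize(function(v) msr(x, v), interval = range(x))$minimum
mean(x)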
# the search above uses no gradient (brute force)
# from here on, compute with the MSE value rather
# than the SSE; the latter grows too large
gradient <- function(x, v) {
  residuals <- x - v
  # y = (x - v)^2
  # dy/dv = 2(x - v) * -1   (chain rule)
  # dy/dv = -2(x - v)
  dx <- -2 * mean(residuals)
  return(list("ds" = dx))
} # the function returns the ds value
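# Numerical check (a minimal sketch): compare the analytic
# gradient with a central finite difference of msr (still
# the mean-squared version at this point) at an arbitrary
# point. 'v0' and 'h' are illustrative names.
v0 <- 42
h <- 1e-6
(msr(x, v0 + h) - msr(x, v0 - h)) / (2 * h) # finite difference
gradient(x, v0)$ds                          # analytic, ~equal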
# redefine the helpers; msr now uses the n - 1 denominator
residuals <- function(x, v) {
  return(x - v)
}
ssr <- function(x, v) {
  residuals <- (x - v)
  return(sum(residuals^2))
}
msr <- function(x, v) {
  residuals <- (x - v)
  return((sum(residuals^2)) / (length(x) - 1))
  # return(mean(residuals^2))
}
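# Note (a sketch): gradient() differentiates the n-denominator
# MSE, d/dv [sum((x - v)^2) / n] = -2 * mean(x - v).
# Differentiating the n - 1 version above instead gives the
# same direction scaled by n / (n - 1); for n = 1000 the
# descent path is nearly identical. 'grad.n1' is an
# illustrative helper.
grad.n1 <- function(x, v) {
  -2 * sum(x - v) / (length(x) - 1)
}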
# pick one random v in (x-v)
v <- rnorm(1)
# Train the model with scaled features
learning.rate = 1e-1
grads <- c()
ssrs <- c()
msrs <- c()
srs <- c()
vs <- c()
steps <- c()
# Record Loss for each epoch:
zx <- (x-mean(x))/sd(x)
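# Sanity check (a minimal sketch): zx is standardized, so on
# this scale the descent target is mean(zx) = 0 with sd 1.
round(mean(zx), 12) # 0
sd(zx)              # 1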
nlen <- 75
for (epoch in 1:nlen) {
  residual <- residuals(zx, v)
  srs <- append(srs, sum(residual))
  ssrs <- append(ssrs, ssr(zx, v))
  msrs <- append(msrs, msr(zx, v))
  grad <- gradient(zx, v)
  grads <- append(grads, grad$ds)
  step.v <- grad$ds * learning.rate
  steps <- append(steps, step.v)
  v <- v - step.v
  vs <- append(vs, v)
}
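# Convergence check (a minimal sketch): since mean(zx) = 0,
# each update is v <- v - lr * 2 * v = v * (1 - 2 * lr), so v
# shrinks geometrically toward mean(zx).
(1 - 2 * learning.rate)^nlen # contraction factor after nlen epochs
tail(vs, 1)                  # should sit near 0 = mean(zx)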
tail(grads)
tail(srs)
tail(msrs)
tail(ssrs)
tail(vs)
plot(srs)
plot(msrs)
plot(ssrs)
plot(vs)
plot(grads)
# v is on the scaled (z-score) axis
v
# zx <- (x - mean(x)) / sd(x)
# v.scaled <- (v.orig - mean(x)) / sd(x)
v.orig <- (v*sd(x))+mean(x)
v.orig
steps
vs.orig <- (vs*sd(x))+mean(x)
vs.orig
grads
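# Final check (a minimal sketch): on the original scale the
# descent should have landed on the sample mean.
tail(vs.orig, 1)
mean(x)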
