gradient_descent:code01
This is an old revision of the document!
library(ggplot2)
library(ggpmisc)
library(tidyverse)
library(data.table)
# settle down
rm(list=ls())
ss <- function(x) {
return(sum((x-mean(x))^2))
}
# data preparation
set.seed(101)
nx <- 50 # variable x, sample size
mx <- 4.5 # mean of x
sdx <- mx * 0.56 # sd of x
x <- rnorm(nx, mx, sdx) # generating x
slp <- 4 # slop of x = coefficient, b
# y variable
y <- slp * x + rnorm(nx, 0, slp*3*sdx)
data <- data.frame(x, y)
head(data)
# check with regression
mo <- lm(y ~ x, data = data)
summary(mo)
# graph
ggplot(data = data, aes(x = x, y = y)) +
geom_point() +
stat_poly_line() +
stat_poly_eq(use_label(c("eq", "R2"))) +
theme_classic()
# from what we know
# get covariance value
sp.yx <- sum((x-mean(x))*(y-mean(y)))
df.yx <- length(y)-1
sp.yx/df.yx
# check with cov function
cov(x,y)
# correlation value
cov(x,y)/(sd(x)*sd(y))
cor(x,y)
# regression by hand
# b and a
b <- sp.yx / ss(x) # b2 <- cov(x,y)/var(x)
a <- mean(y) - b*(mean(x))
a
b
# check a and b value from the lm
summary(mo)$coefficient[1]
summary(mo)$coefficient[2]
summary(mo)
fit.yx <- a + b*x # predicted value of y from x data
res <- y - fit.yx # error residuals
reg <- fit.yx - mean(y) # error regressions
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
ss.res+ss.reg
ss.tot <- ss(y)
ss.tot
plot(x,y)
abline(a, b, col="red", lwd=2)
plot(x, fit.yx)
plot(x, res)
df.y <- length(y)-1
df.reg <- 2-1
df.res <- df.y - df.reg
df.res
r.sq <- ss.reg / ss.tot
r.sq
summary(mo)$r.square
ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
f.cal <- ms.reg / ms.res
f.cal
pf(f.cal, df.reg, df.res,lower.tail = F)
t.cal <- sqrt(f.cal)
t.cal
se.b <- sqrt(ms.res/ss(x))
se.b
t.cal <- (b-0)/se.b
t.cal
pt(t.cal, df=df.res, lower.tail = F)*2
summary(mo)
# getting a and b from
# gradient descent
a <- rnorm(1)
b <- rnorm(1)
a.start <- a
b.start <- b
a.start
b.start
# Predict function:
predict <- function(x, a, b){
return (a + b * x)
}
# And loss function is:
residuals <- function(fit, y) {
return(y - fit)
}
gradient <- function(x, res){
db = -2 * mean(x * res)
da = -2 * mean(res)
return(list("b" = db, "a" = da))
}
# to check ms.residual
msrloss <- function(fit, y) {
res <- residuals(fit, y)
return(mean(res^2))
}
# Train the model with scaled features
learning.rate = 1e-1 # 0.1
# Record Loss for each epoch:
as = c()
bs = c()
msrs = c()
ssrs = c()
mres = c()
zx <- (x-mean(x))/sd(x)
nlen <- 75
for (epoch in 1:nlen) {
fit.val <- predict(zx, a, b)
residual <- residuals(fit.val, y)
loss <- msrloss(fit.val, y)
mres <- append(mres, mean(residual))
msrs <- append(msrs, loss)
grad <- gradient(zx, residual)
step.b <- grad$b * learning.rate
step.a <- grad$a * learning.rate
b <- b-step.b
a <- a-step.a
as <- append(as, a)
bs <- append(bs, b)
}
msrs
mres
as
bs
# scaled
a
b
# unscale coefficients to make them comprehensible
# see http://commres.net/wiki/gradient_descent#why_normalize_scale_or_make_z-score_xi
# and
# http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
#
a = a - (mean(x) / sd(x)) * b
b = b / sd(x)
a
b
# changes of estimators
as <- as - (mean(x) /sd(x)) * bs
bs <- bs / sd(x)
as
bs
mres
msrs
parameters <- data.frame(as, bs, mres, msrs)
cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
summary(mo)$coefficients
msrs <- data.frame(msrs)
msrs.log <- data.table(epoch = 1:nlen, msrs)
ggplot(msrs.log, aes(epoch, msrs)) +
geom_line(color="blue") +
theme_classic()
mres <- data.frame(mres)
mres.log <- data.table(epoch = 1:nlen, mres)
ggplot(mres.log, aes(epoch, mres)) +
geom_line(color="red") +
theme_classic()
ch <- data.frame(mres, msrs)
ch
max(y)
ggplot(data, aes(x = x, y = y)) +
geom_point(size = 2) +
geom_abline(aes(intercept = as, slope = bs),
data = parameters, linewidth = 0.5,
color = 'green') +
stat_poly_line() +
stat_poly_eq(use_label(c("eq", "R2"))) +
theme_classic() +
geom_abline(aes(intercept = as, slope = bs),
data = parameters %>% slice_head(),
linewidth = 1, color = 'blue') +
geom_abline(aes(intercept = as, slope = bs),
data = parameters %>% slice_tail(),
linewidth = 1, color = 'red') +
labs(title = 'Gradient descent. blue: start, red: end, green: gradients')
summary(mo)
a.start
b.start
a
b
summary(mo)$coefficient[1]
summary(mo)$coefficient[2]
gradient_descent/code01.1766023949.txt.gz · Last modified: by hkimscil
