library(ggplot2)
library(ggpmisc)
library(tidyverse)
library(data.table)

# start with a clean workspace
rm(list = ls())

# helper: sum of squared deviations from the mean
ss <- function(x) {
  return(sum((x - mean(x))^2))
}

# data preparation
set.seed(101)
nx <- 50            # sample size of x
mx <- 4.5           # mean of x
sdx <- mx * 0.56    # sd of x
x <- rnorm(nx, mx, sdx)   # generate x
slp <- 4            # slope of x = coefficient, b

# y variable: linear in x plus noise
y <- slp * x + rnorm(nx, 0, slp * 3 * sdx)
data <- data.frame(x, y)
head(data)

# check with regression
mo <- lm(y ~ x, data = data)
summary(mo)

# graph
ggplot(data = data, aes(x = x, y = y)) +
  geom_point() +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  theme_classic()

# from what we know:
# get the covariance value (sum of cross-products over its df)
sp.yx <- sum((x - mean(x)) * (y - mean(y)))
df.yx <- length(y) - 1
sp.yx / df.yx
# check with the cov function
cov(x, y)

# correlation value
cov(x, y) / (sd(x) * sd(y))
cor(x, y)

# regression by hand: b and a
b <- sp.yx / ss(x)
# b2 <- cov(x, y) / var(x)   # equivalent route to b
a <- mean(y) - b * mean(x)
a
b

# check a and b against the lm fit
summary(mo)$coefficients[1]
summary(mo)$coefficients[2]
summary(mo)

fit.yx <- a + b * x       # predicted values of y from x
res <- y - fit.yx         # residual (error) component
reg <- fit.yx - mean(y)   # regression (explained) component
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
ss.res + ss.reg           # should equal the total sum of squares
ss.tot <- ss(y)
ss.tot

plot(x, y)
abline(a, b, col = "red", lwd = 2)
plot(x, fit.yx)
plot(x, res)

# degrees of freedom
df.y <- length(y) - 1
df.reg <- 2 - 1           # two parameters (a, b) minus one
df.res <- df.y - df.reg
df.res

# R-squared: explained over total sum of squares
r.sq <- ss.reg / ss.tot
r.sq
summary(mo)$r.squared

# F-test and t-test for the slope
ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
f.cal <- ms.reg / ms.res
f.cal
pf(f.cal, df.reg, df.res, lower.tail = FALSE)
t.cal <- sqrt(f.cal)      # with 1 regression df, t = sqrt(F)
t.cal
se.b <- sqrt(ms.res / ss(x))
se.b
t.cal <- (b - 0) / se.b
t.cal
pt(t.cal, df = df.res, lower.tail = FALSE) * 2   # two-tailed p-value
summary(mo)

# getting a and b from gradient descent
a <- rnorm(1)   # random starting values
b <- rnorm(1)
a.start <- a
b.start <- b
a.start
b.start

# predict function (note: masks stats::predict for this session):
predict <- function(x, a, b) {
  return(a + b * x)
}

# and the loss pieces (residuals masks stats::residuals):
residuals <- function(fit, y) {
  return(y - fit)
}

# partial derivatives of the mean squared residual w.r.t. b and a
gradient <- function(x, res) {
  db <- -2 * mean(x * res)
  da <- -2 * mean(res)
  return(list("b" = db, "a" = da))
}

# to track the mean squared residual (the loss)
msrloss <- function(fit, y) {
  res <- residuals(fit, y)
  return(mean(res^2))
}

# train the model with scaled features
learning.rate <- 1e-1   # 0.1

# record the estimates and loss for each epoch:
as <- c()
bs <- c()
msrs <- c()
ssrs <- c()   # (unused)
mres <- c()

zx <- (x - mean(x)) / sd(x)   # z-score x
nlen <- 75
for (epoch in 1:nlen) {
  fit.val <- predict(zx, a, b)
  residual <- residuals(fit.val, y)
  loss <- msrloss(fit.val, y)
  mres <- append(mres, mean(residual))
  msrs <- append(msrs, loss)
  grad <- gradient(zx, residual)
  step.b <- grad$b * learning.rate
  step.a <- grad$a * learning.rate
  b <- b - step.b
  a <- a - step.a
  as <- append(as, a)
  bs <- append(bs, b)
}
msrs
mres
as
bs

# scaled a, b:
# unscale the coefficients to make them comprehensible
# see http://commres.net/wiki/gradient_descent#why_normalize_scale_or_make_z-score_xi
# and
# http://commres.net/wiki/gradient_descent#how_to_unnormalize_unscale_a_and_b
a <- a - (mean(x) / sd(x)) * b
b <- b / sd(x)
a
b

# changes of the estimators over the epochs, unscaled the same way
as <- as - (mean(x) / sd(x)) * bs
bs <- bs / sd(x)
as
bs
mres
msrs

parameters <- data.frame(as, bs, mres, msrs)
cat(paste0("Intercept: ", a, "\n", "Slope: ", b, "\n"))
summary(mo)$coefficients

# loss (mean squared residual) per epoch
msrs <- data.frame(msrs)
msrs.log <- data.table(epoch = 1:nlen, msrs)
ggplot(msrs.log, aes(epoch, msrs)) +
  geom_line(color = "blue") +
  theme_classic()

# mean residual per epoch
mres <- data.frame(mres)
mres.log <- data.table(epoch = 1:nlen, mres)
ggplot(mres.log, aes(epoch, mres)) +
  geom_line(color = "red") +
  theme_classic()

ch <- data.frame(mres, msrs)
ch
max(y)
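# A minimal sketch of the closed-form route to the same a and b: the normal
# equations, beta = (X'X)^(-1) X'y, in base R matrix algebra. `X` and
# `beta.hat` are names introduced here for illustration only.
X <- cbind(1, x)                            # design matrix: intercept column + x
beta.hat <- solve(t(X) %*% X, t(X) %*% y)   # solve the normal equations
beta.hat                                    # row 1 ~ a, row 2 ~ b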
ggplot(data, aes(x = x, y = y)) +
  geom_point(size = 2) +
  geom_abline(aes(intercept = as, slope = bs), data = parameters,
              linewidth = 0.5, color = "green") +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  theme_classic() +
  geom_abline(aes(intercept = as, slope = bs),
              data = parameters %>% slice_head(),
              linewidth = 1, color = "blue") +
  geom_abline(aes(intercept = as, slope = bs),
              data = parameters %>% slice_tail(),
              linewidth = 1, color = "red") +
  labs(title = "Gradient descent. blue: start, red: end, green: per-epoch fits")

summary(mo)
a.start
b.start
a
b
summary(mo)$coefficients[1]
summary(mo)$coefficients[2]
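# A short sketch comparing the two routes numerically: lm()'s coefficients
# versus the unscaled gradient-descent estimates. The 1e-2 tolerance is an
# arbitrary assumption; raising nlen tightens the agreement.
coef(mo)          # closed-form (lm) estimates
c(a = a, b = b)   # gradient-descent estimates
all.equal(unname(coef(mo)), c(a, b), tolerance = 1e-2)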