# Loss-surface intuition demo:
# Simulate a simple linear relationship y = b*x + noise, fit it with lm(),
# then hold the slope `b` fixed at its fitted value and sweep the intercept
# `a` over a grid, recording the sum-of-squared-residuals (SSR) loss at each
# candidate. The grid minimum should agree with lm()'s fitted intercept.

library(tidyverse)
library(data.table)
library(ggplot2)   # already attached by tidyverse; kept for explicitness
library(ggpmisc)

# Data preparation ----
set.seed(101)
nx <- 50              # sample size of x
mx <- 4.5             # mean of x
sdx <- mx * 0.56      # sd of x
x <- rnorm(nx, mx, sdx)  # generate x
slp <- 4              # slope of x = coefficient, b

# y variable: linear in x plus Gaussian noise
y <- slp * x + rnorm(nx, 0, slp * 3 * sdx)
data <- data.frame(x, y)
head(data)

# Check with regression ----
mo <- lm(y ~ x, data = data)
summary(mo)

# Graph: scatter with fitted line and equation/R^2 label ----
ggplot(data = data, aes(x = x, y = y)) +
  geom_point() +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  theme_classic()

# Initialize betas ----
# First fix b (at the fitted slope) and vary only a (the intercept)
# to build intuition about the shape of the loss surface.
b <- coef(mo)[[2]]    # fitted slope (same value as summary(mo)$coefficients[2])
a <- 0
b.init <- b
a.init <- a

# Prediction function: yhat = a + b*x.
# Named predict_y (not `predict`) so it does not mask stats::predict().
predict_y <- function(x, a, b) {
  a + b * x
}

# Raw residuals (y - yhat).
# Named calc_residuals so it does not mask stats::residuals().
calc_residuals <- function(predictions, y) {
  y - predictions
}

# Loss function: sum of squared residuals (SSR), which can grow large.
ssrloss <- function(predictions, y) {
  res <- y - predictions
  sum(res^2)
}

# Sweep the intercept over a grid and record the loss at each value ----
# Preallocate result vectors: growing with append() inside a ~10,000-step
# loop copies the vector on every iteration (quadratic cost).
a.grid <- seq(from = -50, to = 50, by = 0.01)
n.grid <- length(a.grid)
ssrs <- numeric(n.grid)  # sum of squared residuals per candidate intercept
srs <- numeric(n.grid)   # sum of raw residuals per candidate intercept
as <- a.grid             # candidate intercepts

for (i in seq_len(n.grid)) {
  pred <- predict_y(x, a.grid[i], b)
  res <- calc_residuals(pred, y)
  ssrs[i] <- ssrloss(pred, y)
  srs[i] <- sum(res)
}

length(ssrs)
length(srs)
length(as)

# Locate the grid intercept minimizing SSR; compare with lm()'s intercept.
min(ssrs)
min.pos.ssrs <- which.min(ssrs)  # index of the (first) minimum; avoids
                                 # float comparison via `==` with min()
min.pos.ssrs
print(as[min.pos.ssrs])
summary(mo)

# Loss curves over the grid ----
plot(seq_along(ssrs), ssrs)
plot(seq_along(srs), srs)

tail(ssrs)
max(ssrs)
min(ssrs)
tail(srs)
max(srs)
min(srs)