library(tidyverse)
library(data.table)
library(ggplot2)
library(ggpmisc)
# Simulate a simple linear relationship and fit it by OLS ----
set.seed(101)
nx <- 50            # sample size for x
mx <- 4.5           # mean of x
sdx <- mx * 0.56    # sd of x, proportional to the mean
x <- rnorm(nx, mean = mx, sd = sdx)   # draw the predictor
slp <- 4            # true slope (coefficient b)
# Response: linear signal plus fairly large Gaussian noise
y <- slp * x + rnorm(nx, 0, slp * 3 * sdx)
data <- data.frame(x, y)
head(data)
# Sanity check: fit ordinary least squares regression
mo <- lm(y ~ x, data = data)
summary(mo)
# Scatter plot with fitted line and equation / R^2 annotation ----
ggplot(data, aes(x = x, y = y)) +
  geom_point() +
  stat_poly_line() +
  stat_poly_eq(use_label(c("eq", "R2"))) +
  theme_classic()
# Initialize the search parameters ----
# First fix the slope b at the OLS estimate and vary only the
# intercept a, to build intuition about the loss surface.
b <- coef(mo)[[2]]   # OLS slope estimate (same value as summary(mo)$coefficients[2])
a <- 0               # starting intercept
b.init <- b
a.init <- a
# Linear predictor: yhat = a + b * x (vectorized over x).
# NOTE(review): this masks stats::predict() for the rest of the script.
predict <- function(x, a, b) {
  a + b * x
}
# Residuals: observed minus predicted values (elementwise).
# NOTE(review): this masks stats::residuals() for the rest of the script.
residuals <- function(predictions, y) {
  y - predictions
}
# Loss: sum of squared residuals (SSR).
# Squaring keeps the loss non-negative and penalizes large errors,
# so the value can grow big quickly.
ssrloss <- function(predictions, y) {
  sum((y - predictions)^2)
}
# Grid search over candidate intercepts (slope b held fixed) ----
# Evaluate the loss at every intercept on a fine grid.
# The original grew ssrs/srs/as with append() inside the loop, which
# copies the whole vector each iteration (O(n^2)); vapply preallocates
# the result and produces identical values in the same order.
# NOTE(review): `as` masks base::as(); kept for compatibility with the
# inspection code below.
as <- seq(from = -50, to = 50, by = 0.01)          # intercepts tried
ssrs <- vapply(as, function(a) {                   # sum of squared residuals
  ssrloss(predict(x, a, b), y)
}, numeric(1))
srs <- vapply(as, function(a) {                    # plain sum of residuals
  sum(residuals(predict(x, a, b), y))
}, numeric(1))
# Inspect the grid-search results ----
length(ssrs)
length(srs)
length(as)
min(ssrs)
# Grid index where the SSR loss is smallest; the matching intercept
# as[min.pos.ssrs] should be close to the OLS intercept in summary(mo).
min.pos.ssrs <- which(ssrs == min(ssrs))
min.pos.ssrs
print(as[min.pos.ssrs])
summary(mo)
# Loss curves over the grid index.
# seq_along() replaces seq(1, length(...)), which misbehaves on
# zero-length vectors (the 1:length(x) pitfall).
plot(seq_along(ssrs), ssrs)
plot(seq_along(srs), srs)
tail(ssrs)
max(ssrs)
min(ssrs)
tail(srs)
max(srs)
min(srs)