User Tools

Site Tools


c:ma:regression_lecture_note

regression.lecturenote.r

# ---- Simulation 1: small sample (n = 25) ----
# Simulate x ~ N(100, 10) and y = 1.4 * x + 2 + e, with e ~ N(0, 10).
set.seed(401)
sn <- 25
x <- rnorm(sn, 100, 10)
x
y <- 1.4 * x + 2 + rnorm(sn, 0, 10)
y
df <- data.frame(x, y)

# Install ggplot2 only when it is missing, instead of re-installing it
# every time the script is run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept below so older installs still honour it.

# Histogram of y with its mean marked by a dashed red line.
# coord_flip() swaps the axes so the y values run along the vertical axis.
# bins = 30 spells out the ggplot2 default and avoids the "pick better
# value" message.
ggplot(data = df, aes(y)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = mean(y)),
             color = "red", linetype = "dashed", size = 1) +
  coord_flip()

# Density graph of y, same layout as the histogram above.
ggplot(data = df, aes(y)) +
  geom_density(color = "blue", size = 1.5) +
  geom_vline(aes(xintercept = mean(y)),
             color = "red", linetype = "dashed", size = 1) +
  coord_flip()

# Fit the simple linear regression y ~ x and inspect the fitted object.
lm.mod <- lm(y ~ x, data = df)
summary(lm.mod)
str(lm.mod)

# Intercept and slope of the fitted line.
inc.y <- coef(lm.mod)[1]
slope.x <- coef(lm.mod)[2]
inc.y
slope.x

# Scatterplot with the mean of y (horizontal line) and the fitted
# regression line. shape = 1 (open circle) replaces the non-integer
# pch = 1.5, which is not a valid shape code.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 1) +
  geom_hline(aes(yintercept = mean(y))) +
  geom_abline(intercept = inc.y, slope = slope.x)


# Same plot with heavier, colour-coded lines: red = mean of y,
# dark green = fitted regression line.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 2.5, shape = 2) +
  geom_hline(aes(yintercept = mean(y)), size = 1.5, color = "red") +
  geom_abline(intercept = inc.y, slope = slope.x,
              size = 1.5, color = "darkgreen")

################################
################################
################################
################################

# ---- Simulation 2: larger sample (n = 400), noisier errors ----
# Simulate x ~ N(100, 10) and y = 1.4 * x + 2 + e, with e ~ N(0, 16).
set.seed(101)
sn <- 400
x <- rnorm(sn, 100, 10)
x
y <- 1.4 * x + 2 + rnorm(sn, 0, 16)
y
df <- data.frame(x, y)

# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept below so older installs still honour it.

# Histogram of y with its mean marked by a dashed red line.
# bins = 30 spells out the ggplot2 default and avoids the "pick better
# value" message.
ggplot(data = df, aes(y)) +
  geom_histogram(bins = 30) +
  geom_vline(aes(xintercept = mean(y)),
             color = "red", linetype = "dashed", size = 1) +
  coord_flip()

# Density graph of y, same layout as the histogram above.
ggplot(data = df, aes(y)) +
  geom_density(color = "blue", size = 1.5) +
  geom_vline(aes(xintercept = mean(y)),
             color = "red", linetype = "dashed", size = 1) +
  coord_flip()


# Scatterplot with the mean of y and a hand-picked line (intercept 10,
# slope 1.5) drawn before any model is fitted.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 2) +
  geom_hline(aes(yintercept = mean(y)), size = 1, color = "darkgreen") +
  geom_abline(intercept = 10, slope = 1.5, size = 1.5, color = "red")

# Fit y ~ x and keep the summary for comparison with the manual
# calculations below.
lm.mod2 <- lm(y ~ x, data = df)
sum.lm.mod2 <- summary(lm.mod2)
sum.lm.mod2

# b = slope, a = intercept of the fitted line.
b <- coef(lm.mod2)[2]
a <- coef(lm.mod2)[1]
b
a

# Same scatterplot, now with the fitted regression line.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 2) +
  geom_hline(aes(yintercept = mean(y)), size = 1, color = "darkgreen") +
  geom_abline(intercept = a, slope = b, size = 1.5, color = "red")

# Residual sum of squares: sum of squared (observed - fitted).
residuals(lm.mod2)
ss.res <- sum(residuals(lm.mod2)^2)
ss.res

# Total sum of squares, recovered from the sample variance:
# var(y) * (n - 1) = sum((y - mean(y))^2).
mean.y <- mean(df$y)
var.tot <- var(df$y)
df.tot <- length(df$y) - 1
ss.tot <- var.tot * df.tot
ss.tot

# Explained sum of squares: sum of squared (fitted - mean of y).
y.hat <- fitted(lm.mod2)
explained <- y.hat - mean.y
explained
ss.exp <- sum(explained^2)
ss.exp
ss.res

# The two components should add up to the total sum of squares.
ss.exp + ss.res
ss.tot

# R squared = explained / total; compare with the value in the summary.
r.square <- ss.exp / ss.tot
r.square
sum.lm.mod2

# sqrt() alone always returns a non-negative value, so the sign of the
# fitted slope is restored to make r.coeff agree with cor(x, y) even when
# the relationship is negative.
r.coeff <- sign(unname(b)) * sqrt(r.square)
r.coeff
cor(x, y)

###
# Let ggplot2 fit and draw the regression line itself via stat_smooth().
# shape = 1 (open circle) replaces the non-integer pch = 1.5, which is
# not a valid shape code.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 1) +
  geom_hline(aes(yintercept = mean(y)), size = 1, color = "darkgreen") +
  stat_smooth(method = "lm",
              formula = y ~ x,
              geom = "smooth", color = "red", size = 1)


c/ma/regression_lecture_note.txt · Last modified: 2023/10/27 22:19 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki