User Tools

Site Tools


c:ms:regression_lecture_note_for_r

This is an old revision of the document!


################
rnorm2 <- function(n,mean,sd) { mean+sd*scale(rnorm(n)) } 
n <- 400

set.seed(101)
x <- rnorm2(n, 265, 15) 
rand.00 <- rnorm2(n, 0, 12)

rand.01 <- rnorm2(n, 0, 240)
plot(rand.01)
points(rand.00, col="red")

b = 170 / 265
b
y <- b * x + rand.00

df <- data.frame(x, y)
head(df)

# plot method 0
plot(x,y)

# method 1
plot(x, y, pch = 1, cex = 1, col = "black", main = "HEIGHT against SHOE SIZE", xlab = "SHOE SIZE", ylab = "HEIGHT (cm)")
abline(h = mean(y), lwd=1.5, col="red")
abline(lm(y ~ x), lwd=1.5, col="blue")

# method 2
lm.y.x <- lm(y ~ x, data = df)
summary(lm.y.x)
# str(lm.y.x)
lm.y.x$coefficients
intercept <- lm.y.x$coefficients[1]
slope <- lm.y.x$coefficients[2]
intercept
slope

# library(ggplot2)
# ggplot(data=df, aes(x,y)) +
#  geom_point(color="blue", size=1.5, pch=1.5) +
#  geom_hline(aes(yintercept=mean(y))) +
#  geom_abline(intercept=intercept, slope=slope)
#
#####
ss.y <- sum((y-mean(y))^2)
ss.y
df.y <- length(y)-1
df.y
ss.y/df.y
var(y)

m.x <- mean(x)
m.y <- mean(y)
sp <- sum((x-m.x)*(y-m.y))
df.tot <- n-1
sp/df.tot
cov.xy <- cov(x,y)

sd.x <- sd(x)
sd.y <- sd(y)

r.xy <- cov.xy/(sd.x*sd.y)
r.xy
cor(x,y)
cor.test(x,y)

b <- cov(x,y) / var(x)
# note that 
# (sp(x,y)/n-1) / (ss(x)/n-1)
# = sp(x,y) / ss(x)
# m.y = a + b * mean.x
a <- m.y - (b * m.x)
b
a


summary(lm.y.x)
lm.y.x$coefficients

# str(lm.y.x)

y.pred <- lm.y.x$fitted.values
y <- y
m.y

res <- y - y.pred
reg <- y.pred - m.y
ss.y 
ss.res <- sum(res^2)
ss.reg <- sum(reg^2)
ss.y
ss.res
ss.reg
ss.res + ss.reg

r.square <- ss.reg / ss.y 
r.square 


df.tot <- df.y
df.reg <- 2 - 1
df.res <- df.tot - df.reg

df.reg
df.res
df.tot

ss.tot <- ss.y
ss.reg
ss.res
ss.tot

ms.reg <- ss.reg / df.reg
ms.res <- ss.res / df.res
ms.reg
ms.res

f.cal <- ms.reg/ms.res
f.cal

p.val <- pf(f.cal, df.reg, df.res, lower.tail = F)
p.val
anova(lm.y.x)

########################################
# also make it sure that you understand
########################################

res <- lm.y.x$residuals
reg <- lm.y.x$fitted.values-m.y

# This is essentially a perfect linear 
# relationship between the regression 
# line and x data
lm.reg.x <- lm(reg~x, data = df)
summary(lm.reg.x)

# The relationship between residuals and 
# x should be zero
lm.res.x <- lm(res~x, data = df)
summary(lm.res.x)
summary(lm.y.x)
c/ms/regression_lecture_note_for_r.1716935498.txt.gz · Last modified: 2024/05/29 07:31 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki