c:ma:regression_lecture_note
# Simple linear regression lecture script.
#
# Part 1 simulates a small sample (n = 25), plots the distribution of y,
# fits y ~ x with lm(), and overlays the fitted line on a scatter plot.
# Part 2 repeats the simulation with n = 400 and noisier errors, then
# decomposes the total sum of squares into explained and residual parts and
# rebuilds R-squared and the correlation coefficient by hand.
#
# Bare expressions (e.g. `x`, `ss.tot`) are kept on purpose: they print the
# value when the script is run at top level.
#
# NOTE(review): `size` on line geoms (vline/hline/abline/density/smooth) is
# kept for compatibility with older ggplot2; ggplot2 >= 3.4.0 prefers
# `linewidth` and emits a deprecation warning for `size` on lines.

# Install ggplot2 only when it is missing instead of on every run.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Part 1: small sample (n = 25) ----

set.seed(401)
sn <- 25
x <- rnorm(sn, 100, 10)
x
y <- 1.4 * x + 2 + rnorm(sn, 0, 10)
y
df <- data.frame(x, y)

# density graph
# The mean of y is passed as a constant so a single reference line is drawn
# (mapping it through aes() would draw one identical line per row).
# bins = 30 is the geom_histogram() default, stated explicitly.
ggplot(data = df, aes(y)) +
  geom_histogram(bins = 30) +
  geom_vline(
    xintercept = mean(df$y),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

ggplot(data = df, aes(y)) +
  geom_density(color = "blue", size = 1.5) +
  geom_vline(
    xintercept = mean(df$y),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

lm.mod <- lm(y ~ x, data = df)
summary(lm.mod)
str(lm.mod)

# Coefficient 1 is the intercept, coefficient 2 is the slope for x.
inc.y <- lm.mod$coefficients[1]
slope.x <- lm.mod$coefficients[2]
inc.y
slope.x

# Scatter plot with the mean of y (horizontal) and the fitted line.
# Point shapes must be whole numbers; the earlier value 1.5 was presumably
# meant to be shape 1 (open circle).
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 1) +
  geom_hline(yintercept = mean(df$y)) +
  geom_abline(intercept = inc.y, slope = slope.x)

ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 2.5, shape = 2) +
  geom_hline(yintercept = mean(df$y), size = 1.5, color = "red") +
  geom_abline(
    intercept = inc.y, slope = slope.x,
    size = 1.5, color = "darkgreen"
  )

################################
# Part 2: larger sample (n = 400), noisier errors ----
################################

set.seed(101)
sn <- 400
x <- rnorm(sn, 100, 10)
x
y <- 1.4 * x + 2 + rnorm(sn, 0, 16)
y
df <- data.frame(x, y)

# density graph
ggplot(data = df, aes(y)) +
  geom_histogram(bins = 30) +
  geom_vline(
    xintercept = mean(df$y),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

ggplot(data = df, aes(y)) +
  geom_density(color = "blue", size = 1.5) +
  geom_vline(
    xintercept = mean(df$y),
    color = "red", linetype = "dashed", size = 1
  ) +
  coord_flip()

# A hand-picked line (intercept 10, slope 1.5) drawn before fitting.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 2) +
  geom_hline(yintercept = mean(df$y), size = 1, color = "darkgreen") +
  geom_abline(intercept = 10, slope = 1.5, size = 1.5, color = "red")

lm.mod2 <- lm(y ~ x, data = df)
sum.lm.mod2 <- summary(lm.mod2)
sum.lm.mod2

lm.mod2$coefficients[2]
lm.mod2$coefficients[1]
b <- lm.mod2$coefficients[2]
a <- lm.mod2$coefficients[1]

# Same scatter plot, now with the fitted line y = a + b * x.
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 2) +
  geom_hline(yintercept = mean(df$y), size = 1, color = "darkgreen") +
  geom_abline(intercept = a, slope = b, size = 1.5, color = "red")

# Residual sum of squares.
lm.mod2$residuals
sum(lm.mod2$residuals^2)
ss.res <- sum(lm.mod2$residuals^2)

# Total sum of squares, rebuilt as variance * degrees of freedom.
mean.y <- mean(df$y)
var.tot <- var(df$y)
df.tot <- length(df$y) - 1
ss.tot <- var.tot * df.tot
ss.tot

# Explained sum of squares: fitted values minus the mean of y.
y.hat <- lm.mod2$fitted.values
y.hat - mean.y
explained <- y.hat - mean.y
ss.exp <- sum(explained^2)
ss.exp
ss.res
ss.exp + ss.res
ss.tot

# R-squared by hand; compare with the value reported in the summary.
r.square <- ss.exp / ss.tot
r.square
sum.lm.mod2

# sqrt() alone is always non-negative, so take the sign from the slope;
# this makes r.coeff agree with cor(x, y) for negative relationships too.
r.coeff <- sign(unname(b)) * sqrt(r.square)
r.coeff
cor(x, y)

###
ggplot(data = df, aes(x, y)) +
  geom_point(color = "blue", size = 1.5, shape = 1) +
  geom_hline(yintercept = mean(df$y), size = 1, color = "darkgreen") +
  stat_smooth(
    method = "lm", formula = y ~ x, geom = "smooth",
    color = "red", size = 1
  )
c/ma/regression_lecture_note.txt · Last modified: 2023/10/27 22:19 by hkimscil