c:ma:anova_note
Table of Contents
ANOVA
prev
set.seed(10) sa <- rnorm2(100, 7.6, 2) sb <- rnorm2(100, 8.3, 2) mean.sa <- mean(sa) mean.sb <- mean(sb) na <- length(sa) nb <- length(sb) dfa <- na-1 dfb <- nb-1 var(sa) var(sb) ssa <- var(sa)*dfa ssb <- var(sb)*dfb ssa ssb ssa+ssb dfa+dfb var.pooled <- (ssa+ssb)/(dfa+dfb) vp <- var.pooled vp se <- sqrt((vp/na)+(vp/nb)) diff <- mean.sa - mean.sb t.cal <- diff/se se diff t.cal t.crit <- qt(.975, 198) t.crit pt(t.cal, 198)*2 t.test(sa, sb) sab <- c(sa, sb) sab group <- c(rep("ga",na), rep("gb",nb)) group sall <- data.frame(sab,group) sall # attach(sall) df.tot <- (na+nb)-1 ss.tot <- var(sab)*df.tot var.tot <- var(sab) df.tot ss.tot var.tot var.tot*df.tot # for uniqueN function data.table required # install.packages("data.table") # library(data.table) uniqueN(sall, by = c("group")) nofg <- uniqueN(sall, by = c("group")) df.bet <- nofg - 1 df.sa <- na-1 df.sb <- nb-1 df.within <- df.sa+df.sb df.within ss.sa <- var(sa)*df.sa ss.sb <- var(sb)*df.sb ss.sa ss.sb ss.within <- ss.sa + ss.sb ss.within mean.grand <- mean(sab) # mean.sa <- mean(sa) # mean.sb <- mean(sb) mean.grand mean.sa mean.sb error.ga <- na * (mean.grand - mean.sa)^2 error.gb <- nb * (mean.grand - mean.sb)^2 error.ga error.gb ss.bet <- error.ga + error.gb ss.bet # sumup ss.tot ss.bet ss.within ss.bet+ss.within df.tot df.bet df.within df.bet + df.within ms.bet <- ss.bet / df.bet ms.wit <- ss.within / df.within ms.bet ms.wit fvalue <- ms.bet/ms.wit fvalue 1-pf(fvalue, df.bet, df.within) f.res <- (aov(sall$sab~sall$group)) summary(f.res) t.test(sa,sb) # str(t.test(sa,sb)) t.test(sa,sb)$statistic t.test(sa,sb)$statistic^2 t.test(sa,sb)$p.value
ANOVA e.g.1
set.seed(1024) na <-50 nb <- 50 nc <- 50 s1 <- round(rnorm(na, 11.6, 2),0) s2 <- round(rnorm(nb, 12.9, 2),0) s3 <- round(rnorm(nc, 13.4, 2),0) s1 s2 s3 sabc <- c(s1,s2,s3) sabc group <- c(rep("g1",na), rep("g2",nb), rep("g3",nc)) group sall <- data.frame(sabc, group) sall attach(sall) aov.sall <- aov(sabc ~ group, data=sall) summary(aov.sall) df.total <- length(sabc) - 1 ss.total <- var(sabc)*df.total var.total <- var(sabc) df.total ss.total var.total df.s1 <- na-1 df.s2 <- nb-1 df.s3 <- nc-1 df.within <- df.s1+df.s2+df.s3 df.within ss.s1 <- var(s1)*df.s1 ss.s2 <- var(s2)*df.s2 ss.s3 <- var(s3)*df.s3 ss.s1 ss.s2 ss.s3 ss.within <- ss.s1+ss.s2+ss.s3 ss.within # for uniqueN function data.table required # install.packages("data.table") library(data.table) uniqueN(sall, by = c("group")) nofg <- uniqueN(sall, by = c("group")) nofg df.between <- nofg - 1 df.between mean.grand <- mean(sabc) mean.s1 <- mean(s1) mean.s2 <- mean(s2) mean.s3 <- mean(s3) mean.grand mean.s1 mean.s2 mean.s3 error.g1 <- na * (mean.grand - mean.s1)^2 error.g2 <- nb * (mean.grand - mean.s2)^2 error.g3 <- nc * (mean.grand - mean.s3)^2 error.g1 error.g2 error.g3 ss.between <- error.g1 + error.g2 + error.g3 ss.between # sumup ss.total ss.between ss.within ss.between + ss.within ss.total df.total df.between df.within df.between + df.within ms.between <- ss.between / df.between ms.within <- ss.within / df.within ms.between ms.within fvalue <- ms.between/ms.within fvalue # fvalue에서 판단했을 때 그 판단이 잘못일 확률 1 - pf(fvalue, df.between, df.within) f.res <- aov(sabc ~ group, data=sall) summary(f.res) # for regression r.res <- lm(sabc ~ group, data=sall) summary(r.res) anova(r.res) summary(r.res)$r.square ss.total ss.between ss.within # this is r.square value ss.between/ss.total
> set.seed(1024) > na <-50 > nb <- 50 > nc <- 50 > > s1 <- round(rnorm(na, 11.6, 2),0) > s2 <- round(rnorm(nb, 12.9, 2),0) > s3 <- round(rnorm(nc, 13.4, 2),0) > s1 [1] 10 11 8 10 12 7 11 16 14 13 8 9 14 12 11 15 9 11 10 9 11 13 15 14 11 11 12 13 10 13 10 12 [33] 14 11 14 11 14 11 10 11 11 14 10 9 14 13 10 10 7 13 > s2 [1] 12 12 16 14 12 12 12 12 11 12 13 12 10 13 15 12 12 18 14 13 13 13 17 13 13 14 11 13 13 11 14 15 [33] 11 12 10 14 13 12 14 15 13 10 10 17 12 14 14 16 13 12 > s3 [1] 13 12 12 16 14 14 9 12 11 14 15 13 7 17 16 12 12 12 9 11 12 16 17 13 18 14 14 12 15 11 13 12 [33] 13 10 14 10 15 10 11 14 10 14 14 13 13 13 11 11 12 11 > > sabc <- c(s1,s2,s3) > sabc [1] 10 11 8 10 12 7 11 16 14 13 8 9 14 12 11 15 9 11 10 9 11 13 15 14 11 11 12 13 10 13 10 12 [33] 14 11 14 11 14 11 10 11 11 14 10 9 14 13 10 10 7 13 12 12 16 14 12 12 12 12 11 12 13 12 10 13 [65] 15 12 12 18 14 13 13 13 17 13 13 14 11 13 13 11 14 15 11 12 10 14 13 12 14 15 13 10 10 17 12 14 [97] 14 16 13 12 13 12 12 16 14 14 9 12 11 14 15 13 7 17 16 12 12 12 9 11 12 16 17 13 18 14 14 12 [129] 15 11 13 12 13 10 14 10 15 10 11 14 10 14 14 13 13 13 11 11 12 11 > group <- c(rep("g1",na), rep("g2",nb), rep("g3",nc)) > group [1] "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" [20] "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" [39] "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g1" "g2" "g2" "g2" "g2" "g2" "g2" "g2" [58] "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" [77] "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" "g2" [96] "g2" "g2" "g2" "g2" "g2" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" [115] "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" [134] "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" "g3" > > sall <- data.frame(sabc, group) > sall sabc group 1 10 g1 2 11 g1 3 8 g1 4 10 g1 5 12 g1 6 7 g1 7 11 g1 8 16 g1 9 14 g1 10 13 g1 11 8 g1 12 9 g1 13 14 g1 14 12 g1 15 11 g1 16 15 g1 17 9 g1 18 11 g1 19 10 g1 20 9 g1 21 11 g1 22 13 g1 23 15 g1 24 14 g1 25 11 g1 26 11 g1 27 12 g1 28 13 g1 29 10 g1 30 13 g1 31 10 g1 32 12 g1 33 14 g1 34 11 g1 35 14 g1 36 11 g1 37 14 g1 38 11 g1 39 10 g1 40 11 g1 41 11 g1 42 14 g1 43 10 g1 44 9 g1 45 14 g1 46 13 g1 47 10 g1 48 10 g1 49 7 g1 50 13 g1 51 12 g2 52 12 g2 53 16 g2 54 14 g2 55 12 g2 56 12 g2 57 12 g2 58 12 g2 59 11 g2 60 12 g2 61 13 g2 62 12 g2 63 10 g2 64 13 g2 65 15 g2 66 12 g2 67 12 g2 68 18 g2 69 14 g2 70 13 g2 71 13 g2 72 13 g2 73 17 g2 74 13 g2 75 13 g2 76 14 g2 77 11 g2 78 13 g2 79 13 g2 80 11 g2 81 14 g2 82 15 g2 83 11 g2 84 12 g2 85 10 g2 86 14 g2 87 13 g2 88 12 g2 89 14 g2 90 15 g2 91 13 g2 92 10 g2 93 10 g2 94 17 g2 95 12 g2 96 14 g2 97 14 g2 98 16 g2 99 13 g2 100 12 g2 101 13 g3 102 12 g3 103 12 g3 104 16 g3 105 14 g3 106 14 g3 107 9 g3 108 12 g3 109 11 g3 110 14 g3 111 15 g3 112 13 g3 113 7 g3 114 17 g3 115 16 g3 116 12 g3 117 12 g3 118 12 g3 119 9 g3 120 11 g3 121 12 g3 122 16 g3 123 17 g3 124 13 g3 125 18 g3 126 14 g3 127 14 g3 128 12 g3 129 15 g3 130 11 g3 131 13 g3 132 12 g3 133 13 g3 134 10 g3 135 14 g3 136 10 g3 137 15 g3 138 10 g3 139 11 g3 140 14 g3 141 10 g3 142 14 g3 143 14 g3 144 13 g3 145 13 g3 146 13 g3 147 11 g3 148 11 g3 149 12 g3 150 11 g3 > attach(sall) The following objects are masked _by_ .GlobalEnv: group, sabc > aov.sall <- aov(sabc ~ group, data=sall) > summary(aov.sall) Df Sum Sq Mean Sq F value Pr(>F) group 2 68.7 34.33 8.049 0.000482 *** Residuals 147 626.9 4.26 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > df.total <- length(sabc) - 1 > ss.total <- var(sabc)*df.total > var.total <- var(sabc) > df.total [1] 149 > ss.total [1] 695.5733 > var.total [1] 4.668277 > > df.s1 <- na-1 > df.s2 <- nb-1 > df.s3 <- nc-1 > > df.within <- df.s1+df.s2+df.s3 > df.within [1] 147 > > ss.s1 <- var(s1)*df.s1 > ss.s2 <- var(s2)*df.s2 > ss.s3 <- var(s3)*df.s3 > ss.s1 [1] 222.32 > ss.s2 [1] 160.98 > ss.s3 [1] 243.62 > ss.within <- ss.s1+ss.s2+ss.s3 > ss.within [1] 626.92 > > > # for uniqueN function data.table required > # install.packages("data.table") > library(data.table) data.table 1.14.8 using 8 threads (see ?getDTthreads). Latest news: r-datatable.com > uniqueN(sall, by = c("group")) [1] 3 > nofg <- uniqueN(sall, by = c("group")) > nofg [1] 3 > df.between <- nofg - 1 > df.between [1] 2 > > mean.grand <- mean(sabc) > mean.s1 <- mean(s1) > mean.s2 <- mean(s2) > mean.s3 <- mean(s3) > mean.grand [1] 12.38667 > mean.s1 [1] 11.44 > mean.s2 [1] 12.98 > mean.s3 [1] 12.74 > > error.g1 <- na * (mean.grand - mean.s1)^2 > error.g2 <- nb * (mean.grand - mean.s2)^2 > error.g3 <- nc * (mean.grand - mean.s3)^2 > > error.g1 [1] 44.80889 > error.g2 [1] 17.60222 > error.g3 [1] 6.242222 > > ss.between <- error.g1 + error.g2 + error.g3 > ss.between [1] 68.65333 > > # sumup > ss.total [1] 695.5733 > ss.between [1] 68.65333 > ss.within [1] 626.92 > ss.between + ss.within [1] 695.5733 > ss.total [1] 695.5733 > > df.total [1] 149 > df.between [1] 2 > df.within [1] 147 > df.between + df.within [1] 149 > > ms.between <- ss.between / df.between > ms.within <- ss.within / df.within > ms.between [1] 34.32667 > ms.within [1] 4.264762 > > fvalue <- ms.between/ms.within > fvalue [1] 8.048906 > > # fvalue에서 판단했을 때 그 판단이 잘못일 확률 > 1 - pf(fvalue, df.between, df.within) [1] 0.0004818216 > > > f.res <- aov(sabc ~ group, data=sall) > summary(f.res) Df Sum Sq Mean Sq F value Pr(>F) group 2 68.7 34.33 8.049 0.000482 *** Residuals 147 626.9 4.26 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > # for regression > r.res <- lm(sabc ~ group, data=sall) > summary(r.res) Call: lm(formula = sabc ~ group, data = sall) Residuals: Min 1Q Median 3Q Max -5.74 -1.44 -0.21 1.26 5.26 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 11.4400 0.2921 39.171 < 2e-16 *** groupg2 1.5400 0.4130 3.729 0.000274 *** groupg3 1.3000 0.4130 3.148 0.001994 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 2.065 on 147 degrees of freedom Multiple R-squared: 0.0987, Adjusted R-squared: 0.08644 F-statistic: 8.049 on 2 and 147 DF, p-value: 0.0004818 > anova(r.res) Analysis of Variance Table Response: sabc Df Sum Sq Mean Sq F value Pr(>F) group 2 68.65 34.327 8.0489 0.0004818 *** Residuals 147 626.92 4.265 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > summary(r.res)$r.square [1] 0.09870035 > > ss.total [1] 695.5733 > ss.between [1] 68.65333 > ss.within [1] 626.92 > > # this is r.square value > ss.between/ss.total [1] 0.09870035 >
ANOVA e.g. 2
########################## # http://commres.net/wiki/r/oneway_anova # ########################## x1 <- c(50.5, 52.1, 51.9, 52.4, 50.6, 51.4, 51.2, 52.2, 51.5, 50.8) x2 <- c(47.5, 47.7, 46.6, 47.1, 47.2, 47.8, 45.2, 47.4, 45.0, 47.9) x3 <- c(46.0, 47.1, 45.6, 47.1, 47.2, 46.4, 45.9, 47.1, 44.9, 46.2) xc <- data.frame(x1,x2,x3) xs <- stack(xc) xs colnames(xs) <- c("score", "temp") xs levels(xs$temp) <- c("low", "mid", "hi") xs str(xs) # 데이터 구조 체크 attach(xs) aov.xs <- aov(score ~ temp, data = xs) summary(aov.xs) mean.tot <- mean(score) mean.tot df.tot <- length(score)-1 ss.tot <- var(score) * df.tot ss.tot df.tot var.tot <- var(score) var.tot2 <- ss.tot/df.tot var.tot == var.tot2 mean.each <- tapply(score, list(temp), mean) mean.each var.each <- tapply(score, list(temp), var) var.each df.within <- 3*(10-1) ss.each <- var.each * 9 ss.within <- sum(ss.each) ss.within ss.tot - ss.within ss.bet <- sum(10*(mean.tot - mean.each)^2) ss.bet df.bet <- 3-1 ss.tot ss.bet ss.within ss.bet + ss.within df.tot df.bet df.within df.bet + df.within ms.bet <- ss.bet / df.bet ms.within <- ss.within / df.within f.value <- ms.bet / ms.within ss.bet ss.within df.bet df.within ms.bet ms.within f.value alpha <- .05 qf(alpha, df.bet, df.within, lower.tail = F) pf(f.value, df.bet, df.within, lower.tail = F) aov.xs <- aov(score ~ temp, data = xs) summary(aov.xs)
Two way ANOVA (Factorial ANOVA)
Factorial ANOVA
Factorial ANOVA in R
################################################# # two-way anova # subject = factor(paste('sub', 1:30, sep='')) ################################################# n.a.group <- 3 # a treatment 숫자 n.b.group <- 2 # b 그룹 숫자 n.sub <- 30 # 총 샘플 숫자 # 데이터 생성 set.seed(9) a <- gl(3, 10, n.sub, labels=c('a1', 'a2', 'a3')) b <- gl(2, 5, n.sub, labels=c('b1', 'b2')) a b y <- rnorm(30, mean=10) + 3.14 * (a=='a1' & b=='b2') + 1.43 * (a=='a3' & b=='b2') y dat <- data.frame(a, b, y) aov.dat <- aov(y~a*b) # anova test summary(aov.dat) # summary of the test output # hand calculation table(a,b) tapply(y, list(a,b), mean) # 각 셀에서의 평균 df.within.each <- tapply(y, list(a,b), length) -1 # 각 셀에서의 샘플숫자 n.within.each <- df.within.each + 1 df.within <- sum(df.within.each) # df within var.within <- tapply(y, list(a,b), var) # var.within ss.within.each <- tapply(y, list(a,b), var) * df.within.each ss.within.each ss.within <- sum(ss.within.each) # ss.within ss.within interaction.plot(a,b,y) mean.a <- tapply(y, list(a), mean) mean.b <- tapply(y, list(b), mean) mean.a mean.b var.a <- tapply(y, list(a), var) var.b <- tapply(y, list(b), var) mean.tot <- mean(dat$y) var.tot <- var(dat$y) df.tot <- n.sub - 1 ss.tot <- var.tot * df.tot ## between mean.each <- tapply(y, list(a,b), mean) mean.each mean.tot <- mean(y) mean.tot n.each <- tapply(y, list(a,b), length) n.each n.a.each <- tapply(y, list(a), length) n.b.each <- tapply(y, list(b), length) ss.bet <- sum(n.each*(mean.each-mean.tot)^2) ss.bet ss.tot ss.within ss.bet ss.bet + ss.within ss.a <- sum(n.a.each * ((mean.tot - mean.a)^2)) ss.b <- sum(n.b.each * ((mean.tot - mean.b)^2)) ss.a ss.b ss.ab <- ss.bet - (ss.a + ss.b) ss.ab ss.tot ss.bet ss.within ss.a ss.b ss.ab df.tot <- n.sub - 1 df.bet <- (n.a.group * n.b.group) - 1 df.a <- n.a.group - 1 df.b <- n.b.group - 1 df.ab <- df.bet - (df.a + df.b) df.within <- sum(df.within.each) df.tot df.bet df.a df.b df.ab df.within ms.a <- ss.a / df.a ms.b <- ss.b / df.b ms.ab <- ss.ab / df.ab ms.within <- ss.within / df.within ms.a ms.b ms.ab ms.within f.a <- ms.a / ms.within f.b <- ms.b / ms.within f.ab <- ms.ab / ms.within alpha <- .05 f.a # 봐야할 F분포표에서의 F-value # qt 처럼 qf 사용 # qf(alpha, df.between, df.within, lower.tail=F) 처럼 사용 qf(alpha, df.a, df.within, lower.tail = F) pf(f.a, df.a, df.within, lower.tail = F) f.b qf(alpha, df.b, df.within, lower.tail = F) pf(f.b, df.b, df.within, lower.tail = F) f.ab qf(alpha, df.ab, df.within, lower.tail = F) pf(f.ab, df.ab, df.within, lower.tail = F) # aov result summary(aov.dat)
c/ma/anova_note.txt · Last modified: 2024/09/23 10:54 by hkimscil