data sample: {{:r:diet.csv}}
====== Twoway ANOVA ======
recap of oneway ANOVA
# One Way Anova (Completely Randomized Design)
fit <- aov(y ~ A, data=mydataframe)
Twoway ANOVA without interaction
# Randomized Block Design (B is the blocking factor)
fit <- aov(y ~ A + B, data=mydataframe)
Twoway ANOVA with interaction
# Two Way Factorial Design
fit <- aov(y ~ A + B + A:B, data=mydataframe)
fit <- aov(y ~ A*B, data=mydataframe) # same thing
===== E.g. 1 =====
> delivery.df = data.frame(
Service = c(rep("Carrier 1", 15), rep("Carrier 2", 15),
rep("Carrier 3", 15)),
Destination = c(rep(c("Office 1", "Office 2", "Office 3",
"Office 4", "Office 5"), 9)),
Time = c(15.23, 14.32, 14.77, 15.12, 14.05,
15.48, 14.13, 14.46, 15.62, 14.23, 15.19, 14.67, 14.48, 15.34, 14.22,
16.66, 16.27, 16.35, 16.93, 15.05, 16.98, 16.43, 15.95, 16.73, 15.62,
16.53, 16.26, 15.69, 16.97, 15.37, 17.12, 16.65, 15.73, 17.77, 15.52,
16.15, 16.86, 15.18, 17.96, 15.26, 16.36, 16.44, 14.82, 17.62, 15.04)
)
> delivery.df
Service Destination Time
1 Carrier 1 Office 1 15.23
2 Carrier 1 Office 2 14.32
3 Carrier 1 Office 3 14.77
4 Carrier 1 Office 4 15.12
5 Carrier 1 Office 5 14.05
6 Carrier 1 Office 1 15.48
7 Carrier 1 Office 2 14.13
8 Carrier 1 Office 3 14.46
9 Carrier 1 Office 4 15.62
10 Carrier 1 Office 5 14.23
11 Carrier 1 Office 1 15.19
12 Carrier 1 Office 2 14.67
13 Carrier 1 Office 3 14.48
14 Carrier 1 Office 4 15.34
15 Carrier 1 Office 5 14.22
16 Carrier 2 Office 1 16.66
17 Carrier 2 Office 2 16.27
18 Carrier 2 Office 3 16.35
19 Carrier 2 Office 4 16.93
20 Carrier 2 Office 5 15.05
21 Carrier 2 Office 1 16.98
22 Carrier 2 Office 2 16.43
23 Carrier 2 Office 3 15.95
24 Carrier 2 Office 4 16.73
25 Carrier 2 Office 5 15.62
26 Carrier 2 Office 1 16.53
27 Carrier 2 Office 2 16.26
28 Carrier 2 Office 3 15.69
29 Carrier 2 Office 4 16.97
30 Carrier 2 Office 5 15.37
31 Carrier 3 Office 1 17.12
32 Carrier 3 Office 2 16.65
33 Carrier 3 Office 3 15.73
34 Carrier 3 Office 4 17.77
35 Carrier 3 Office 5 15.52
36 Carrier 3 Office 1 16.15
37 Carrier 3 Office 2 16.86
38 Carrier 3 Office 3 15.18
39 Carrier 3 Office 4 17.96
40 Carrier 3 Office 5 15.26
41 Carrier 3 Office 1 16.36
42 Carrier 3 Office 2 16.44
43 Carrier 3 Office 3 14.82
44 Carrier 3 Office 4 17.62
45 Carrier 3 Office 5 15.04
> library("ggpubr")
> ggline(delivery.df, x = "Destination", y = "Time", color = "Service", add = c("mean_se", "dotplot"))
> delivery.mod1 = aov(Time ~ Destination*Service, data = delivery.df)
> summary(delivery.mod1)
Df Sum Sq Mean Sq F value Pr(>F)
Destination 4 17.5415 4.3854 61.1553 5.408e-14 ***
Service 2 23.1706 11.5853 161.5599 < 2.2e-16 ***
Destination:Service 8 4.1888 0.5236 7.3018 2.360e-05 ***
Residuals 30 2.1513 0.0717
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> TukeyHSD(delivery.mod1, which = "Service")
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = Time ~ Destination * Service, data = delivery.df)
$Service
diff lwr upr p adj
Carrier 2-Carrier 1 1.498667 1.2576092 1.7397241 0.0000000
Carrier 3-Carrier 1 1.544667 1.3036092 1.7857241 0.0000000
Carrier 3-Carrier 2 0.046000 -0.1950575 0.2870575 0.8856246
===== E.g. 2 =====
plant.df = PlantGrowth
plant.df$group = factor(plant.df$group,
labels = c("Control", "Treatment 1", "Treatment 2"))
require(ggplot2)
ggplot(plant.df, aes(x = group, y = weight)) +
geom_boxplot(fill = "grey80", colour = "blue") +
scale_x_discrete() + xlab("Treatment Group") +
ylab("Dried weight of plants")
> plant.mod2 = aov(weight ~ group, data = plant.df)
> summary(plant.mod2)
Df Sum Sq Mean Sq F value Pr(>F)
group 2 3.7663 1.8832 4.8461 0.01591 *
Residuals 27 10.4921 0.3886
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> TukeyHSD(plant.mod2)
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = weight ~ group, data = plant.df)
$group
diff lwr upr p adj
Treatment 1-Control -0.371 -1.0622161 0.3202161 0.3908711
Treatment 2-Control 0.494 -0.1972161 1.1852161 0.1979960
Treatment 2-Treatment 1 0.865 0.1737839 1.5562161 0.0120064
plot(TukeyHSD(plant.mod2))
===== 3 =====
Time Shoes Made
Morning Others 25
Morning Others 26
Night Others 27
Night Others 27
Morning Favorite 32
Morning Favorite 22
Night Favorite 30
Night Favorite 34
Morning Others 35
Morning Others 34
Night Others 33
Night Others 30
Morning Favorite 33
Morning Favorite 37
Night Favorite 36
Night Favorite 38
> basketball <- read.table("http://commres.net/wiki/_export/code/r/twoway_anova?codeblock=11",header=TRUE)
> basketball
Time Shoes Made
1 Morning Others 25
2 Morning Others 26
3 Night Others 27
4 Night Others 27
5 Morning Favorite 32
6 Morning Favorite 22
7 Night Favorite 30
8 Night Favorite 34
9 Morning Others 35
10 Morning Others 34
11 Night Others 33
12 Night Others 30
13 Morning Favorite 33
14 Morning Favorite 37
15 Night Favorite 36
16 Night Favorite 38
> attach(basketball)
>
> b.model <- aov(Made~Time+Shoes)
> summary(b.model)
Df Sum Sq Mean Sq F value Pr(>F)
Time 1 7.56 7.56 0.349 0.565
Shoes 1 39.06 39.06 1.802 0.202
Residuals 13 281.81 21.68
> b.model2 <- aov(Made~Time*Shoes)
> summary(b.model2)
Df Sum Sq Mean Sq F value Pr(>F)
Time 1 7.56 7.56 0.344 0.568
Shoes 1 39.06 39.06 1.777 0.207
Time:Shoes 1 18.06 18.06 0.822 0.382
Residuals 12 263.75 21.98
===== 4 Interaction effects =====
[{{:r:interaction effect eg01.png|Interaction effect example 1}}]
[{{:r:interaction effect eg02.png|Interaction effect example 2}}]
[{{:r:interaction effect eg03.png|Interaction effect example 3}}]
[{{:r:interaction effect eg04.png|Interaction effect example 4}}]
===== e.g., 5 =====
download {{:r:dataset_anova_twoWay_comparisons.csv|dataset_anova_twoWay_interactions.csv}}
stressdata <- read.csv("http://commres.net/wiki/_media/r/dataset_anova_twoway_comparisons.csv")
> #display the data
> stressdata
Treatment Age StressReduction
1 mental young 10
2 mental young 9
3 mental young 8
4 mental mid 7
5 mental mid 6
6 mental mid 5
7 mental old 4
8 mental old 3
9 mental old 2
10 physical young 9
11 physical young 8
12 physical young 7
13 physical mid 6
14 physical mid 5
15 physical mid 4
16 physical old 3
17 physical old 2
18 physical old 1
19 medical young 8
20 medical young 7
21 medical young 6
22 medical mid 5
23 medical mid 4
24 medical mid 3
25 medical old 2
26 medical old 1
27 medical old 0
> stressdata <- read.csv("http://commres.net/wiki/_media/r/dataset_anova_twoway_comparisons.csv")
>
> a.mod <- aov(StressReduction~Treatment*Age, data=stressdata)
> summary(a.mod)
Df Sum Sq Mean Sq F value Pr(>F)
Treatment 2 18 9 9 0.00195 **
Age 2 162 81 81 1e-09 ***
Treatment:Age 4 0 0 0 1.00000
Residuals 18 18 1
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
> TukeyHSD(a.mod, which="Treatment")
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = StressReduction ~ Treatment * Age, data = stressdata)
$Treatment
diff lwr upr p adj
mental-medical 2 0.7968988 3.2031012 0.0013531
physical-medical 1 -0.2031012 2.2031012 0.1135025
physical-mental -1 -2.2031012 0.2031012 0.1135025
>
> TukeyHSD(a.mod, which="Age")
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = StressReduction ~ Treatment * Age, data = stressdata)
$Age
diff lwr upr p adj
old-mid -3 -4.203101 -1.796899 1.54e-05
young-mid 3 1.796899 4.203101 1.54e-05
young-old 6 4.796899 7.203101 0.00e+00
>
위는 아래의 linear model 을 이용하여도 가능. 사실 모든 ANOVA 테스트는 linear model이기도 함(lm)
anova(lm(d$StressReduction ~ d$Treatment * d$Age, d))
Analysis of Variance Table
Response: d$StressReduction
Df Sum Sq Mean Sq F value Pr(>F)
d$Treatment 2 18 9 9 0.001953 **
d$Age 2 162 81 81 1e-09 ***
d$Treatment:d$Age 4 0 0 0 1.000000
Residuals 18 18 1
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
===== e.g., 6 =====
tdata <- ToothGrowth
tdata
str(tdata)
tdata$dose <- factor(tdata$dose)
# or a nicer way
tdata$dose <- factor(tdata$dose, levels=c(0.5, 1, 2), labels=c("dhalf", "d1", "d2"))
tdata
table(tdata$supp, tdata$dose)
==== Visualize the data ====
install.packages("ggpubr")
library("ggpubr")
ggboxplot(tdata, x = "dose", y = "len", color = "supp",
palette = c("red", "blue"))
ggline(tdata, x = "dose", y = "len", color = "supp",
add = c("mean_se", "dotplot"),
palette = c("red", "blue"))
# prettier
boxplot(len ~ supp * dose, data=tdata, frame = FALSE,
col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")
# or
interaction.plot(x.factor = tdata$dose, trace.factor =
tdata$supp, response = tdata$len, fun = mean,
type = "b", legend = TRUE,
xlab = "Dose", ylab="Tooth Length",
pch=c(1,19), col = c("#00AFBB", "#E7B800"))
==== Compute ANOVA test ====
res.aov2 <- aov(len ~ supp + dose, data = tdata)
summary(res.aov2)
Df Sum Sq Mean Sq F value Pr(>F)
supp 1 205.4 205.4 14.02 0.000429 ***
dose 2 2426.4 1213.2 82.81 < 2e-16 ***
Residuals 56 820.4 14.7
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
res.aov3 <- aov(len ~ supp * dose, data = tdata)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = tdata)
summary(res.aov3)
Df Sum Sq Mean Sq F value Pr(>F)
supp 1 205.4 205.4 15.572 0.000231 ***
dose 2 2426.4 1213.2 92.000 < 2e-16 ***
supp:dose 2 108.3 54.2 4.107 0.021860 *
Residuals 54 712.1 13.2
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
From the ANOVA results, you can conclude the following, based on the p-values and a significance level of 0.05:
* the p-value of supp is 0.000429 (significant), which indicates that the levels of supp are associated with significant different tooth length.
* the p-value of dose is < 2e-16 (significant), which indicates that the levels of dose are associated with significant different tooth length.
* the p-value for the interaction between supp*dose is 0.02 (significant), which indicates that the relationships between dose and tooth length depends on the supp method.
==== pair-wise test ====
> TukeyHSD(res.aov3, which = "dose")
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = len ~ supp * dose, data = tdata)
$dose
diff lwr upr p adj
d1-dh 9.130 6.362488 11.897512 0.0e+00
d2-dh 15.495 12.727488 18.262512 0.0e+00
d2-d1 6.365 3.597488 9.132512 2.7e-06
pairwise.t.test(tdata$len, tdata$dose, p.adjust.method = "BH")
==== Homogeneity ====
plot(res.aov3, 1)
install.packages("car")
library(car)
leveneTest(len ~ supp*dose, data = tdata)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 5 1.7086 0.1484
54
p value is greater than .05, which indicates that there is no evidence of significant difference between variances of groups.
==== check normality ====
plot(res.aov3, 2)
The residuals are plotted against the quantiles of normal distribution (the straight line).
==== unbalance design ====
> library(car)
> my_anova <- aov(len ~ supp * dose, data = tdata)
>
> Anova(my_anova, type="III"
+ )
Anova Table (Type III tests)
Response: len
Sum Sq Df F value Pr(>F)
(Intercept) 1750.33 1 132.730 3.603e-16 ***
supp 137.81 1 10.450 0.002092 **
dose 885.26 2 33.565 3.363e-10 ***
supp:dose 108.32 2 4.107 0.021860 *
Residuals 712.11 54
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1