data sample: {{:r:diet.csv}} ====== Twoway ANOVA ====== recap of oneway ANOVA


# One Way Anova (Completely Randomized Design)
fit <- aov(y ~ A, data=mydataframe)

Twoway ANOVA without interaction

# Randomized Block Design (B is the blocking factor) 
fit <- aov(y ~ A + B, data=mydataframe)

Twoway ANOVA with interaction

# Two Way Factorial Design 
fit <- aov(y ~ A + B + A:B, data=mydataframe)
fit <- aov(y ~ A*B, data=mydataframe) # same thing

===== E.g. 1 =====

> delivery.df = data.frame(
     Service = c(rep("Carrier 1", 15), rep("Carrier 2", 15),
                 rep("Carrier 3", 15)),
     Destination = c(rep(c("Office 1", "Office 2", "Office 3",
                           "Office 4", "Office 5"), 9)),
     Time = c(15.23, 14.32, 14.77, 15.12, 14.05,
              15.48, 14.13, 14.46, 15.62, 14.23, 15.19, 14.67, 14.48, 15.34, 14.22,
              16.66, 16.27, 16.35, 16.93, 15.05, 16.98, 16.43, 15.95, 16.73, 15.62,
              16.53, 16.26, 15.69, 16.97, 15.37, 17.12, 16.65, 15.73, 17.77, 15.52,
              16.15, 16.86, 15.18, 17.96, 15.26, 16.36, 16.44, 14.82, 17.62, 15.04)
)
> delivery.df
     Service Destination  Time
1  Carrier 1    Office 1 15.23
2  Carrier 1    Office 2 14.32
3  Carrier 1    Office 3 14.77
4  Carrier 1    Office 4 15.12
5  Carrier 1    Office 5 14.05
6  Carrier 1    Office 1 15.48
7  Carrier 1    Office 2 14.13
8  Carrier 1    Office 3 14.46
9  Carrier 1    Office 4 15.62
10 Carrier 1    Office 5 14.23
11 Carrier 1    Office 1 15.19
12 Carrier 1    Office 2 14.67
13 Carrier 1    Office 3 14.48
14 Carrier 1    Office 4 15.34
15 Carrier 1    Office 5 14.22
16 Carrier 2    Office 1 16.66
17 Carrier 2    Office 2 16.27
18 Carrier 2    Office 3 16.35
19 Carrier 2    Office 4 16.93
20 Carrier 2    Office 5 15.05
21 Carrier 2    Office 1 16.98
22 Carrier 2    Office 2 16.43
23 Carrier 2    Office 3 15.95
24 Carrier 2    Office 4 16.73
25 Carrier 2    Office 5 15.62
26 Carrier 2    Office 1 16.53
27 Carrier 2    Office 2 16.26
28 Carrier 2    Office 3 15.69
29 Carrier 2    Office 4 16.97
30 Carrier 2    Office 5 15.37
31 Carrier 3    Office 1 17.12
32 Carrier 3    Office 2 16.65
33 Carrier 3    Office 3 15.73
34 Carrier 3    Office 4 17.77
35 Carrier 3    Office 5 15.52
36 Carrier 3    Office 1 16.15
37 Carrier 3    Office 2 16.86
38 Carrier 3    Office 3 15.18
39 Carrier 3    Office 4 17.96
40 Carrier 3    Office 5 15.26
41 Carrier 3    Office 1 16.36
42 Carrier 3    Office 2 16.44
43 Carrier 3    Office 3 14.82
44 Carrier 3    Office 4 17.62
45 Carrier 3    Office 5 15.04

> library("ggpubr")
> ggline(delivery.df, x = "Destination", y = "Time", color = "Service", add = c("mean_se", "dotplot"))

> delivery.mod1 = aov(Time ~ Destination*Service, data = delivery.df)

> summary(delivery.mod1)
                    Df  Sum Sq Mean Sq  F value    Pr(>F)    
Destination          4 17.5415  4.3854  61.1553 5.408e-14 ***
Service              2 23.1706 11.5853 161.5599 < 2.2e-16 ***
Destination:Service  8  4.1888  0.5236   7.3018 2.360e-05 ***
Residuals           30  2.1513  0.0717                       
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> TukeyHSD(delivery.mod1, which = "Service")
  Tukey multiple comparisons of means
    95% family-wise confidence level
 
Fit: aov(formula = Time ~ Destination * Service, data = delivery.df)
 
$Service
                        diff        lwr       upr     p adj
Carrier 2-Carrier 1 1.498667  1.2576092 1.7397241 0.0000000
Carrier 3-Carrier 1 1.544667  1.3036092 1.7857241 0.0000000
Carrier 3-Carrier 2 0.046000 -0.1950575 0.2870575 0.8856246

===== E.g. 2 =====


plant.df = PlantGrowth
plant.df$group = factor(plant.df$group,
  labels = c("Control", "Treatment 1", "Treatment 2"))

require(ggplot2)
 
ggplot(plant.df, aes(x = group, y = weight)) +
  geom_boxplot(fill = "grey80", colour = "blue") +
  scale_x_discrete() + xlab("Treatment Group") +
  ylab("Dried weight of plants")

> plant.mod2 = aov(weight ~ group, data = plant.df)
> summary(plant.mod2)
            Df  Sum Sq Mean Sq F value  Pr(>F)  
group        2  3.7663  1.8832  4.8461 0.01591 *
Residuals   27 10.4921  0.3886                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

> TukeyHSD(plant.mod2)
  Tukey multiple comparisons of means
    95% family-wise confidence level
 
Fit: aov(formula = weight ~ group, data = plant.df)
 
$group
                          diff        lwr       upr     p adj
Treatment 1-Control     -0.371 -1.0622161 0.3202161 0.3908711
Treatment 2-Control      0.494 -0.1972161 1.1852161 0.1979960
Treatment 2-Treatment 1  0.865  0.1737839 1.5562161 0.0120064

plot(TukeyHSD(plant.mod2))

===== 3 =====


Time	Shoes	Made
Morning	Others	25
Morning	Others	26
Night	Others	27
Night	Others	27
Morning	Favorite	32
Morning	Favorite	22
Night	Favorite	30
Night	Favorite	34
Morning	Others	35
Morning	Others	34
Night	Others	33
Night	Others	30
Morning	Favorite	33
Morning	Favorite	37
Night	Favorite	36
Night	Favorite	38

> basketball <- read.table("http://commres.net/wiki/_export/code/r/twoway_anova?codeblock=11",header=TRUE)
> basketball
      Time    Shoes Made
1  Morning   Others   25
2  Morning   Others   26
3    Night   Others   27
4    Night   Others   27
5  Morning Favorite   32
6  Morning Favorite   22
7    Night Favorite   30
8    Night Favorite   34
9  Morning   Others   35
10 Morning   Others   34
11   Night   Others   33
12   Night   Others   30
13 Morning Favorite   33
14 Morning Favorite   37
15   Night Favorite   36
16   Night Favorite   38

> attach(basketball)
>
> b.model <- aov(Made~Time+Shoes)
> summary(b.model)
            Df Sum Sq Mean Sq F value Pr(>F)
Time         1   7.56    7.56   0.349  0.565
Shoes        1  39.06   39.06   1.802  0.202
Residuals   13 281.81   21.68   

> b.model2 <- aov(Made~Time*Shoes)
> summary(b.model2)
            Df Sum Sq Mean Sq F value Pr(>F)
Time         1   7.56    7.56   0.344  0.568
Shoes        1  39.06   39.06   1.777  0.207
Time:Shoes   1  18.06   18.06   0.822  0.382
Residuals   12 263.75   21.98

===== 4 Interaction effects ===== [{{:r:interaction effect eg01.png|Interaction effect example 1}}] [{{:r:interaction effect eg02.png|Interaction effect example 2}}] [{{:r:interaction effect eg03.png|Interaction effect example 3}}] [{{:r:interaction effect eg04.png|Interaction effect example 4}}] ===== e.g., 5 ===== download {{:r:dataset_anova_twoWay_comparisons.csv|dataset_anova_twoWay_interactions.csv}}

stressdata <- read.csv("http://commres.net/wiki/_media/r/dataset_anova_twoway_comparisons.csv")
> #display the data
> stressdata
   Treatment   Age StressReduction
1     mental young              10
2     mental young               9
3     mental young               8
4     mental   mid               7
5     mental   mid               6
6     mental   mid               5
7     mental   old               4
8     mental   old               3
9     mental   old               2
10  physical young               9
11  physical young               8
12  physical young               7
13  physical   mid               6
14  physical   mid               5
15  physical   mid               4
16  physical   old               3
17  physical   old               2
18  physical   old               1
19   medical young               8
20   medical young               7
21   medical young               6
22   medical   mid               5
23   medical   mid               4
24   medical   mid               3
25   medical   old               2
26   medical   old               1
27   medical   old               0


> stressdata <- read.csv("http://commres.net/wiki/_media/r/dataset_anova_twoway_comparisons.csv")
> 
> a.mod <- aov(StressReduction~Treatment*Age, data=stressdata)
> summary(a.mod)
              Df Sum Sq Mean Sq F value  Pr(>F)    
Treatment      2     18       9       9 0.00195 ** 
Age            2    162      81      81   1e-09 ***
Treatment:Age  4      0       0       0 1.00000    
Residuals     18     18       1                    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>

> TukeyHSD(a.mod, which="Treatment")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = StressReduction ~ Treatment * Age, data = stressdata)

$Treatment
                 diff        lwr       upr     p adj
mental-medical      2  0.7968988 3.2031012 0.0013531
physical-medical    1 -0.2031012 2.2031012 0.1135025
physical-mental    -1 -2.2031012 0.2031012 0.1135025

>


> TukeyHSD(a.mod, which="Age")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = StressReduction ~ Treatment * Age, data = stressdata)

$Age
          diff       lwr       upr    p adj
old-mid     -3 -4.203101 -1.796899 1.54e-05
young-mid    3  1.796899  4.203101 1.54e-05
young-old    6  4.796899  7.203101 0.00e+00
>

위는 아래의 linear model 을 이용하여도 가능. 사실 모든 ANOVA 테스트는 linear model이기도 함(lm)

anova(lm(d$StressReduction ~ d$Treatment * d$Age, d))
Analysis of Variance Table

Response: d$StressReduction
                  Df Sum Sq Mean Sq F value   Pr(>F)    
d$Treatment        2     18       9       9 0.001953 ** 
d$Age              2    162      81      81    1e-09 ***
d$Treatment:d$Age  4      0       0       0 1.000000    
Residuals         18     18       1                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>

===== e.g., 6 =====

tdata <- ToothGrowth
tdata
str(tdata)
tdata$dose <- factor(tdata$dose)
# or a nicer way
tdata$dose <- factor(tdata$dose, levels=c(0.5, 1, 2), labels=c("dhalf", "d1", "d2"))

tdata

table(tdata$supp, tdata$dose)

==== Visualize the data ====

install.packages("ggpubr")

library("ggpubr")
ggboxplot(tdata, x = "dose", y = "len", color = "supp",
          palette = c("red", "blue"))
ggline(tdata, x = "dose", y = "len", color = "supp",
       add = c("mean_se", "dotplot"),
       palette = c("red", "blue"))

# prettier
boxplot(len ~ supp * dose, data=tdata, frame = FALSE, 
      col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")

# or 
interaction.plot(x.factor = tdata$dose, trace.factor = 
      tdata$supp, response = tdata$len, fun = mean, 
      type = "b", legend = TRUE, 
      xlab = "Dose", ylab="Tooth Length",
      pch=c(1,19), col = c("#00AFBB", "#E7B800"))

==== Compute ANOVA test ====

res.aov2 <- aov(len ~ supp + dose, data = tdata)
summary(res.aov2)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4   14.02 0.000429 ***
dose         2 2426.4  1213.2   82.81  < 2e-16 ***
Residuals   56  820.4    14.7                     
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

res.aov3 <- aov(len ~ supp * dose, data = tdata)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = tdata)
summary(res.aov3)
            Df Sum Sq Mean Sq F value   Pr(>F)    
supp         1  205.4   205.4  15.572 0.000231 ***
dose         2 2426.4  1213.2  92.000  < 2e-16 ***
supp:dose    2  108.3    54.2   4.107 0.021860 *  
Residuals   54  712.1    13.2                     
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

From the ANOVA results, you can conclude the following, based on the p-values and a significance level of 0.05: * the p-value of supp is 0.000429 (significant), which indicates that the levels of supp are associated with significant different tooth length. * the p-value of dose is < 2e-16 (significant), which indicates that the levels of dose are associated with significant different tooth length. * the p-value for the interaction between supp*dose is 0.02 (significant), which indicates that the relationships between dose and tooth length depends on the supp method. ==== pair-wise test ====

> TukeyHSD(res.aov3, which = "dose")
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = len ~ supp * dose, data = tdata)

$dose
        diff       lwr       upr   p adj
d1-dh  9.130  6.362488 11.897512 0.0e+00
d2-dh 15.495 12.727488 18.262512 0.0e+00
d2-d1  6.365  3.597488  9.132512 2.7e-06

pairwise.t.test(tdata$len, tdata$dose, p.adjust.method = "BH")

==== Homogeneity ==== plot(res.aov3, 1)

install.packages("car")
library(car)
leveneTest(len ~ supp*dose, data = tdata)

Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group  5  1.7086 0.1484
      54

p value is greater than .05, which indicates that there is no evidence of significant difference between variances of groups. ==== check normality ====

plot(res.aov3, 2)

The residuals are plotted against the quantiles of normal distribution (the straight line). ==== unbalance design ====

> library(car)
> my_anova <- aov(len ~ supp * dose, data = tdata)
> 
> Anova(my_anova, type="III"
+ )
Anova Table (Type III tests)

Response: len
             Sum Sq Df F value    Pr(>F)    
(Intercept) 1750.33  1 132.730 3.603e-16 ***
supp         137.81  1  10.450  0.002092 ** 
dose         885.26  2  33.565 3.363e-10 ***
supp:dose    108.32  2   4.107  0.021860 *  
Residuals    712.11 54                      
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1