This is an old revision of the document!

Oneway ANOVA

(온도조건)x1	50.5	52.1	51.9	52.4	50.6	51.4	51.2	52.2	51.5	50.8
(온도조건)x2	47.5	47.7	46.6	47.1	47.2	47.8	45.2	47.4	45.0	47.9
(온도조건)x3	46.0	47.1	45.6	47.1	47.2	46.4	45.9	47.1	44.9	46.2

x1 <- c(50.5, 52.1, 51.9, 52.4, 50.6, 51.4, 51.2, 52.2, 51.5, 50.8)
x2 <- c(47.5, 47.7, 46.6, 47.1, 47.2, 47.8, 45.2, 47.4, 45.0, 47.9)
x3 <- c(46.0, 47.1, 45.6, 47.1, 47.2, 46.4, 45.9, 47.1, 44.9, 46.2)

x1
x2
x3

> xc <- data.frame(x1,x2,x3)
> xc
     x1   x2   x3
1  50.5 47.5 46.0
2  52.1 47.7 47.1
3  51.9 46.6 45.6
4  52.4 47.1 47.1
5  50.6 47.2 47.2
6  51.4 47.8 46.4
7  51.2 45.2 45.9
8  52.2 47.4 47.1
9  51.5 45.0 44.9
10 50.8 47.9 46.2

> xs <- stack(xc)
> xs
   values ind
1    50.5  x1
2    52.1  x1
3    51.9  x1
4    52.4  x1
5    50.6  x1
6    51.4  x1
7    51.2  x1
8    52.2  x1
9    51.5  x1
10   50.8  x1
11   47.5  x2
12   47.7  x2
13   46.6  x2
14   47.1  x2
15   47.2  x2
16   47.8  x2
17   45.2  x2
18   47.4  x2
19   45.0  x2
20   47.9  x2
21   46.0  x3
22   47.1  x3
23   45.6  x3
24   47.1  x3
25   47.2  x3
26   46.4  x3
27   45.9  x3
28   47.1  x3
29   44.9  x3
30   46.2  x3

> aggregate(values~ind, data=xs, mean)

  ind values
1  x1  51.46
2  x2  46.94
3  x3  46.35

# or use, tapply function as shown the below

> x.mod <- aov(values~ind,data=xs)
> x.mod
Call:
   aov(formula = values ~ ind, data = xs)

Terms:
                    ind Residuals
Sum of Squares  156.302    19.393
Deg. of Freedom       2        27

Residual standard error: 0.8475018
Estimated effects may be unbalanced
> summary(x.mod)
            Df Sum Sq Mean Sq F value  Pr(>F)    
ind          2 156.30   78.15   108.8 1.2e-13 ***
Residuals   27  19.39    0.72                    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> 

> TukeyHSD(x.mod)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = values ~ ind, data = xs)

$ind
       diff       lwr        upr     p adj
x2-x1 -4.52 -5.459735 -3.5802652 0.0000000
x3-x1 -5.11 -6.049735 -4.1702652 0.0000000
x3-x2 -0.59 -1.529735  0.3497348 0.2813795

> tapply(xs$values, xs$ind, mean)
   x1    x2    x3 
51.46 46.94 46.35

SS_between

> meanxs <- mean(xs$values)
> mx1 <- mean(x1)
> mx2 <- mean(x2)
> mx3 <- mean(x3)

> x2ss <- 10*((meanxs - mx2)^2)
> x3ss <- 10*((meanxs - mx3)^2)
> x1ss <- 10*((meanxs - mx1)^2)
> xss <- x1ss+x2ss+x3ss
> xss
[1] 156.302

E.g. 1

> data(InsectSprays)
> str(InsectSprays)
'data.frame':   72 obs. of  2 variables:
 $ count: num  10 7 20 14 14 12 10 23 17 20 ...
 $ spray: Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
 > summary(InsectSprays)
     count       spray 
 Min.   : 0.00   A:12  
 1st Qu.: 3.00   B:12  
 Median : 7.00   C:12  
 Mean   : 9.50   D:12  
 3rd Qu.:14.25   E:12  
 Max.   :26.00   F:12

> attach(InsectSprays)
> tapply(count, spray, mean)
        A         B         C         D         E         F 
14.500000 15.333333  2.083333  4.916667  3.500000 16.666667 
> tapply(count, spray, var)
        A         B         C         D         E         F 
22.272727 18.242424  3.901515  6.265152  3.000000 38.606061 
> tapply(count, spray, length)              # okay, as there are no missing values
 A  B  C  D  E  F 
12 12 12 12 12 12

> boxplot(count ~ spray)          # boxplot(count~spray, data=InsectSprays) if not attached

> oneway.test(count ~ spray)      # oneway.test(count~spray, data=InsectSprays) if not attached

        One-way analysis of means (not assuming equal variances)

data:  count and spray 
F = 36.0654, num df = 5.000, denom df = 30.043, p-value = 8e-12

By default, oneway.test() applies a Welch-like correction for nonhomogeneity. To turn off it, set the “var.equal=” option to TRUE.

ANOVA 그룹간 homogeneity를 요구하는데, oneway.test는 이를 보정하여 F값을 구한다.

> aov.out = aov(count ~ spray, data=InsectSprays)
> summary(aov.out)
            Df  Sum Sq Mean Sq F value    Pr(>F)    
spray        5 2668.83  533.77  34.702  <2.2e-16 ***
Residuals   66 1015.17   15.38                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

if we use var.eq=TRUE option, the result would be the same.

> oneway.test(count ~ spray, data=InsectSprays, var.eq=T)

	One-way analysis of means

data:  count and spray
F = 34.7023, num df = 5, denom df = 66, p-value < 2.2e-16

> pairwise.t.test(InsectSprays$count, InsectSprays$spray)

	Pairwise comparisons using t tests with pooled SD 

data:  InsectSprays$count and InsectSprays$spray 

  A       B       C       D       E      
B 1.00    -       -       -       -      
C 8.7e-10 1.2e-10 -       -       -      
D 6.9e-07 9.7e-08 0.49    -       -      
E 2.5e-08 3.6e-09 1.00    1.00    -      
F 0.90    1.00    4.2e-12 4.0e-09 1.4e-10

P value adjustment method: holm 
>

> TukeyHSD(aov.out)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = count ~ spray, data = InsectSprays)

$spray
           diff        lwr       upr     p adj
B-A   0.8333333  -3.866075  5.532742 0.9951810
C-A -12.4166667 -17.116075 -7.717258 0.0000000
D-A  -9.5833333 -14.282742 -4.883925 0.0000014
E-A -11.0000000 -15.699409 -6.300591 0.0000000
F-A   2.1666667  -2.532742  6.866075 0.7542147
C-B -13.2500000 -17.949409 -8.550591 0.0000000
D-B -10.4166667 -15.116075 -5.717258 0.0000002
E-B -11.8333333 -16.532742 -7.133925 0.0000000
F-B   1.3333333  -3.366075  6.032742 0.9603075
D-C   2.8333333  -1.866075  7.532742 0.4920707
E-C   1.4166667  -3.282742  6.116075 0.9488669
F-C  14.5833333   9.883925 19.282742 0.0000000
E-D  -1.4166667  -6.116075  3.282742 0.9488669
F-D  11.7500000   7.050591 16.449409 0.0000000
F-E  13.1666667   8.467258 17.866075 0.0000000

Ex 1

약의 종류에 따른 효과의 차이

Drug A 4 5 4 3 2 4 3 4 4
Drug B 6 8 4 5 4 6 5 8 6
Drug C 6 7 6 6 7 5 6 5 5

A <- c(4, 5, 4, 3, 2, 4, 3, 4, 4)
B <- c(6, 8, 4, 5, 4, 6, 5, 8, 6)
C <- c(6, 7, 6, 6, 7, 5, 6, 5, 5)

Ex 2

download dataset_anova_comparisonMethods.csv data file

dataset_ANOVA_OneWayComparisons.csv

Treatment	StressReduction
mental	2
mental	2
mental	3
mental	4
mental	4
mental	5
mental	3
mental	4
mental	4
mental	4
physical	4
physical	4
physical	3
physical	5
physical	4
physical	1
physical	1
physical	2
physical	3
physical	3
medical	1
medical	2
medical	2
medical	2
medical	3
medical	2
medical	3
medical	1
medical	3
medical	1

dd <- read.csv("http://commres.net/wiki/_media/dataset_anova_comparisonmethods.csv", sep=",")

In class exercise
위의 데이터를 이용하여 아래를 수행하시오.
1. F test 가설을 세우시오
2. 각 그룹의 평균과 표준편차 그리고 분산값을 구하시오.
3. F test를 하시오
4. 그룹간 비교를 하시오 (post hoc)
5. 분석결과를 정리하여 설명하시오.

COMMunication
RESearch.NET

Table of Contents

Oneway ANOVA

E.g. 1

Ex 1

Ex 2