Differences

This shows you the differences between two versions of the page.

--- c:ms:2025:w07_anova_note [2025/04/10 15:26] – [ANOVA in R: Output] hkimscil
+++ c:ms:2025:w07_anova_note [2025/04/16 10:40] (current) – [R square: output] hkimscil
@@ Line 1: / Line 1: @@
 ====== ANOVA in R ======
+이 문서는 http://commres.net/wiki/r/anova 의 내용을 변형해서
+다시 만든 문서입니다. http://commres.net/wiki/r/anova 은
+세 그룹 간의 차이를 보는 것이고 이 문서는 4그룹 간의 차이를 보는
+문서입니다.
 <code>
 #
@@ Line 17: / Line 21: @@
 mean.d <- 22
-A <- rnorm2(na, mean.a, sqrt(1160/(na-1)))
+A <- rnorm2(na, mean.a, sqrt(900/(na-1)))
-B <- rnorm2(nb, mean.b, sqrt(1000/(nb-1)))
+B <- rnorm2(nb, mean.b, sqrt(900/(nb-1)))
-C <- rnorm2(nc, mean.c, sqrt(1000/(nc-1)))
+C <- rnorm2(nc, mean.c, sqrt(900/(nc-1)))
-D <- rnorm2(nd, mean.d, sqrt(1000/(nd-1)))
+D <- rnorm2(nd, mean.d, sqrt(900/(nd-1)))
 # A combined group with group A and B
@@ Line 258: / Line 262: @@
 > mean.a <- 26
 > mean.b <- 25
-> mean.c <- 22
+> mean.c <- 23
-> mean.d <- 20
+> mean.d <- 22
 >
-> A <- rnorm2(na, mean.a, sqrt(1160/(na-1)))
+> A <- rnorm2(na, mean.a, sqrt(900/(na-1)))
-> B <- rnorm2(nb, mean.b, sqrt(1000/(nb-1)))
+> B <- rnorm2(nb, mean.b, sqrt(900/(nb-1)))
-> C <- rnorm2(nc, mean.c, sqrt(1000/(nc-1)))
+> C <- rnorm2(nc, mean.c, sqrt(900/(nc-1)))
-> D <- rnorm2(nd, mean.d, sqrt(1000/(nd-1)))
+> D <- rnorm2(nd, mean.d, sqrt(900/(nd-1)))
 >
 > # A combined group with group A and B
@@ Line 272: / Line 276: @@
 > A
           [,1]
- [1,] 24.15936
+ [1,] 24.37871
- [2,] 30.80932
+ [2,] 30.23619
- [3,] 21.51824
+ [3,] 22.05233
- [4,] 28.24999
+ [4,] 27.98186
- [5,] 28.97978
+ [5,] 28.62468
- [6,] 35.51391
+ [6,] 34.38014
- [7,] 31.31140
+ [7,] 30.67844
- [8,] 25.77399
+ [8,] 25.80092
- [9,] 33.56897
+ [9,] 32.66698
-[10,] 24.93735
+[10,] 25.06399
-[11,] 30.61240
+[11,] 30.06274
-[12,] 20.61063
+[12,] 21.25288
-[13,] 37.43502
+[13,] 36.07231
-[14,] 15.52399
+[14,] 16.77241
-[15,] 24.83574
+[15,] 24.97448
-[16,] 25.16385
+[16,] 25.26349
-[17,] 20.19498
+[17,] 20.88676
-[18,] 27.06992
+[18,] 26.94242
-[19,] 20.43785
+[19,] 21.10069
-[20,] 11.10716
+[20,] 12.88194
-[21,] 25.38778
+[21,] 25.46073
-[22,] 31.99065
+[22,] 31.27674
-[23,] 24.59883
+[23,] 24.76580
-[24,] 15.54592
+[24,] 16.79173
-[25,] 32.26250
+[25,] 31.51620
-[26,] 15.95114
+[26,] 17.14866
-[27,] 30.16291
+[27,] 29.66682
-[28,] 25.72414
+[28,] 25.75701
-[29,] 30.16421
+[29,] 29.66796
-[30,] 30.39809
+[30,] 29.87397
 attr(,"scaled:center")
 [1] -0.08287722
@@ Line 308: / Line 312: @@
 > B
           [,1]
- [1,] 30.92777
+ [1,] 30.62357
- [2,] 27.40269
+ [2,] 27.27939
- [3,] 31.57423
+ [3,] 31.23686
- [4,] 13.93715
+ [4,] 14.50486
- [5,] 32.61602
+ [5,] 32.22519
- [6,] 21.65799
+ [6,] 21.82949
- [7,] 26.76631
+ [7,] 26.67567
- [8,] 31.07316
+ [8,] 30.76150
- [9,] 16.23555
+ [9,] 16.68531
-[10,] 28.37195
+[10,] 28.19891
-[11,] 28.56653
+[11,] 28.38350
-[12,] 30.14509
+[12,] 29.88106
-[13,] 12.52765
+[13,] 13.16769
-[14,] 23.17424
+[14,] 23.26793
-[15,] 19.47689
+[15,] 19.76032
-[16,] 28.11125
+[16,] 27.95159
-[17,] 29.06156
+[17,] 28.85313
-[18,] 21.76270
+[18,] 21.92882
-[19,] 24.14405
+[19,] 24.18798
-[20,] 17.31020
+[20,] 17.70481
-[21,] 19.22003
+[21,] 19.51664
-[22,] 24.23347
+[22,] 24.27280
-[23,] 29.11289
+[23,] 28.90183
-[24,] 17.80809
+[24,] 18.17715
-[25,] 30.09268
+[25,] 29.83134
-[26,] 19.78715
+[26,] 20.05465
-[27,] 26.75141
+[27,] 26.66153
-[28,] 32.27229
+[28,] 31.89910
-[29,] 32.52368
+[29,] 32.13759
-[30,] 23.35537
+[30,] 23.43977
 attr(,"scaled:center")
 [1] -0.1405676
@@ Line 344: / Line 348: @@
 > C
           [,1]
- [1,] 19.93680
+ [1,] 21.04268
- [2,] 13.13256
+ [2,] 14.58761
- [3,] 17.68193
+ [3,] 18.90352
- [4,] 22.13674
+ [4,] 23.12972
- [5,] 23.96961
+ [5,] 24.86854
- [6,] 23.75823
+ [6,] 24.66800
- [7,] 17.40748
+ [7,] 18.64315
- [8,] 22.35212
+ [8,] 23.33405
- [9,] 21.13146
+ [9,] 22.17603
-[10,] 21.02997
+[10,] 22.07975
-[11,] 30.39517
+[11,] 30.96436
-[12,] 31.04547
+[12,] 31.58129
-[13,] 28.28695
+[13,] 28.96433
-[14,] 21.01354
+[14,] 22.06416
-[15,] 10.72282
+[15,] 12.30153
-[16,] 15.34118
+[16,] 16.68289
-[17,] 23.25979
+[17,] 24.19514
-[18,] 13.91989
+[18,] 15.33454
-[19,] 22.28969
+[19,] 23.27483
-[20,] 21.17085
+[20,] 22.21340
-[21,] 32.41776
+[21,] 32.88316
-[22,] 28.04180
+[22,] 28.73176
-[23,] 18.45008
+[23,] 19.63225
-[24,] 18.25799
+[24,] 19.45001
-[25,] 11.25474
+[25,] 12.80615
-[26,] 24.25413
+[26,] 25.13846
-[27,] 21.50399
+[27,] 22.52944
-[28,] 29.43868
+[28,] 30.05695
-[29,] 25.75134
+[29,] 26.55883
-[30,] 30.64723
+[30,] 31.20348
 attr(,"scaled:center")
 [1] 0.08931913
@@ Line 380: / Line 384: @@
 > D
            [,1]
- [1,] 27.502532
+ [1,] 29.117527
- [2,] 19.249172
+ [2,] 21.287702
- [3,] 17.265865
+ [3,] 19.406172
- [4,] 15.085873
+ [4,] 17.338050
- [5,] 21.168938
+ [5,] 23.108952
- [6,] 14.658798
+ [6,] 16.932891
- [7,] 16.756660
+ [7,] 18.923097
- [8,] 27.742432
+ [8,] 29.345116
- [9,] 22.476618
+ [9,] 24.349527
-[10,] 14.513907
+[10,] 16.795435
-[11,] 21.084269
+[11,] 23.028628
-[12,] 15.862551
+[12,] 18.074871
-[13,] 32.407055
+[13,] 33.770366
-[14,] 26.575540
+[14,] 28.238105
-[15,] 23.989868
+[15,] 25.785121
-[16,] 18.058006
+[16,] 20.157663
-[17,] 19.989914
+[17,] 21.990432
-[18,]  6.202226
+[18,]  8.910282
-[19,] 16.624780
+[19,] 18.797986
-[20,] 29.690645
+[20,] 31.193353
-[21,] 16.009971
+[21,] 18.214726
-[22,] 19.173433
+[22,] 21.215850
-[23,] 18.504309
+[23,] 20.581063
-[24,] 29.182495
+[24,] 30.711279
-[25,] 24.122752
+[25,] 25.911186
-[26,] 14.773496
+[26,] 17.041703
-[27,] 15.629022
+[27,] 17.853327
-[28,] 14.417493
+[28,] 16.703969
-[29,] 15.869201
+[29,] 18.081180
-[30,] 25.412177
+[30,] 27.134442
 attr(,"scaled:center")
 [1] 0.0894333
@@ Line 418: / Line 422: @@
 > head(dat)
     values ind
-24.15936   A
+24.37871   A
-30.80932   A
+30.23619   A
- 21.51824   A
+ 22.05233   A
- 28.24999   A
+ 27.98186   A
-28.97978   A
+28.62468   A
- 35.51391   A
+ 34.38014   A
 > colnames(dat)[1] <- "values"
 > colnames(dat)[2] <- "group"
 > head(dat)
     values group
-24.15936     A
+24.37871     A
-30.80932     A
+30.23619     A
- 21.51824     A
+ 22.05233     A
- 28.24999     A
+ 27.98186     A
-28.97978     A
+28.62468     A
- 35.51391     A
+ 34.38014     A
 >
 > mean.total <- mean(dat$values)
@@ Line 447: / Line 451: @@
 >
 > mean.total
-[1] 23.25
+[1] 24
 > var.total
-[1] 40.69328
+[1] 32.77311
 > ms.total
-[1] 40.69328
+[1] 32.77311
 > df.total
 [1] 119
 > ss.total
-[1] 4842.5
+[1] 3900
 > ss.total.check
-[1] 4842.5
+[1] 3900
 >
 > # Now for each group
@@ Line 469: / Line 473: @@
 [1] 25
 > mean.c
-[1] 22
+[1] 23
 > mean.d
-[1] 20
+[1] 22
 >
 > # 그룹 간의 차이에서 나타나는 분산
@@ Line 491: / Line 495: @@
 >
 > length(A) * ((mean.total - mean.a)^2)
-[1] 226.875
+[1] 120
 > length(B) * ((mean.total - mean.b)^2)
-[1] 91.875
+[1] 30
 > length(C) * ((mean.total - mean.c)^2)
-[1] 46.875
+[1] 30
 > length(D) * ((mean.total - mean.d)^2)
-[1] 316.875
+[1] 120
 >
 >
@@ Line 507: / Line 511: @@
 >
 > ss.between
-[1] 682.5
+[1] 300
 > # df between group은 연구에 사용된
 > # 그룹의 숫자에서 1을 뺀 숫자
@@ Line 557: / Line 561: @@
 > # 이 값을 출력해 본다
 > f.calculated
-        [,1]
+         [,1]
-[1,] 6.34375
+[1,] 3.222222
 >
 > # 컴퓨터 계산이 쉬워지기 전에는 아래처럼 0.5 level
@@ Line 565: / Line 569: @@
 [1] 2.682809
 > f.calculated
-        [,1]
+         [,1]
-[1,] 6.34375
+[1,] 3.222222
 > # 위에서 f.calculated > qf값이므로
 > # f.calculated 값으로 영가설을 부정하고
@@ Line 582: / Line 586: @@
 > f.calculated.pvalue <- 1-pf(f.calculated, df1=df.between, df2=df.within)
 > f.calculated.pvalue
-             [,1]
+           [,1]
-[1,] 0.0005078937
+[1,] 0.02527283
 > f.calculated
-        [,1]
+         [,1]
-[1,] 6.34375
+[1,] 3.222222
 >
 > # graph 로 이해
@@ Line 613: / Line 617: @@
 >
 > f.calculated
-        [,1]
+         [,1]
-[1,] 6.34375
+[1,] 3.222222
 > f.calculated.pvalue
-             [,1]
+           [,1]
-[1,] 0.0005078937
+[1,] 0.02527283
 > 1 - f.calculated.pvalue
           [,1]
-[1,] 0.9994921
+[1,] 0.9747272
 >
 >
@@ Line 626: / Line 630: @@
 > # Now check this
 > ss.total
-[1] 4842.5
+[1] 3900
 > ss.between
-[1] 682.5
+[1] 300
 > ss.within
      [,1]
-[1,] 4160
+[1,] 3600
 > ss.total
-[1] 4842.5
+[1] 3900
 > ss.between + ss.within
-       [,1]
+     [,1]
-[1,] 4842.5
+[1,] 3900
 >
 > # 한편 df는
@@ Line 656: / Line 660: @@
 > a.res.sum <- summary(a.res)
 > a.res.sum
-             Df Sum Sq Mean Sq F value   Pr(>F)
+             Df Sum Sq Mean Sq F value Pr(>F)
-group         3    682  227.50   6.344 0.000508 ***
+group         3    300  100.00   3.222 0.0253 *
-Residuals   116   4160   35.86
+Residuals   116   3600   31.03
 ---
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Signif. codes:
+‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 > # 그러나 정확히 어떤 그룹에서 차이가 나는지는 판단해주지 않음
 > pairwise.t.test(dat$values, dat$group, p.adj = "none")
@@ Line 668: / Line 673: @@
 data:  dat$values and dat$group
-  A       B       C
+  A      B      C
-B 0.51908 -       -
+B 0.4883 -      -
-C 0.01092 0.05478 -
+C 0.0392 0.1671 -
-D 0.00017 0.00159 0.19842
+D 0.0063 0.0392 0.4883
 P value adjustment method: none
@@ Line 681: / Line 686: @@
 data:  dat$values and dat$group
-  A      B      C
+  A     B     C
-B 1.0000 -      -
+B 1.000 -     -
-C 0.0655 0.3287 -
+C 0.235 1.000 -
-D 0.0010 0.0096 1.0000
+D 0.038 0.235 1.000
 P value adjustment method: bonferroni
@@ Line 694: / Line 699: @@
   A     B     C
-B 0.519 -     -
+B 0.977 -     -
-C 0.044 0.164 -
+C 0.196 0.501 -
-D 0.001 0.008 0.397
+D 0.038 0.196 0.977
 P value adjustment method: holm
@@ Line 708: / Line 713: @@
 $group
-    diff        lwr        upr     p adj
+    diff       lwr        upr     p adj
-B-A   -1  -5.030484  3.0304845 0.9164944
+B-A   -1 -4.749401  2.7494006 0.8987322
-C-A   -4  -8.030484  0.0304845 0.0525532
+C-A   -3 -6.749401  0.7494006 0.1638896
-D-A   -6 -10.030484 -1.9695155 0.0009862
+D-A   -4 -7.749401 -0.2505994 0.0316929
-C-B   -3  -7.030484  1.0304845 0.2171272
+C-B   -2 -5.749401  1.7494006 0.5078699
-D-B   -5  -9.030484 -0.9695155 0.0085400
+D-B   -3 -6.749401  0.7494006 0.1638896
-D-C   -2  -6.030484  2.0304845 0.5689321
+D-C   -1 -4.749401  2.7494006 0.8987322
 >
 > boxplot(dat$values~dat$group)
 > f.calculated
-        [,1]
+         [,1]
-[1,] 6.34375
+[1,] 3.222222
 > f.calculated.pvalue
-             [,1]
+           [,1]
-[1,] 0.0005078937
+[1,] 0.02527283
+>
 >
 > # how much IV explains the DV
@@ Line 730: / Line 736: @@
 > eta  <- r.square
 > eta
-[1] 0.1409396
+[1] 0.07692308
 > lm.res <- lm(dat$values~dat$group, data = dat)
 > summary(lm.res)
@@ Line 739: / Line 745: @@
 Residuals:
      Min       1Q   Median       3Q      Max
--14.8928  -4.1826  -0.3859   4.4517  12.4071
+-13.1181  -3.9308  -0.3568   3.9491  11.7704
 Coefficients:
             Estimate Std. Error t value Pr(>|t|)
-(Intercept)   26.000      1.093  23.780  < 2e-16 ***
+(Intercept)   26.000      1.017  25.563  < 2e-16 ***
-dat$groupB    -1.000      1.546  -0.647 0.519080
+dat$groupB    -1.000      1.438  -0.695  0.48831
-dat$groupC    -4.000      1.546  -2.587 0.010919 *
+dat$groupC    -3.000      1.438  -2.086  0.03920 *
-dat$groupD    -6.000      1.546  -3.880 0.000174 ***
+dat$groupD    -4.000      1.438  -2.781  0.00633 **
 ---
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Signif. codes:
+‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-Residual standard error: 5.988 on 116 degrees of freedom
+Residual standard error: 5.571 on 116 degrees of freedom
-Multiple R-squared:  0.1409,	Adjusted R-squared:  0.1187
+Multiple R-squared:  0.07692,	Adjusted R-squared:  0.05305
-F-statistic: 6.344 on 3 and 116 DF,  p-value: 0.0005079
+F-statistic: 3.222 on 3 and 116 DF,  p-value: 0.02527
 > summary(a.res)
-             Df Sum Sq Mean Sq F value   Pr(>F)
+             Df Sum Sq Mean Sq F value Pr(>F)
-group         3    682  227.50   6.344 0.000508 ***
+group         3    300  100.00   3.222 0.0253 *
-Residuals   116   4160   35.86
+Residuals   116   3600   31.03
 ---
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Signif. codes:
+‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 >
 >
->
+></code>
-</code>
+{{:c:ms:2025:pasted:20250414-084011.png}}
+{{:c:ms:2025:pasted:20250414-084020.png}}
+{{:c:ms:2025:pasted:20250414-084030.png}}
 ====== Post hoc test ======
 [[:post hoc test]]
 <code>
-m.a
+mean.a
-m.b
+mean.b
-m.c
+mean.c
+mean.d
-d.ab <- m.a - m.b
+d.ab <- mean.a - mean.b
-d.ac <- m.a - m.c
+d.ac <- mean.a - mean.c
-d.bc <- m.b - m.c
+d.ad <- mean.a - mean.d
+d.bc <- mean.b - mean.c
+d.bd <- mean.b - mean.d
+d.cd <- mean.c - mean.d
 d.ab
 d.ac
+d.ad
 d.bc
+d.bd
+d.cd
 # mse (ms within) from the a.res.sum output
-# a.res.sum == summary(aov(values ~ group, data=comb3))
+# a.res.sum == summary(aov(values ~ group, data=dat))
 a.res.sum
 # mse = 50
-mse <- 50
+mse <- 35.86
-# 혹은 fansy way from comb3 data.frame
+# 혹은 fansy way from dat data.frame
-# 15 는 각 그룹의 df
+# df.a 는 각 그룹의 df (모든 그룹의 df값이 같으므로 df.a를 사용)
-sse.ch <- sum(tapply(comb3$values, comb3$group, var)*15)
+tapply(dat$values, dat$group, var) # 각 그룹의 분산
+tapply(dat$values, dat$group, var) * df.a # 각 그룹의 SS
+# 이 분산값에 df값을 곱하면 각 그룹의 SS값
+# 이 값들을 sum 하면 총 ss.within 값
+sse.ch <- sum(tapply(dat$values, dat$group, var) * df.a)
 sse.ch
-mse.ch <- sse.ch/45
+mse.ch <- sse.ch / (df.a*4) # 이 값에 df.a * 4 하면 ms.within값
 mse.ch
+mse <- mse.ch
 se <- sqrt(mse/length(A))
@@ Line 797: / Line 818: @@
 t.ab <- d.ab / se
 t.ac <- d.ac / se
+t.ad <- d.ad / se
 t.bc <- d.bc / se
+t.bd <- d.bd / se
+t.cd <- d.cd / se
 t.ab
 t.ac
+t.ad
 t.bc
+t.bd
+t.cd
 # 이제 위의 점수를 .05 레벨에서 비교할 점수를 찾아 비교한다
 # qtukey 펑션을 이용한다
-t.crit <- qtukey( .95, nmeans = 3, df = 45)
+t.crit <- qtukey( .95, nmeans = 4, df = 30 * 4)
 t.crit
-# t.ac만이 큰 것을 알 수 있다.
-# 따라서 a 와 c 가 서로 다른 그룹
-# 즉, 1학년과 3학년이 서로 다른 그룹
 # 혹은 R이 보통 제시한 거꾸로 방법으로 보면
-ptukey(t.ab, nmeans=3, df=45, lower.tail = F)
+ptukey(t.ab, nmeans=4, df=df.within, lower.tail = F)
-ptukey(t.ac, nmeans=3, df=45, lower.tail = F)
+ptukey(t.ac, nmeans=4, df=df.within, lower.tail = F)
-ptukey(t.bc, nmeans=3, df=45, lower.tail = F)
+ptukey(t.ad, nmeans=4, df=df.within, lower.tail = F)
+ptukey(t.bc, nmeans=4, df=df.within, lower.tail = F)
+ptukey(t.bd, nmeans=4, df=df.within, lower.tail = F)
+ptukey(t.cd, nmeans=4, df=df.within, lower.tail = F)
 TukeyHSD(a.res, conf.level=.95)
@@ Line 825: / Line 851: @@
 <code>
-pairwise.t.test(comb3$values, comb3$group, p.adj = "bonf")
+pairwise.t.test(dat$values, dat$group, p.adj = "bonf")
 </code>
 ===== post hoc test: output =====
 <code>
-> m.a
+> mean.a
 [1] 26
-> m.b
+> mean.b
-[1] 24
+[1] 25
-> m.c
+> mean.c
-[1] 19
+[1] 22
+> mean.d
+[1] 20
 >
-> d.ab <- m.a - m.b
+> d.ab <- mean.a - mean.b
-> d.ac <- m.a - m.c
+> d.ac <- mean.a - mean.c
-> d.bc <- m.b - m.c
+> d.ad <- mean.a - mean.d
+> d.bc <- mean.b - mean.c
+> d.bd <- mean.b - mean.d
+> d.cd <- mean.c - mean.d
 >
 > d.ab
-[1] 2
+[1] 1
 > d.ac
-[1] 7
+[1] 4
+> d.ad
+[1] 6
 > d.bc
+[1] 3
+> d.bd
 [1] 5
+> d.cd
+[1] 2
 >
-> # se
+> # mse (ms within) from the a.res.sum output
-> # from the a.res.sum output
+> # a.res.sum == summary(aov(values ~ group, data=dat))
 > a.res.sum
-            Df Sum Sq Mean Sq F value Pr(>F)
+             Df Sum Sq Mean Sq F value   Pr(>F)
-group        2    416     208    4.16  0.022 *
+group         3    682  227.50   6.344 0.000508 ***
-Residuals   45   2250      50
+Residuals   116   4160   35.86
 ---
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Signif. codes:
+‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 > # mse = 50
-> mse <- 50
+> mse <- 35.86
->
+> # 혹은 fansy way from dat data.frame
+> # df.a 는 각 그룹의 df (모든 그룹의 df값이 같으므로 df.a를 사용)
+> tapply(dat$values, dat$group, var) # 각 그룹의 분산
+       A        B        C        D
+.00000 34.48276 31.03448 37.93103
+> tapply(dat$values, dat$group, var) * df.a # 각 그룹의 SS
+   A    B    C    D
+1000  900 1100
+> # 이 분산값에 df값을 곱하면 각 그룹의 SS값
+> # 이 값들을 sum 하면 총 ss.within 값
+> sse.ch <- sum(tapply(dat$values, dat$group, var) * df.a)
+> sse.ch
+[1] 4160
+> mse.ch <- sse.ch / (df.a*4) # 이 값에 df.a * 4 하면 ms.within값
+> mse.ch
+[1] 35.86207
+> mse <- mse.ch
 > se <- sqrt(mse/length(A))
 >
@@ Line 863: / Line 917: @@
 > t.ab <- d.ab / se
 > t.ac <- d.ac / se
+> t.ad <- d.ad / se
 > t.bc <- d.bc / se
+> t.bd <- d.bd / se
+> t.cd <- d.cd / se
 >
 > t.ab
-[1] 1.131371
+[1] 0.9146248
 > t.ac
-[1] 3.959798
+[1] 3.658499
+> t.ad
+[1] 5.487749
 > t.bc
-[1] 2.828427
+[1] 2.743874
+> t.bd
+[1] 4.573124
+> t.cd
+[1] 1.82925
 >
 > # 이제 위의 점수를 .05 레벨에서 비교할 점수를 찾아 비교한다
 > # qtukey 펑션을 이용한다
-> t.crit <- qtukey( .95, nmeans = 3, df = 45)
+> t.crit <- qtukey( .95, nmeans = 4, df = 30 * 4)
 > t.crit
-[1] 3.427507
+[1] 3.684589
->
-> # t.ac만이 큰 것을 알 수 있다.
-> # 따라서 a 와 c 가 서로 다른 그룹
-> # 즉, 1학년과 3학년이 서로 다른 그룹
 >
 > # 혹은 R이 보통 제시한 거꾸로 방법으로 보면
-> ptukey(t.ab, nmeans=3, df=45, lower.tail = F)
+> ptukey(t.ab, nmeans=4, df=df.within, lower.tail = F)
-[1] 0.7049466
+[1] 0.9164944
-> ptukey(t.ac, nmeans=3, df=45, lower.tail = F)
+> ptukey(t.ac, nmeans=4, df=df.within, lower.tail = F)
-[1] 0.02012498
+[1] 0.05255324
-> ptukey(t.bc, nmeans=3, df=45, lower.tail = F)
+> ptukey(t.ad, nmeans=4, df=df.within, lower.tail = F)
-[1] 0.123877
+[1] 0.0009861641
+> ptukey(t.bc, nmeans=4, df=df.within, lower.tail = F)
+[1] 0.2171272
+> ptukey(t.bd, nmeans=4, df=df.within, lower.tail = F)
+[1] 0.008539966
+> ptukey(t.cd, nmeans=4, df=df.within, lower.tail = F)
+[1] 0.5689321
 >
 > TukeyHSD(a.res, conf.level=.95)
@@ Line 894: / Line 959: @@
 % family-wise confidence level
-Fit: aov(formula = values ~ group, data = comb3)
+Fit: aov(formula = values ~ group, data = dat)
 $group
-    diff        lwr       upr     p adj
+    diff        lwr        upr     p adj
-b-a   -2  -8.059034  4.059034 0.7049466
+B-A   -1  -5.030484  3.0304845 0.9164944
-c-a   -7 -13.059034 -0.940966 0.0201250
+C-A   -4  -8.030484  0.0304845 0.0525532
-c-b   -5 -11.059034  1.059034 0.1238770
+D-A   -6 -10.030484 -1.9695155 0.0009862
+C-B   -3  -7.030484  1.0304845 0.2171272
+D-B   -5  -9.030484 -0.9695155 0.0085400
+D-C   -2  -6.030484  2.0304845 0.5689321
+> plot(TukeyHSD(a.res, conf.level=.95), las = 2)
+> pairwise.t.test(dat$values, dat$group, p.adj = "bonf")
+	Pairwise comparisons using t tests with pooled SD
+data:  dat$values and dat$group
+  A      B      C
+B 1.0000 -      -
+C 0.0655 0.3287 -
+D 0.0010 0.0096 1.0000
+P value adjustment method: bonferroni
+>
+>
 </code>
-{{:c:ms:2023:pasted:20230418-223608.png}}
+{{:c:ms:2025:pasted:20250410-154903.png}}
 ====== R square or Eta square ======
 SS toal
@@ Line 930: / Line 1014: @@
 <code>
-ss.tot
+ss.total
-ss.bet
+ss.between
-r.sq <- ss.bet / ss.tot
+r.sq <- ss.between / ss.total
 r.sq
 # then . . . .
-lm.res <- lm(values ~ group, data = comb3)
+lm.res <- lm(values ~ group, data = dat)
 summary(lm.res)
 anova(lm.res)
@@ Line 944: / Line 1028: @@
 ===== R square: output =====
 <code>
-> ss.tot
+> ss.total
-[1] 2666
+[1] 3900
-> ss.bet
+> ss.between
-[1] 416
+[1] 300
-> r.sq <- ss.bet / ss.tot
+> r.sq <- ss.between / ss.total
 > r.sq
-[1] 0.156039
+[1] 0.07692308
 >
 > # then . . . .
 >
-> lm.res <- lm(values ~ group, data = comb3)
+> lm.res <- lm(values ~ group, data = dat)
 > summary(lm.res)
 Call:
-lm(formula = values ~ group, data = comb3)
+lm(formula = values ~ group, data = dat)
 Residuals:
-    Min      1Q  Median      3Q     Max
+     Min       1Q   Median       3Q      Max
--16.020  -2.783   1.476   4.892  12.148
+-13.1181  -3.9308  -0.3568   3.9491  11.7704
 Coefficients:
             Estimate Std. Error t value Pr(>|t|)
-(Intercept)   26.000      1.768   14.71   <2e-16 ***
+(Intercept)   26.000      1.017  25.563  < 2e-16 ***
-groupb        -2.000      2.500   -0.80   0.4279
+groupB        -1.000      1.438  -0.695  0.48831
-groupc        -7.000      2.500   -2.80   0.0075 **
+groupC        -3.000      1.438  -2.086  0.03920 *
+groupD        -4.000      1.438  -2.781  0.00633 **
 ---
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Signif. codes:
+‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-Residual standard error: 7.071 on 45 degrees of freedom
+Residual standard error: 5.571 on 116 degrees of freedom
-Multiple R-squared:  0.156,	Adjusted R-squared:  0.1185
+Multiple R-squared:  0.07692,	Adjusted R-squared:  0.05305
-F-statistic:  4.16 on 2 and 45 DF,  p-value: 0.02199
+F-statistic: 3.222 on 3 and 116 DF,  p-value: 0.02527
 > anova(lm.res)
@@ Line 980: / Line 1066: @@
 Response: values
-          Df Sum Sq Mean Sq F value  Pr(>F)
+           Df Sum Sq Mean Sq F value  Pr(>F)
-group      2    416     208    4.16 0.02199 *
+group       3    300 100.000  3.2222 0.02527 *
-Residuals 45   2250      50
+Residuals 116   3600  31.034
 ---
-Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
+Signif. codes:
->
+‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
 >
 </code>