Differences

This shows you the differences between two versions of the page.

--- c:ma:2016:schedule:week09_answer [2016/11/09 07:54] – created hkimscil
+++ c:ma:2016:schedule:week09_answer [2016/11/09 09:52] (current) – [E.g. 6] hkimscil
@@ Line 3: / Line 3: @@
 ====== E.g. 1 ======
 MASS data의 Cars93 data에서 Origin에 따른 city Mileage와 highway Mileage, Engine size를 비교하라.
-  - 가설 만들기
+  - 가설 만들기:
+    * $\text{MPG.city:  }  \bar{X}_{\text{USA}} \ne \bar{X}_{\text{nonUSA}}$
+    * $\text{MPG.highway:  }  \bar{X}_{\text{USA}} \ne \bar{X}_{\text{nonUSA}}$
+    * $\text{EngineSize:  }  \bar{X}_{\text{USA}} \ne \bar{X}_{\text{nonUSA}}$
   - 영가설 만들기
+    * $\text{MPG.city:  }  \bar{X}_{\text{USA}} = \bar{X}_{\text{nonUSA}}$
+    * $\text{MPG.highway:  }  \bar{X}_{\text{USA}} = \bar{X}_{\text{nonUSA}}$
+    * $\text{EngineSize:  }  \bar{X}_{\text{USA}} = \bar{X}_{\text{nonUSA}}$
   - 각 그룹의 평균과 표준편차
   - 가설 테스트
   - 테스트 결과
+<code>> CarData <- subset(Cars93, select = c(Origin, MPG.city, MPG.highway, EngineSize))
+> CarData
+    Origin MPG.city MPG.highway EngineSize
+  non-USA       25          31        1.8
+  non-USA       18          25        3.2
+  non-USA       20          26        2.8
+  non-USA       19          26        2.8
+  non-USA       22          30        3.5
+      USA       22          31        2.2
+      USA       19          28        3.8
+      USA       16          25        5.7
+      USA       19          27        3.8
+     USA       16          25        4.9
+     USA       16          25        4.6
+     USA       25          36        2.2
+     USA       25          34        2.2
+     USA       19          28        3.4
+     USA       21          29        2.2
+     USA       18          23        3.8
+     USA       15          20        4.3
+     USA       17          26        5.0
+     USA       17          25        5.7
+     USA       20          28        3.3
+     USA       23          28        3.0
+     USA       20          26        3.3
+     USA       29          33        1.5
+     USA       23          29        2.2
+     USA       22          27        2.5
+     USA       17          21        3.0
+     USA       21          27        2.5
+     USA       18          24        3.0
+     USA       29          33        1.5
+     USA       20          28        3.5
+     USA       31          33        1.3
+     USA       23          30        1.8
+     USA       22          27        2.3
+     USA       22          29        2.3
+     USA       24          30        2.0
+     USA       15          20        3.0
+     USA       21          30        3.0
+     USA       18          26        4.6
+non-USA       46          50        1.0
+non-USA       30          36        1.6
+non-USA       24          31        2.3
+non-USA       42          46        1.5
+non-USA       24          31        2.2
+non-USA       29          33        1.5
+non-USA       22          29        1.8
+non-USA       26          34        1.5
+non-USA       20          27        2.0
+non-USA       17          22        4.5
+non-USA       18          24        3.0
+non-USA       18          23        3.0
+     USA       17          26        3.8
+     USA       18          26        4.6
+non-USA       29          37        1.6
+non-USA       28          36        1.8
+non-USA       26          34        2.5
+non-USA       18          24        3.0
+non-USA       17          25        1.3
+non-USA       20          29        2.3
+non-USA       19          25        3.2
+     USA       23          26        1.6
+     USA       19          26        3.8
+non-USA       29          33        1.5
+non-USA       18          24        3.0
+non-USA       29          33        1.6
+non-USA       24          30        2.4
+non-USA       17          23        3.0
+non-USA       21          26        3.0
+     USA       24          31        2.3
+     USA       23          31        2.2
+     USA       18          23        3.8
+     USA       19          28        3.8
+     USA       23          30        1.8
+     USA       31          41        1.6
+     USA       23          31        2.0
+     USA       19          28        3.4
+     USA       19          27        3.4
+     USA       19          28        3.8
+non-USA       20          26        2.1
+     USA       28          38        1.9
+non-USA       33          37        1.2
+non-USA       25          30        1.8
+non-USA       23          30        2.2
+non-USA       39          43        1.3
+non-USA       32          37        1.5
+non-USA       25          32        2.2
+non-USA       22          29        2.2
+non-USA       18          22        2.4
+non-USA       25          33        1.8
+non-USA       17          21        2.5
+non-USA       21          30        2.0
+non-USA       18          25        2.8
+non-USA       21          28        2.3
+non-USA       20          28        2.4
+>
+> sapply(CarData, summary, na.rm=TRUE)
+$Origin
+    USA non-USA
+     48      45
+$MPG.city
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  15.00   18.00   21.00   22.37   25.00   46.00
+$MPG.highway
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  20.00   26.00   28.00   29.09   31.00   50.00
+$EngineSize
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  1.000   1.800   2.400   2.668   3.300   5.700
+>
+</code>
+<code>> attach(CarData)
+> tapply(CarData$MPG.city, CarData$Origin, summary)
+$USA
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  15.00   18.00   20.00   20.96   23.00   31.00
+$`non-USA`
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  17.00   19.00   22.00   23.87   26.00   46.00
+> tapply(MPG.city, Origin, sd)
+     USA  non-USA
+3.994455 6.672876
+> plot(MPG.city~Origin)
+</code>
+{{t-test_mpg.city.png}}
+<code>> t.test(MPG.city~Origin)
+	Welch Two Sample t-test
+data:  MPG.city by Origin
+t = -2.5296, df = 71.024, p-value = 0.01364
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ -5.2008385 -0.6158282
+sample estimates:
+    mean in group USA mean in group non-USA
+             20.95833              23.86667
+>
+> t.test(MPG.city~Origin, var.equal=TRUE)
+	Two Sample t-test
+data:  MPG.city by Origin
+t = -2.5688, df = 91, p-value = 0.01183
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ -5.1572298 -0.6594368
+sample estimates:
+    mean in group USA mean in group non-USA
+             20.95833              23.86667
+>
+</code>
+<code>> tapply(MPG.highway, Origin, summary)
+$USA
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  20.00   26.00   28.00   28.15   30.00   41.00
+$`non-USA`
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  21.00   25.00   30.00   30.09   33.00   50.00
+>
+> tapply(MPG.highway, Origin, sd)
+     USA  non-USA
+4.151337 6.247990
+> plot(MPG.highway~Origin)
+</code>
+{{t-test_mpghighway.png}}
+<code>> t.test(MPG.highway~Origin)
+	Welch Two Sample t-test
+data:  MPG.highway by Origin
+t = -1.7545, df = 75.802, p-value = 0.08339
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ -4.1489029  0.2627918
+sample estimates:
+    mean in group USA mean in group non-USA
+             28.14583              30.08889
+</code>
+<code>> tapply(EngineSize, Origin, summary)
+$USA
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  1.300   2.200   3.000   3.067   3.800   5.700
+$`non-USA`
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  1.000   1.600   2.200   2.242   2.800   4.500
+> tapply(EngineSize, Origin, sd)
+      USA   non-USA
+1.1353757 0.7171563
+> plot(EngineSize~Origin)
+>
+</code>
+{{t-test_enginesize.png}}
+<code>> t.test(EngineSize~Origin)
+	Welch Two Sample t-test
+data:  EngineSize by Origin
+t = 4.2135, df = 80.033, p-value = 6.55e-05
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ 0.4350602 1.2138287
+sample estimates:
+    mean in group USA mean in group non-USA
+             3.066667              2.242222
+>
+</code>
 ====== E.g. 2 ======
   - Seatbelts 데이터를 불러온 후
@@ Line 14: / Line 249: @@
     - null hypothesis
     - test result
+<code>> sb <- as.data.frame(Seatbelts)
+> attach(sb)
+The following objects are masked from sb (pos = 3):
+    drivers, DriversKilled, front, kms, law,
+    PetrolPrice, rear, VanKilled
+The following object is masked from package:MASS:
+    drivers
+>
+</code>
+<code>> tapply(DriversKilled,law,summary)
+$`0`
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+.0   108.0   121.0   125.9   140.0   198.0
+$`1`
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+.0    85.0    92.0   100.3   119.0   154.0
+>
+> tapply(DriversKilled,law,sd)
+       0        1
+24.26088 22.22860
+</code>
+<code>> t.test(DriversKilled~law)
+	Welch Two Sample t-test
+data:  DriversKilled by law
+t = 5.1253, df = 29.609, p-value = 1.693e-05
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ 15.39892 35.81899
+sample estimates:
+mean in group 0 mean in group 1
+       125.8698        100.2609
+</code>
 ====== E.g. 3 ======
@@ Line 21: / Line 298: @@
     - 테스트를 한 후
     - 결과를 보고하시오.
+<code>> anorexia
+. . . .
+> md = subset(anorexia, Treat=="FT")
+> md
+   Treat Prewt Postwt
+    FT  83.8   95.2
+    FT  83.3   94.3
+    FT  86.0   91.5
+    FT  82.5   91.9
+    FT  86.7  100.3
+    FT  79.6   76.7
+    FT  76.9   76.8
+    FT  94.2  101.6
+    FT  73.4   94.9
+    FT  80.5   75.2
+    FT  81.6   77.8
+    FT  82.1   95.5
+    FT  77.6   90.7
+    FT  83.5   92.5
+    FT  89.9   93.8
+    FT  86.0   91.7
+    FT  87.3   98.0
+> t.test(md$Prewt, md$Postwt, data=md, paired=TRUE)
+	Paired t-test
+data:  md$Prewt and md$Postwt
+t = -4.1849, df = 16, p-value = 0.0007003
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ -10.94471  -3.58470
+sample estimates:
+mean of the differences
+              -7.264706
+</code>
 ====== E.g. 4 ======
 <WRAP box>
@@ Line 27: / Line 345: @@
 </WRAP>
 두 그룹의 평균의 차이를 비교하시오.
+<code>> a
+ [1] 175 168 168 190 156 181 182 175 174 179
+> b
+ [1] 185 169 173 173 188 186 175 174 179 180
+> ab <- data.frame(a,b)
+> ab
+     a   b
+1  175 185
+2  168 169
+3  168 173
+4  190 173
+5  156 188
+6  181 186
+7  182 175
+8  175 174
+9  174 179
+10 179 180
+>
+> summary(ab)
+       a               b
+ Min.   :156.0   Min.   :169.0
+ 1st Qu.:169.5   1st Qu.:173.2
+ Median :175.0   Median :177.0
+ Mean   :174.8   Mean   :178.2
+ 3rd Qu.:180.5   3rd Qu.:183.8
+ Max.   :190.0   Max.   :188.0
+> abs <- stack(ab)
+> tapply(abs$values, abs$ind, summary)
+$a
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  156.0   169.5   175.0   174.8   180.5   190.0
+$b
+   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
+  169.0   173.2   177.0   178.2   183.8   188.0
+> tapply(abs$values, abs$ind, sd)
+       a        b
+9.342852 6.442912
+>
+> t.test(ab$a,ab$b)
+	Welch Two Sample t-test
+data:  ab$a and ab$b
+t = -0.94737, df = 15.981, p-value = 0.3576
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ -11.008795   4.208795
+sample estimates:
+mean of x mean of y
+    174.8     178.2
+</code>
 ====== E.g. 5 ======
@@ Line 33: / Line 409: @@
 아이스크림의 박테리아가 0.3 MPN/g 보다 커서 유통되기에 위험하다고 할 수 있을까?
+<code>> ir <- c(0.593, 0.142, 0.329, 0.691, 0.231, 0.793, 0.519, 0.392, 0.418)
+> ir
+[1] 0.593 0.142 0.329 0.691 0.231 0.793 0.519 0.392 0.418
+> t.test(ir, mu=.3)
+	One Sample t-test
+data:  ir
+t = 2.2051, df = 8, p-value = 0.05853
+alternative hypothesis: true mean is not equal to 0.3
+95 percent confidence interval:
+ 0.2928381 0.6200508
+sample estimates:
+mean of x
+0.4564444
+>
+> t.test(ir, alternative="greater", mu=.3)
+	One Sample t-test
+data:  ir
+t = 2.2051, df = 8, p-value = 0.02927
+alternative hypothesis: true mean is greater than 0.3
+95 percent confidence interval:
+ 0.3245133       Inf
+sample estimates:
+mean of x
+0.4564444
+>
+</code>
 ====== E.g. 6 ======
@@ Line 40: / Line 450: @@
 흡연이 기억에 영향을 준다고 할 수 있을까?
+<code>
+> smoke <- c(18,22,21,17,20,17,23,20,22,21)
+> nosmoke <- c(16,20,14,21,20,18,13,15,17,21)
+> sn <- data.frame(smoke, nosmoke)
+> ss <- stack(sn)
+> plot(ss$values~ss$ind)
+</code>
+<code>> t.test(ss$values~ss$ind)
+	Welch Two Sample t-test
+data:  ss$values by ss$ind
+t = -2.2573, df = 16.376, p-value = 0.03798
+alternative hypothesis: true difference in means is not equal to 0
+95 percent confidence interval:
+ -5.0371795 -0.1628205
+sample estimates:
+mean in group nosmoke   mean in group smoke
+                 17.5                  20.1
+>
+>
+</code>
 ====== E.g. 7 ======
   - MASS package를 불러온 후, survey 데이터를 활용하여 담배와 운동량 간의 관계에 대한 가설테스트를 하시오.