> ###
> # rnorm2: draw n normal values, then rescale them so that the SAMPLE
> # mean and sd equal `mean` and `sd` exactly (not just in expectation).
> # The result keeps the n x 1 matrix shape that scale() returns.
> rnorm2 <- function(n,mean,sd) {
+ z <- scale(rnorm(n))  # standardized draws: sample mean 0, sample sd 1
+ z * sd + mean
+ }
> groupA <- rnorm2(10, 19, sqrt(160/9))  # n = 10, sample mean 19, sample variance 160/9
> groupB <- rnorm2(10, 25, sqrt(200/9))  # n = 10, sample mean 25, sample variance 200/9
> ss.a <- 160  # sum of squares of groupA: variance * df = (160/9) * 9
> ss.b <- 200  # sum of squares of groupB: (200/9) * 9
> n.a <- 10
> n.b <- 10
> df.a <- 9  # n.a - 1
> df.b <- 9  # n.b - 1
> pooled.v <- (ss.a+ss.b)/(df.a+df.b)  # pooled variance of the two groups
> pooled.v
[1] 20
> se <- sqrt(pooled.v/n.a + pooled.v/n.b)  # standard error of the mean difference
> se
[1] 2
> diff <- (mean(groupA)-mean(groupB))  # NOTE: variable shares its name with base::diff()
> t.cal <- diff/se  # hand-calculated t statistic
> t.cal
[1] -3
> pt(t.cal, df=df.a+df.b)*2  # two-tailed p; doubling the lower tail is valid because t.cal < 0
[1] 0.007685412
>
> t.test(groupA, groupB, var.equal=TRUE)  # pooled-variance test; reproduces t.cal and the p-value computed by hand
Two Sample t-test
data: groupA and groupB
t = -3, df = 18, p-value = 0.007685
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.201844 -1.798156
sample estimates:
mean of x mean of y
19 25
>
> # If, rather than the exact tail probability,
> # you want the t scores that bound the 95% confidence
> # interval (two-tailed, alpha = .05):
> t.critical <- abs(qt(.05/2, 18))  # 18 = df.a + df.b from the pooled test above
> c(-t.critical, t.critical)  # lower and upper critical values
[1] -2.100922 2.100922
> t.cal  # -3 falls outside the critical bounds
[1] -3
> #############
> # ss: sum of squared deviations of x from its own mean.
> ss <- function(x) {
+ dev <- x - mean(x)  # deviation of each value from the mean
+ sum(dev^2)
+ }
> n.a <- n.b <- 40  # both groups have 40 observations
> df.b <- n.b - 1
> df.a <- n.a - 1
> naver <- rnorm2(n.a, 7.2, 1)  # sample mean 7.2, sample sd 1
> mylab <- rnorm2(n.b, 7.9, 1)  # sample mean 7.9, sample sd 1
> ss(naver)  # sd^2 * df = 1 * 39
[1] 39
> pv <- (ss(naver)+ss(mylab))/(df.a + df.b)  # pooled variance
> pv
[1] 1
> se <- sqrt(pv/n.a + pv/n.b)  # standard error of the mean difference
> se
[1] 0.2236068
> t.cal <- (mean(naver)-mean(mylab))/se  # hand-calculated t statistic
> t.cal
[1] -3.130495
> pt(t.cal, df.a+df.b)*2  # two-tailed p; doubling the lower tail is valid because t.cal < 0
[1] 0.002455973
> t.test(naver, mylab, var.equal=TRUE)  # pooled-variance test, matching the manual pv/se calculation
Two Sample t-test
data: naver and mylab
t = -3.1305, df = 78, p-value = 0.002456
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.1451669 -0.2548331
sample estimates:
mean of x mean of y
7.2 7.9
>
> # Simulate two samples of s.n rounded normal draws with a common sd
> s.n <- 100  # observations per sample
> s.hi <- 5200  # mean passed to rnorm() for the first sample
> s.lo <- 4980  # mean passed to rnorm() for the second sample
> s.sd <- 500  # sd passed to rnorm() for both samples
>
> set.seed(101)  # fix the RNG seed before drawing
> s.hi <- round(rnorm(s.n, s.hi, s.sd))  # s.hi is overwritten: mean -> sample vector
> s.lo <- round(rnorm(s.n, s.lo, s.sd))  # s.lo is overwritten: mean -> sample vector
>
> s.hi  # the 100 rounded draws of the first sample
[1] 5037 5476 4863 5307 5355 5787 5509 5144 5659 5088 5463 4803
[13] 5914 4467 5082 5103 4775 5229 4791 4175 5118 5554 5066 4468
[25] 5572 4495 5434 5140 5434 5449 5647 5340 5704 4163 5795 4838
[37] 5284 5660 4364 5424 5441 5579 4040 4970 4647 5401 5484 4847
[49] 5055 4458 4625 5063 5489 4502 5575 4674 5283 5765 5787 4986
[61] 5070 4494 4879 5256 5411 5393 4856 5274 5171 5163 5955 6010
[73] 5777 5161 4291 4681 5351 4561 5269 5175 6126 5756 4944 4928
[85] 4336 5435 5203 5874 5562 5976 5863 5183 5019 4840 5341 4805
[97] 4978 5882 5449 4793
> s.lo  # the 100 rounded draws of the second sample
[1] 5114 4684 6047 5566 5353 4865 5024 3888 4747 5823 4696 4957
[13] 4902 5781 5364 4594 4665 4565 4684 5471 4649 4594 3971 4713
[25] 5197 4594 4603 4830 5812 4358 4588 5102 4908 4176 5456 4070
[37] 5872 5924 5725 4790 4525 4811 4274 5089 5315 4836 5215 4745
[49] 4860 4756 4671 5106 4603 5346 4779 3568 5211 6046 4845 5104
[61] 4999 5177 4228 4187 4516 5368 4590 4341 4979 4055 5206 4764
[73] 5337 5460 5171 5589 4971 4961 5602 4502 5438 4510 5036 5257
[85] 5246 4543 4887 4873 4878 5840 5081 5236 5706 5162 4542 4973
[97] 4618 5965 4712 4967
>
> df.a <- s.n - 1  # degrees of freedom of each sample
> df.b <- s.n - 1
>
> m.a <- mean(s.hi)
> m.b <- mean(s.lo)
> diff <- m.a - m.b  # observed mean difference (variable shares its name with base::diff())
> pv <- (ss(s.hi)+ss(s.lo))/(df.a + df.b)  # pooled variance
> pv
[1] 235102.1
> se <- sqrt(pv/s.n + pv/s.n)  # standard error of the mean difference
> se
[1] 68.57143
> diff
[1] 222.38
> t.cal <- diff / se  # hand-calculated t statistic
> t.cal
[1] 3.243042
> pt(t.cal, df.a+df.b, lower.tail = FALSE)*2  # t.cal > 0, so the two-tailed p doubles the upper tail
[1] 0.001387925
>
> t.test(s.hi, s.lo, var.equal=TRUE)  # pooled-variance test; reproduces t.cal and the p-value computed by hand
Two Sample t-test
data: s.hi and s.lo
t = 3.243, df = 198, p-value = 0.001388
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
87.15594 357.60406
sample estimates:
mean of x mean of y
5181.38 4959.00