# Two-sample t-tests worked out by hand and checked against t.test() ----
#
# Cleaned-up form of an interactive console session: the `>` / `+` prompts
# are removed so the file can be sourced, and the values that were printed in
# the recorded session are kept as `#>` comments next to the call that
# produced them.

# Draw `n` values whose *sample* mean and *sample* sd are exactly `mean` and
# `sd`: a standard-normal draw is standardised with scale() and then shifted
# and stretched. scale() returns a one-column matrix, so the result is a
# matrix (with scale()'s attributes), not a bare vector.
rnorm2 <- function(n, mean, sd) {
  mean + sd * scale(rnorm(n))
}

# Example 1: n = 10 per group, sums of squares fixed at 160 and 200 ----

# With n = 10 a sample variance of 160/9 (200/9) corresponds to a sum of
# squares of 160 (200), matching ss.a / ss.b below.
groupA <- rnorm2(10, 19, sqrt(160 / 9))
groupB <- rnorm2(10, 25, sqrt(200 / 9))
ss.a <- 160
ss.b <- 200
n.a <- 10
n.b <- 10
df.a <- 9
df.b <- 9

# Pooled variance: total sum of squares over total degrees of freedom.
pooled.v <- (ss.a + ss.b) / (df.a + df.b)
pooled.v
#> [1] 20

# Standard error of the difference between the two sample means.
se <- sqrt(pooled.v / n.a + pooled.v / n.b)
se
#> [1] 2

diff <- (mean(groupA) - mean(groupB))
t.cal <- diff / se
t.cal
#> [1] -3

# Two-sided p-value. abs() with the upper tail gives the right answer
# whatever the sign of t.cal (doubling the lower tail only works for t < 0).
2 * pt(abs(t.cal), df = df.a + df.b, lower.tail = FALSE)
#> [1] 0.007685412

t.test(groupA, groupB, var.equal = TRUE)
#>         Two Sample t-test
#>
#> data:  groupA and groupB
#> t = -3, df = 18, p-value = 0.007685
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -10.201844  -1.798156
#> sample estimates:
#> mean of x mean of y
#>        19        25

# If, rather than the exact probability, you want the t scores that mark
# the 95% interval, look up the critical value and compare t.cal to it.
t.critical <- abs(qt(.05 / 2, 18))
c(-t.critical, t.critical)
#> [1] -2.100922  2.100922
t.cal
#> [1] -3

# Example 2: n = 40 per group, sums of squares computed from the data ----

# Sum of squared deviations of `x` from its own mean.
ss <- function(x) {
  sum((x - mean(x))^2)
}

n.a <- n.b <- 40
df.b <- n.b - 1
df.a <- n.a - 1
naver <- rnorm2(n.a, 7.2, 1)
mylab <- rnorm2(n.b, 7.9, 1)
ss(naver)
#> [1] 39

pv <- (ss(naver) + ss(mylab)) / (df.a + df.b)
pv
#> [1] 1
se <- sqrt(pv / n.a + pv / n.b)
se
#> [1] 0.2236068
t.cal <- (mean(naver) - mean(mylab)) / se
t.cal
#> [1] -3.130495
2 * pt(abs(t.cal), df.a + df.b, lower.tail = FALSE)
#> [1] 0.002455973

# t.test() defaults to the Welch test. In the recorded run it reports the
# same t, df and p-value as the pooled calculation above (both groups have
# the same n and the same sample variance).
t.test(naver, mylab)
#>         Welch Two Sample t-test
#>
#> data:  naver and mylab
#> t = -3.1305, df = 78, p-value = 0.002456
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -1.1451669 -0.2548331
#> sample estimates:
#> mean of x mean of y
#>       7.2       7.9

# Example 3: seeded random samples, n = 100 per group ----

s.n <- 100
s.hi <- 5200
s.lo <- 4980
s.sd <- 500

set.seed(101)
# s.hi / s.lo go in as the means to sample around and are then overwritten
# with the rounded samples themselves.
s.hi <- round(rnorm(s.n, s.hi, s.sd))
s.lo <- round(rnorm(s.n, s.lo, s.sd))

s.hi
#>   [1] 5037 5476 4863 5307 5355 5787 5509 5144 5659 5088 5463 4803
#>  [13] 5914 4467 5082 5103 4775 5229 4791 4175 5118 5554 5066 4468
#>  [25] 5572 4495 5434 5140 5434 5449 5647 5340 5704 4163 5795 4838
#>  [37] 5284 5660 4364 5424 5441 5579 4040 4970 4647 5401 5484 4847
#>  [49] 5055 4458 4625 5063 5489 4502 5575 4674 5283 5765 5787 4986
#>  [61] 5070 4494 4879 5256 5411 5393 4856 5274 5171 5163 5955 6010
#>  [73] 5777 5161 4291 4681 5351 4561 5269 5175 6126 5756 4944 4928
#>  [85] 4336 5435 5203 5874 5562 5976 5863 5183 5019 4840 5341 4805
#>  [97] 4978 5882 5449 4793
s.lo
#>   [1] 5114 4684 6047 5566 5353 4865 5024 3888 4747 5823 4696 4957
#>  [13] 4902 5781 5364 4594 4665 4565 4684 5471 4649 4594 3971 4713
#>  [25] 5197 4594 4603 4830 5812 4358 4588 5102 4908 4176 5456 4070
#>  [37] 5872 5924 5725 4790 4525 4811 4274 5089 5315 4836 5215 4745
#>  [49] 4860 4756 4671 5106 4603 5346 4779 3568 5211 6046 4845 5104
#>  [61] 4999 5177 4228 4187 4516 5368 4590 4341 4979 4055 5206 4764
#>  [73] 5337 5460 5171 5589 4971 4961 5602 4502 5438 4510 5036 5257
#>  [85] 5246 4543 4887 4873 4878 5840 5081 5236 5706 5162 4542 4973
#>  [97] 4618 5965 4712 4967

df.a <- s.n - 1
df.b <- s.n - 1

m.a <- mean(s.hi)
m.b <- mean(s.lo)
diff <- m.a - m.b
pv <- (ss(s.hi) + ss(s.lo)) / (df.a + df.b)
pv
#> [1] 235102.1
se <- sqrt(pv / s.n + pv / s.n)
se
#> [1] 68.57143
diff
#> [1] 222.38
t.cal <- diff / se
t.cal
#> [1] 3.243042
2 * pt(abs(t.cal), df.a + df.b, lower.tail = FALSE)
#> [1] 0.001387925

t.test(s.hi, s.lo, var.equal = TRUE)
#>         Two Sample t-test
#>
#> data:  s.hi and s.lo
#> t = 3.243, df = 198, p-value = 0.001388
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>   87.15594 357.60406
#> sample estimates:
#> mean of x mean of y
#>   5181.38   4959.00