====== Sampling Distribution and z-test ======

rm(list=ls())

rnorm2 <- function(n,mean,sd){
  mean+sd*scale(rnorm(n))
}

se <- function(sample) {
  sd(sample)/sqrt(length(sample))
}

ss <- function(x) {
  sum((x-mean(x))^2)
}

################################
N.p <- 1000000
m.p <- 100
sd.p <- 10

p1 <- rnorm2(N.p, m.p, sd.p)
mean(p1)
sd(p1)

p2 <- rnorm2(N.p, m.p+5, sd.p)
mean(p2)
sd(p2)

m.p1 <- mean(p1)
sd.p1 <- sd(p1)
var(p1)

hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5),
     main = "histogram of p1 and p2")
abline(v=mean(p1), col="black", lwd=3)
hist(p2, add=T, breaks=50, col=rgb(1,1,.5,.5))
abline(v=mean(p2), col="red", lwd=3)

hist(p1, breaks=50, col=rgb(0,.5,.5,.5))
abline(v=mean(p1), lwd=2)
abline(v=mean(p1)-sd(p1), lwd=2)
abline(v=mean(p1)+sd(p1), lwd=2)
abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green')

# area between the black lines = about 68%
# between the red lines = about 95%
# between the green lines = about 99.7%

pnorm(m.p1+sd.p1, m.p1, sd.p1)
pnorm(m.p1-sd.p1, m.p1, sd.p1)
pnorm(m.p1+sd.p1, m.p1, sd.p1) -
  pnorm(m.p1-sd.p1, m.p1, sd.p1)

pnorm(m.p1+2*sd.p1, m.p1, sd.p1) -
  pnorm(m.p1-2*sd.p1, m.p1, sd.p1)

pnorm(m.p1+3*sd.p1, m.p1, sd.p1) -
  pnorm(m.p1-3*sd.p1, m.p1, sd.p1)

m.p1
sd.p1

(m.p1-m.p1)/sd.p1
((m.p1-sd.p1) - m.p1) / sd.p1
(120-100)/10
pnorm(1)-pnorm(-1)
pnorm(2)-pnorm(-2)
pnorm(3)-pnorm(3)   # note: this returns 0; the intended call is pnorm(3)-pnorm(-3)

1-pnorm(-2)*2
pnorm(2)-pnorm(-2)

pnorm(120, 100, 10)
pnorm(2)-pnorm(-2)

zscore <- (120-100)/10
pnorm(zscore)-pnorm(-zscore)
zscore

# reminder.
pnorm(-1)
pnorm(1, 0, 1, lower.tail = F)
pnorm(110, 100, 10, lower.tail = F)
zscore <- (110-100)/10
pnorm(zscore, lower.tail = F)

pnorm(118, 100, 10, lower.tail = F)
pnorm(18/10, lower.tail = F)

z.p1 <- (p1-mean(p1))/sd(p1)
mean(z.p1)
round(mean(z.p1),10)
sd(z.p1)
pnorm(1.8)-pnorm(-1.8)

hist(z.p1, breaks=50, col=rgb(1,0,0,0))
abline(v=c(m.p1, -1.8, 1.8), col='red')   # m.p1 (= 100) lies off the z-scale axis; only -1.8 and 1.8 are visible
1-(pnorm(1.8)-pnorm(-1.8))

pnorm(1)-pnorm(-1)
1-(pnorm(-1)*2)

pnorm(2)-pnorm(-2)
1-(pnorm(-2)*2)

1-(pnorm(-3)*2)

#
hist(p1, breaks=50, col=rgb(.9,.9,.9,.9))
abline(v=mean(p1), lwd=2)
abline(v=mean(p1)-sd(p1), lwd=2)
abline(v=mean(p1)+sd(p1), lwd=2)
abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green')

# 68%
a <- qnorm(.32/2)
b <- qnorm(1-.32/2)
c(a, b)
c(-1, 1)
# note that
.32/2
pnorm(-1)
qnorm(.32/2)
qnorm(pnorm(-1))

# 95%
c <- qnorm(.05/2)
d <- qnorm(1-.05/2)
c(c, d)
c(-2,2)

# 99%
e <- qnorm(.01/2)
f <- qnorm(1-.01/2)
c(e,f)
c(-3,3)

pnorm(b)-pnorm(a)
c(a, b)
pnorm(d)-pnorm(c)
c(c, d)
pnorm(f)-pnorm(e)
c(e, f)

qnorm(.5)
qnorm(1)

################################
s.size <- 50

means.temp <- c()
s1 <- sample(p1, s.size, replace = T)
mean(s1)
means.temp <- append(means.temp, mean(s1))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp

iter <- 1000000
# means <- c()
means <- rep(NA, iter)
for (i in 1:iter) {
  # means <- append(means, mean(sample(p1, s.size, replace = T)))
  means[i] <- mean(sample(p1, s.size, replace = T))
}
length(means)
mean(means)
sd(means)
se.s <- sd(means)
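# Editor's aside (a sketch, not part of the original script): the same
# sampling distribution can be built without an explicit for-loop by using
# replicate(). iter.alt and means.alt are illustrative names; a smaller
# number of iterations is used so it runs quickly.
iter.alt <- 10000
means.alt <- replicate(iter.alt, mean(sample(p1, s.size, replace = T)))
mean(means.alt)   # close to mean(p1)
sd(means.alt)     # close to sd(p1)/sqrt(s.size), i.e., the standard error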
hist(means, breaks=50,
     xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
     col=rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
# now mark the mean +/- 1, 2, 3 standard errors of this distribution
lo1 <- mean(means)-se.s
hi1 <- mean(means)+se.s
lo2 <- mean(means)-2*se.s
hi2 <- mean(means)+2*se.s
lo3 <- mean(means)-3*se.s
hi3 <- mean(means)+3*se.s
abline(v=mean(means), col="black", lwd=2)
# abline(v=mean(p2), col='darkgreen', lwd=2)
abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
       col=c("red","red", "blue", "blue", "orange", "orange"),
       lwd=2)

# meanwhile . . . .
se.s
se.z <- sqrt(var(p1)/s.size)
se.z <- c(se.z)   # c() drops the 1x1 matrix structure that var(p1) returns here
se.z
se.z.adjusted <- sqrt(var(s1)/s.size)   # se estimated from the sample s1
se.z.adjusted

# sd of the sample means (sd(means)) = se.s
# as iter goes to infinity:
#   mean(means) = mean(p1)
# and
#   sd(means) = sd(p1) / sqrt(s.size)
# that is, se.s = se.z
# This is the CLT (Central Limit Theorem);
# see http://commres.net/wiki/cetral_limit_theorem

mean(means)
mean(p1)
sd(means)
var(p1)
# recall the sample size we are using (s.size = 50)
sqrt(var(p1)/s.size)
se.z

sd(means)
se.s
se.z

# because of the CLT
loz1 <- mean(p1)-se.z
hiz1 <- mean(p1)+se.z
loz2 <- mean(p1)-2*se.z
hiz2 <- mean(p1)+2*se.z
loz3 <- mean(p1)-3*se.z
hiz3 <- mean(p1)+3*se.z

c(lo1, loz1)
c(lo2, loz2)
c(lo3, loz3)

c(hi1, hiz1)
c(hi2, hiz2)
c(hi3, hiz3)

hist(means, breaks=50,
     xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
     col = rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
# abline(v=mean(p2), col='darkgreen', lwd=3)
abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
       col=c("darkgreen","darkgreen", "blue", "blue", "orange", "orange"),
       lwd=2)

round(c(lo1, hi1))
round(c(lo2, hi2))
round(c(lo3, hi3))

round(c(loz1, hiz1))
round(c(loz2, hiz2))
round(c(loz3, hiz3))

m.sample.i.got <- mean(means) + 1.5*sd(means)
m.sample.i.got

hist(means, breaks=30,
     xlim = c(mean(means)-7*sd(means), mean(means)+10*sd(means)),
     col = rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
abline(v=m.sample.i.got, col='darkgreen', lwd=3)

# what is the probability of getting
# a sample mean greater than
# m.sample.i.got?
m.sample.i.got
pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F)
pnorm(m.sample.i.got, mean(p1), se.z, lower.tail = F)

# then, what is the probability of getting a mean
# greater than m.sample.i.got (the green line) or
# less than its mirror value below the center,
# mean(means) - (m.sample.i.got - mean(means)) (the red line)?
tmp <- mean(means) - (m.sample.i.got - mean(means))
abline(v=tmp, col='red', lwd=3)
2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F)
m.sample.i.got

### one more time
# this time, with a story
mean(p2)
sd(p2)
s.from.p2 <- sample(p2, s.size)
m.s.from.p2 <- mean(s.from.p2)
m.s.from.p2

se.s
se.z
sd(means)

tmp <- mean(means) - (m.s.from.p2 - mean(means))
tmp

hist(means, breaks=30,
     xlim = c(tmp-4*sd(means), m.s.from.p2+4*sd(means)),
     col = rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
abline(v=m.s.from.p2, col='blue', lwd=3)

# what is the probability of getting
# a sample mean greater than
# m.s.from.p2?
m.s.from.p2
pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)

# then, what is the probability of getting a mean
# greater than m.s.from.p2 (the blue line) or
# less than its mirror value,
# mean(means) - (m.s.from.p2 - mean(means)) (the red line)?
abline(v=tmp, col='red', lwd=3)
2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)

se.z
sd(s.from.p2)/sqrt(s.size)
se.z.adjusted <- sqrt(var(s.from.p2)/s.size)
se.z.adjusted
2 * pnorm(m.s.from.p2, mean(p1), se.z.adjusted, lower.tail = F)

z.cal <- (m.s.from.p2 - mean(p1))/se.z.adjusted
z.cal
pnorm(z.cal, lower.tail = F)*2

pt(z.cal, 49, lower.tail = F)*2   # df = s.size - 1 = 49
t.test(s.from.p2, mu=mean(p1), var.equal = T)   # var.equal is ignored in a one-sample test
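# Editor's aside (a sketch, not part of the original session; its output does
# not appear in the transcript below): the z-test done by hand above can be
# wrapped in a small helper. z.test.simple and its arguments are illustrative
# names, not a function from any package.
z.test.simple <- function(x, mu, sigma) {
  se.x <- sigma / sqrt(length(x))            # standard error under known sigma
  z    <- (mean(x) - mu) / se.x              # z statistic
  p    <- 2 * pnorm(abs(z), lower.tail = F)  # two-tailed probability
  c(z = z, p.value = p)
}
# with the population sd of p1, this reproduces the two-tailed probability
# computed above (with se.z) for the sample drawn from p2
z.test.simple(s.from.p2, mu = mean(p1), sigma = sd(p1))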
====== output ======

> rm(list=ls())
> rnorm2 <- function(n,mean,sd){
+   mean+sd*scale(rnorm(n))
+ }
> 
> se <- function(sample) {
+   sd(sample)/sqrt(length(sample))
+ }
> 
> ss <- function(x) {
+   sum((x-mean(x))^2)
+ }
> 
> ################################
> N.p <- 1000000
> m.p <- 100
> sd.p <- 10
> 
> p1 <- rnorm2(N.p, m.p, sd.p)
> mean(p1)
[1] 100
> sd(p1)
[1] 10
> 
> p2 <- rnorm2(N.p, m.p+5, sd.p)
> mean(p2)
[1] 105
> sd(p2)
[1] 10
> 
> m.p1 <- mean(p1)
> sd.p1 <- sd(p1)
> var(p1)
     [,1]
[1,]  100
> 
> 
> hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5),
+      main = "histogram of p1 and p2",)
> abline(v=mean(p1), col="black", lwd=3)
> hist(p2, add=T, breaks=50, col=rgb(1,1,.5,.5))
> abline(v=mean(p2), col="red", lwd=3)
> 
> 
> hist(p1, breaks=50, col=rgb(0,.5,.5,.5))
> abline(v=mean(p1),lwd=2)
> abline(v=mean(p1)-sd(p1), lwd=2)
> abline(v=mean(p1)+sd(p1), lwd=2)
> abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
> abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green')
> 
> # area bet black = 68%
> # between red = 95%
> # between green = 99%
> 
> pnorm(m.p1+sd.p1, m.p1, sd.p1)
[1] 0.8413447
> pnorm(m.p1-sd.p1, m.p1, sd.p1)
[1] 0.1586553
> pnorm(m.p1+sd.p1, m.p1, sd.p1) -
+   pnorm(m.p1-sd.p1, m.p1, sd.p1)
[1] 0.6826895
> 
> pnorm(m.p1+2*sd.p1, m.p1, sd.p1) -
+   pnorm(m.p1-2*sd.p1, m.p1, sd.p1)
[1] 0.9544997
> 
> pnorm(m.p1+3*sd.p1, m.p1, sd.p1) -
+   pnorm(m.p1-3*sd.p1, m.p1, sd.p1)
[1] 0.9973002
> 
> m.p1
[1] 100
> sd.p1
[1] 10
> 
> (m.p1-m.p1)/sd.p1
[1] 0
> ((m.p1-sd.p1) - m.p1) / sd.p1
[1] -1
> (120-100)/10
[1] 2
> pnorm(1)-pnorm(-1)
[1] 0.6826895
> pnorm(2)-pnorm(-2)
[1] 0.9544997
> pnorm(3)-pnorm(3)
[1] 0
> 
> 1-pnorm(-2)*2
[1] 0.9544997
> pnorm(2)-pnorm(-2)
[1] 0.9544997
> 
> pnorm(120, 100, 10)
[1] 0.9772499
> pnorm(2)-pnorm(-2)
[1] 0.9544997
> 
> zscore <- (120-100)/10
> pnorm(zscore)-pnorm(-zscore)
[1] 0.9544997
> zscore
[1] 2
> 
> # reminder.
> pnorm(-1) [1] 0.1586553 > pnorm(1, 0, 1, lower.tail = F) [1] 0.1586553 > pnorm(110, 100, 10, lower.tail = F) [1] 0.1586553 > zscore <- (110-100)/10 > pnorm(zscore, lower.tail = F) [1] 0.1586553 > > pnorm(118, 100, 10, lower.tail = F) [1] 0.03593032 > pnorm(18/10, lower.tail = F) [1] 0.03593032 > > z.p1 <- (p1-mean(p1))/sd(p1) > mean(z.p1) [1] 8.189457e-18 > round(mean(z.p1),10) [1] 0 > sd(z.p1) [1] 1 > pnorm(1.8)-pnorm(-1.8) [1] 0.9281394 > > hist(z.p1, breaks=50, col=rgb(1,0,0,0)) > abline(v=c(m.p1, -1.8, 1.8), col='red') > 1-(pnorm(1.8)-pnorm(-1.8)) [1] 0.07186064 > > > pnorm(1)-pnorm(-1) [1] 0.6826895 > 1-(pnorm(-1)*2) [1] 0.6826895 > > pnorm(2)-pnorm(-2) [1] 0.9544997 > 1-(pnorm(-2)*2) [1] 0.9544997 > > 1-(pnorm(-3)*2) [1] 0.9973002 > > # > hist(p1, breaks=50, col=rgb(.9,.9,.9,.9)) > abline(v=mean(p1),lwd=2) > abline(v=mean(p1)-sd(p1), lwd=2) > abline(v=mean(p1)+sd(p1), lwd=2) > abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red') > abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green') > > # 68% > a <- qnorm(.32/2) > b <- qnorm(1-.32/2) > c(a, b) [1] -0.9944579 0.9944579 > c(-1, 1) [1] -1 1 > # note that > .32/2 [1] 0.16 > pnorm(-1) [1] 0.1586553 > qnorm(.32/2) [1] -0.9944579 > qnorm(pnorm(-1)) [1] -1 > > # 95% > c <- qnorm(.05/2) > d <- qnorm(1-.05/2) > c(c, d) [1] -1.959964 1.959964 > c(-2,2) [1] -2 2 > > # 99% > e <- qnorm(.01/2) > f <- qnorm(1-.01/2) > c(e,f) [1] -2.575829 2.575829 > c(-3,3) [1] -3 3 > > > pnorm(b)-pnorm(a) [1] 0.68 > c(a, b) [1] -0.9944579 0.9944579 > pnorm(d)-pnorm(c) [1] 0.95 > c(c, d) [1] -1.959964 1.959964 > pnorm(f)-pnorm(e) [1] 0.99 > c(e, f) [1] -2.575829 2.575829 > > qnorm(.5) [1] 0 > qnorm(1) [1] Inf > > > ################################ > s.size <- 50 > > means.temp <- c() > s1 <- sample(p1, s.size, replace = T) > mean(s1) [1] 98.76098 > means.temp <- append(means.temp, mean(s1)) > means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) > means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) > means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) > means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) > means.temp [1] 98.76098 99.90935 99.29643 99.66014 101.93822 > > iter <- 1000000 > # means <- c() > means <- rep(NA, iter) > for (i in 1:iter) { + # means <- append(means, mean(sample(p1, s.size, replace = T))) + means[i] <- mean(sample(p1, s.size, replace = T)) + } > length(means) [1] 1000000 > mean(means) [1] 99.99824 > sd(means) [1] 1.414035 > se.s <- sd(means) > > hist(means, breaks=50, + xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)), + col=rgb(1, 1, 1, .5)) > abline(v=mean(means), col="black", lwd=3) > # now we want to get sd of this distribution > lo1 <- mean(means)-se.s > hi1 <- mean(means)+se.s > lo2 <- mean(means)-2*se.s > hi2 <- mean(means)+2*se.s > lo3 <- mean(means)-3*se.s > hi3 <- mean(means)+3*se.s > abline(v=mean(means), col="black", lwd=2) > # abline(v=mean(p2), colo='darkgreen', lwd=2) > abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3), + col=c("red","red", "blue", "blue", "orange", "orange"), + lwd=2) > > # meanwhile . . . . 
> se.s [1] 1.414035 > > se.z <- sqrt(var(p1)/s.size) > se.z <- c(se.z) > se.z [1] 1.414214 > > se.z.adjusted <- sqrt(var(s1)/s.size) > se.z.adjusted [1] 1.370464 > > # sd of sample means (sd(means)) > # = se.s > > # when iter value goes to > # infinite value: > # mean(means) = mean(p1) > # and > # sd(means) = sd(p1) / sqrt(s.size) > # that is, se.s = se.z > # This is called CLT (Central Limit Theorem) > # see http://commres.net/wiki/cetral_limit_theorem > > mean(means) [1] 99.99824 > mean(p1) [1] 100 > sd(means) [1] 1.414035 > var(p1) [,1] [1,] 100 > # remember we started talking sample size 10 > sqrt(var(p1)/s.size) [,1] [1,] 1.414214 > se.z [1] 1.414214 > > sd(means) [1] 1.414035 > se.s [1] 1.414035 > se.z [1] 1.414214 > > # because CLT > loz1 <- mean(p1)-se.z > hiz1 <- mean(p1)+se.z > loz2 <- mean(p1)-2*se.z > hiz2 <- mean(p1)+2*se.z > loz3 <- mean(p1)-3*se.z > hiz3 <- mean(p1)+3*se.z > > c(lo1, loz1) [1] 98.58421 98.58579 > c(lo2, loz2) [1] 97.17017 97.17157 > c(lo3, loz3) [1] 95.75614 95.75736 > > c(hi1, hiz1) [1] 101.4123 101.4142 > c(hi2, hiz2) [1] 102.8263 102.8284 > c(hi3, hiz3) [1] 104.2403 104.2426 > > > hist(means, breaks=50, + xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)), + col = rgb(1, 1, 1, .5)) > abline(v=mean(means), col="black", lwd=3) > # abline(v=mean(p2), colo='darkgreen', lwd=3) > abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3), + col=c("darkgreen","darkgreen", "blue", "blue", "orange", "orange"), + lwd=2) > > round(c(lo1, hi1)) [1] 99 101 > round(c(lo2, hi2)) [1] 97 103 > round(c(lo3, hi3)) [1] 96 104 > > round(c(loz1, hiz1)) [1] 99 101 > round(c(loz2, hiz2)) [1] 97 103 > round(c(loz3, hiz3)) [1] 96 104 > > m.sample.i.got <- mean(means)+ 1.5*sd(means) > m.sample.i.got [1] 102.1193 > > hist(means, breaks=30, + xlim = c(mean(means)-7*sd(means), mean(means)+10*sd(means)), + col = rgb(1, 1, 1, .5)) > abline(v=mean(means), col="black", lwd=3) > abline(v=m.sample.i.got, col='darkgreen', lwd=3) > > # what is the probablity of getting > # greater than > # m.sample.i.got? > m.sample.i.got [1] 102.1193 > pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F) [1] 0.0668072 > pnorm(m.sample.i.got, mean(p1), se.z, lower.tail = F) [1] 0.0669929 > > # then, what is the probabilty of getting > # greater than m.sample.i.got and > # less than corresponding value, which is > # mean(means) - m.sample.i.got - mean(means) > # (green line) > tmp <- mean(means) - (m.sample.i.got - mean(means)) > abline(v=tmp, col='red', lwd=3) > 2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F) [1] 0.1339368 > m.sample.i.got [1] 102.1193 > > ### one more time > # this time, with a story > mean(p2) [1] 105 > sd(p2) [1] 10 > s.from.p2 <- sample(p2, s.size) > m.s.from.p2 <- mean(s.from.p2) > m.s.from.p2 [1] 103.1283 > > se.s [1] 1.414035 > se.z [1] 1.414214 > sd(means) [1] 1.414035 > > tmp <- mean(means) - (m.s.from.p2 - mean(means)) > tmp [1] 96.86822 > > hist(means, breaks=30, + xlim = c(tmp-4*sd(means), m.s.from.p2+4*sd(means)), + col = rgb(1, 1, 1, .5)) > abline(v=mean(means), col="black", lwd=3) > abline(v=m.s.from.p2, col='blue', lwd=3) > > # what is the probablity of getting > # greater than > # m.sample.i.got? 
> m.s.from.p2
[1] 103.1283
> pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
[1] 0.01348266
> 
> # then, what is the probabilty of getting
> # greater than m.sample.i.got and
> # less than corresponding value, which is
> # mean(means) - m.sample.i.got - mean(means)
> # (green line)
> abline(v=tmp, col='red', lwd=3)
> 2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
[1] 0.02696533
> 
> se.z
[1] 1.414214
> sd(s.from.p2)/sqrt(s.size)
[1] 1.414296
> se.z.adjusted <- sqrt(var(s.from.p2)/s.size)
> se.z.adjusted
[1] 1.414296
> 2 * pnorm(m.s.from.p2, mean(p1), se.z.adjusted,
+           lower.tail = F)
[1] 0.02697421
> 
> z.cal <- (m.s.from.p2 - mean(p1))/se.z.adjusted
> z.cal
[1] 2.211891
> pnorm(z.cal, lower.tail = F)*2
[1] 0.02697421
> 
> 
> pt(z.cal, 49, lower.tail = F)*2
[1] 0.03166797
> t.test(s.from.p2, mu=mean(p1), var.equal = T)

	One Sample t-test

data:  s.from.p2
t = 2.2119, df = 49, p-value = 0.03167
alternative hypothesis: true mean is not equal to 100
95 percent confidence interval:
 100.2861 105.9704
sample estimates:
mean of x 
 103.1283 

> 

====== T-test sum up ======

rm(list=ls())

rnorm2 <- function(n,mean,sd){
  mean+sd*scale(rnorm(n))
}

se <- function(sample) {
  sd(sample)/sqrt(length(sample))
}

ss <- function(x) {
  sum((x-mean(x))^2)
}

N.p <- 1000000
m.p <- 100
sd.p <- 10

p1 <- rnorm2(N.p, m.p, sd.p)
mean(p1)
sd(p1)

p2 <- rnorm2(N.p, m.p+10, sd.p)
mean(p2)
sd(p2)

hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5),
     main = "histogram of p1 and p2")
abline(v=mean(p1), col="black", lwd=3)
hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5))
abline(v=mean(p2), col="red", lwd=3)

s.size <- 1000
s2 <- sample(p2, s.size)
mean(s2)
sd(s2)

se.z <- sqrt(var(p1)/s.size)
se.z <- c(se.z)   # c() drops the 1x1 matrix structure that var(p1) returns here
se.z.range <- c(-2*se.z, 2*se.z)
se.z.range

mean(p1)+se.z.range
mean(s2)

z.cal <- (mean(s2) - mean(p1)) / se.z
z.cal
pnorm(z.cal, lower.tail = F) * 2

z.cal

# principles . . .
# distribution of sample means
iter <- 100000
means <- c()
for (i in 1:iter) {
  m.of.s <- mean(sample(p1, s.size))
  means <- append(means, m.of.s)
}

hist(means,
     xlim = c(mean(means)-3*sd(means), mean(s2)+5),
     col = rgb(1, 0, 0, 0.5))
abline(v=mean(p1), col="black", lwd=3)
abline(v=mean(s2), col="blue", lwd=3)
lo1 <- mean(p1)-se.z
hi1 <- mean(p1)+se.z
lo2 <- mean(p1)-2*se.z
hi2 <- mean(p1)+2*se.z
abline(v=c(lo1, hi1, lo2, hi2),
       col=c("green","green", "brown", "brown"),
       lwd=2)
se.z
c(lo2, hi2)
pnorm(z.cal, lower.tail = F) * 2

# Note that we use sqrt(var(s2)/s.size)
# as our se value instead of
# sqrt(var(p1)/s.size).
# This is a common practice in R;
# in fact, some z.test() functions
# (written by other people) use the former
# rather than the latter.
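# Editor's aside (sketch): the same sample-based standard error can be used
# to put a 95% confidence interval around the sample mean, using the exact
# normal quantile qnorm(.975) instead of the rule-of-thumb 2. ci.lo and
# ci.hi are illustrative names.
ci.lo <- mean(s2) - qnorm(.975) * se(s2)
ci.hi <- mean(s2) + qnorm(.975) * se(s2)
c(ci.lo, ci.hi)   # mean(p1) = 100 should fall far outside this interval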
sqrt(var(p1)/s.size)
se.z

sqrt(var(s2)/s.size)
se(s2)

t.cal.os <- (mean(s2) - mean(p1)) / se(s2)
z.cal <- (mean(s2) - mean(p1)) / se.z
t.cal.os
z.cal

df.s2 <- length(s2)-1
df.s2

p.t.os <- pt(abs(t.cal.os), df.s2, lower.tail = F) * 2
p.t.os
t.out <- t.test(s2, mu=mean(p1))

library(BSDA)
z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1))

z.out$statistic   # z value computed with se(s2) rather than se.z
z.cal             # z value computed with se.z (= sqrt(var(p1)/s.size))

t.out$statistic   # t value that uses se(s2) as the denominator
t.cal.os          # t value computed by hand using se(s2)

# But, after all, we use the t-test method regardless
# of which variance estimate we have.

hist(means,
     xlim = c(mean(means)-3*sd(means), mean(s2)+5),
     col = rgb(1, 0, 0, 0.5))
abline(v=mean(p1), col="black", lwd=3)
abline(v=mean(s2), col="blue", lwd=3)
lo1 <- mean(p1)-se.z
hi1 <- mean(p1)+se.z
lo2 <- mean(p1)-2*se.z
hi2 <- mean(p1)+2*se.z
abline(v=c(lo1, hi1, lo2, hi2),
       col=c("green","green", "brown", "brown"),
       lwd=2)

# the difference between the black and blue lines
# divided by
# se(s2) (= the difference expected by chance)
# gives the t value
mean(s2)
mean(p1)
diff <- mean(s2)-mean(p1)
diff
se(s2)
diff/se(s2)

t.cal.os

########################
# 2 sample t-test
########################
# Assumption: we do not know the population
# parameters of the two samples drawn below.
s1 <- sample(p1, s.size)
s2 <- sample(p2, s.size)

mean(s1)
mean(s2)
ss(s1)
ss(s2)
df.s1 <- length(s1)-1
df.s2 <- length(s2)-1
df.s1
df.s2

pooled.variance <- (ss(s1)+ss(s2))/(df.s1+df.s2)
pooled.variance
se.ts <- sqrt((pooled.variance/length(s1))+(pooled.variance/length(s2)))
se.ts
t.cal.ts <- (mean(s1)-mean(s2))/se.ts
t.cal.ts
p.val.ts <- pt(abs(t.cal.ts), df=df.s1+df.s2, lower.tail = F) * 2
p.val.ts

t.test(s1, s2, var.equal = T)

se(s1)
se(s2)

mean(s1)+c(-se(s1)*2, se(s1)*2)
mean(s2)+c(-se(s2)*2, se(s2)*2)

mean(p1)
mean(p2)

hist(s1, breaks=50, col = rgb(1, 0, 0, 0.5))
hist(s2, breaks=50, add=T, col=rgb(0,0,1,1))
abline(v=mean(s1), col="green", lwd=3)
# hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5))
abline(v=mean(s2), col="lightblue", lwd=3)

diff <- mean(s1)-mean(s2)
se.ts
diff/se.ts

####
# repeated-measures t-test
# we can reuse the case above:
# population parameters unknown,
# two consecutive measurements
# on the same sample
t1 <- s1
t2 <- s2
mean(t1)
mean(t2)
diff.s <- t1 - t2
diff.s   # prints all 1,000 paired differences
t.cal.rm <- mean(diff.s)/se(diff.s)
t.cal.rm
p.val.rm <- pt(abs(t.cal.rm), length(s1)-1, lower.tail = F) * 2
p.val.rm
t.test(s1, s2, paired = T)

# create overlaid histograms
s.all <- c(s1,s2)
mean(s.all)
hist(s1, col='grey', breaks=50, xlim=c(50, 150))
hist(s2, col='darkgreen', breaks=50, add=TRUE)
abline(v=c(mean(s.all)),
       col=c("red"), lwd=3)
abline(v=c(mean(s1), mean(s2)),
       col=c("black", "green"), lwd=3)

comb <- data.frame(s1,s2)
dat <- stack(comb)
head(dat)

m.tot <- mean(s.all)
m.s1 <- mean(s1)
m.s2 <- mean(s2)

ss.tot <- ss(s.all)
ss.s1 <- ss(s1)
ss.s2 <- ss(s2)

df.tot <- length(s.all)-1
df.s1 <- length(s1)-1
df.s2 <- length(s2)-1

ms.tot <- var(s.all)
ms.tot
ss.tot/df.tot

var(s1)
ss.s1 / df.s1

var(s2)
ss.s2 / df.s2

ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2)
ss.b.s2 <- length(s2) * ((m.tot - m.s1)^2)   # note: should be (m.tot - m.s2)^2; the value is identical here because both groups have equal n
ss.bet <- ss.b.s1+ss.b.s2
ss.bet

ss.wit <- ss.s1 + ss.s2
ss.wit

ss.bet + ss.wit
ss.tot

library(dplyr)   # (loaded here, but not required by the code below)
# df.bet <- length(unique(dat)) - 1
df.bet <- nlevels(dat$ind) - 1
df.wit <- df.s1+df.s2
df.bet
df.wit
df.bet+df.wit
df.tot

ms.bet <- ss.bet / df.bet
ms.wit <- ss.wit / df.wit
ms.bet
ms.wit

f.cal <- ms.bet / ms.wit
f.cal
pf(f.cal, df.bet, df.wit, lower.tail = F)

f.test <- aov(dat$values ~ dat$ind, data = dat)
summary(f.test)

sqrt(f.cal)   # equals |t.cal.ts|; with two groups, F = t^2
t.cal.ts

# this is ANOVA, after all.
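# Editor's aside (a sketch, not part of the original session): the
# equivalence noted above can be checked directly by comparing the p-values
# returned by t.test() and aov() on the same stacked data. t.p and f.p are
# illustrative names.
t.p <- t.test(values ~ ind, data = dat, var.equal = T)$p.value
f.p <- summary(aov(values ~ ind, data = dat))[[1]][["Pr(>F)"]][1]
c(t.p, f.p)   # identical up to rounding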
====== output ====== > rm(list=ls()) > rnorm2 <- function(n,mean,sd){ + mean+sd*scale(rnorm(n)) + } > se <- function(sample) { + sd(sample)/sqrt(length(sample)) + } > ss <- function(x) { + sum((x-mean(x))^2) + } > > N.p <- 1000000 > m.p <- 100 > sd.p <- 10 > > > p1 <- rnorm2(N.p, m.p, sd.p) > mean(p1) [1] 100 > sd(p1) [1] 10 > > p2 <- rnorm2(N.p, m.p+10, sd.p) > mean(p2) [1] 110 > sd(p2) [1] 10 > > hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), + main = "histogram of p1 and p2",) > abline(v=mean(p1), col="black", lwd=3) > hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) > abline(v=mean(p2), col="red", lwd=3) > > s.size <- 1000 > s2 <- sample(p2, s.size) > mean(s2) [1] 110.4892 > sd(s2) [1] 10.36614 > > se.z <- sqrt(var(p1)/s.size) > se.z <- c(se.z) > se.z.range <- c(-2*se.z,2*se.z) > se.z.range [1] -0.6324555 0.6324555 > > mean(p1)+se.z.range [1] 99.36754 100.63246 > mean(s2) [1] 110.4892 > > z.cal <- (mean(s2) - mean(p1)) / se.z > z.cal [1] 33.16976 > pnorm(z.cal, lower.tail = F) * 2 [1] 2.93954e-241 > > z.cal [1] 33.16976 > > # principles . . . > # distribution of sample means > iter <- 100000 > means <- c() > for (i in 1:iter) { + m.of.s <- mean(sample(p1, s.size)) + means <- append(means, m.of.s) + } > > hist(means, + xlim = c(mean(means)-3*sd(means), mean(s2)+5), + col = rgb(1, 0, 0, 0.5)) > abline(v=mean(p1), col="black", lwd=3) > abline(v=mean(s2), + col="blue", lwd=3) > lo1 <- mean(p1)-se.z > hi1 <- mean(p1)+se.z > lo2 <- mean(p1)-2*se.z > hi2 <- mean(p1)+2*se.z > abline(v=c(lo1, hi1, lo2, hi2), + col=c("green","green", "brown", "brown"), + lwd=2) > se.z [1] 0.3162278 > c(lo2, hi2) [1] 99.36754 100.63246 > pnorm(z.cal, lower.tail = F) * 2 [1] 2.93954e-241 > > > # Note that we use sqrt(var(s2)/s.size) > # as our se value instread of > # sqrt(var(p1)/s.size) > # This is a common practice for R > # In fact, some z.test (made by someone) > # function uses the former rather than > # latter. 
> > sqrt(var(p1)/s.size) [,1] [1,] 0.3162278 > se.z [1] 0.3162278 > > sqrt(var(s2)/s.size) [1] 0.3278061 > se(s2) [1] 0.3278061 > > t.cal.os <- (mean(s2) - mean(p1))/ se(s2) > z.cal <- (mean(s2) - mean(p1))/ se.z > t.cal.os [1] 31.99818 > z.cal [1] 33.16976 > > df.s2 <- length(s2)-1 > df.s2 [1] 999 > p.t.os <- pt(abs(t.cal.os), df.s2, lower.tail = F) * 2 > p.t.os [1] 3.162139e-155 > t.out <- t.test(s2, mu=mean(p1)) > > library(BSDA) 필요한 패키지를 로딩중입니다: lattice 다음의 패키지를 부착합니다: ‘BSDA’ The following object is masked from ‘package:datasets’: Orange 경고메시지(들): 패키지 ‘BSDA’는 R 버전 4.3.3에서 작성되었습니다 > z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) > > z.out$statistic # se.z 대신에 se(s2) 값으로 구한 z 값 z 31.9833 > z.cal # se.z으로 (sqrt(var(p1)/s.size)값) 구한 z 값 [1] 33.16976 > > t.out$statistic # se(s2)를 분모로 하여 구한 t 값 t 31.99818 > t.cal.os # se(s2)를 이용하여 손으로 구한 t 값 [1] 31.99818 > > # But, after all, we use t.test method regardless of > # variation > > hist(means, + xlim = c(mean(means)-3*sd(means), mean(s2)+5), + col = rgb(1, 0, 0, 0.5)) > abline(v=mean(p1), col="black", lwd=3) > abline(v=mean(s2), + col="blue", lwd=3) > lo1 <- mean(p1)-se.z > hi1 <- mean(p1)+se.z > lo2 <- mean(p1)-2*se.z > hi2 <- mean(p1)+2*se.z > abline(v=c(lo1, hi1, lo2, hi2), + col=c("green","green", "brown", "brown"), + lwd=2) > > # difference between black and blue line > # divided by > # se(s2) (= random difference) > # t.value > mean(s2) [1] 110.4892 > mean(p1) [1] 100 > diff <- mean(s2)-mean(p1) > diff [1] 10.4892 > se(s2) [1] 0.3278061 > diff/se(s2) [1] 31.99818 > > t.cal.os [1] 31.99818 > > ######################## > # 2 sample t-test > ######################## > # 가정. 아래에서 추출하는 두 > # 샘플의 모집단의 파라미터를 > # 모른다. > s1 <- sample(p1, s.size) > s2 <- sample(p2, s.size) > > mean(s1) [1] 100.7138 > mean(s2) [1] 109.8804 > ss(s1) [1] 108288.9 > ss(s2) [1] 100751 > df.s1 <- length(s1)-1 > df.s2 <- length(s2)-1 > df.s1 [1] 999 > df.s2 [1] 999 > > pooled.variance <- (ss(s1)+ss(s2))/(df.s1+df.s2) > pooled.variance [1] 104.6246 > se.ts <- sqrt((pooled.variance/length(s1))+(pooled.variance/length(s2))) > se.ts [1] 0.4574376 > t.cal.ts <- (mean(s1)-mean(s2))/se.ts > t.cal.ts [1] -20.03892 > p.val.ts <- pt(abs(t.cal.ts), df=df.s1+df.s2, lower.tail = F) * 2 > p.val.ts [1] 1.522061e-81 > > t.test(s1, s2, var.equal = T) Two Sample t-test data: s1 and s2 t = -20.039, df = 1998, p-value < 2.2e-16 alternative hypothesis: true difference in means is not equal to 0 95 percent confidence interval: -10.06366 -8.26945 sample estimates: mean of x mean of y 100.7138 109.8804 > > se(s1) [1] 0.3292374 > se(s2) [1] 0.3175719 > > mean(s1)+c(-se(s1)*2, se(s1)*2) [1] 100.0553 101.3723 > mean(s2)+c(-se(s2)*2, se(s2)*2) [1] 109.2452 110.5155 > > mean(p1) [1] 100 > mean(p2) [1] 110 > > hist(s1, breaks=50, + col = rgb(1, 0, 0, 0.5)) > hist(s2, breaks=50, add=T, col=rgb(0,0,1,1)) > abline(v=mean(s1), col="green", lwd=3) > # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) > abline(v=mean(s2), col="lightblue", lwd=3) > > diff <- mean(s1)-mean(s2) > se.ts [1] 0.4574376 > diff/se.ts [1] -20.03892 > > #### > # repeated measure t-test > # we can use the above case > # pop paramter unknown > # two consecutive measurement > # for the same sample > > t1 <- s1 > t2 <- s2 > mean(t1) [1] 100.7138 > mean(t2) [1] 109.8804 > diff.s <- t1 - t2 > diff.s [1] -11.22445947 -3.01532631 -3.47460077 -14.77286374 -9.74734969 2.33544374 -1.53761723 [8] 5.16258644 -40.75608052 -4.94498515 13.05099703 -0.24847628 -6.26581498 -8.14116058 [15] 5.49152742 6.15799638 -4.65589524 -7.93104859 
  [ ... diff.s output truncated: 1,000 paired differences in total; the remaining values are omitted here ... ]
> t.cal.rm <- mean(diff.s)/se(diff.s)
> t.cal.rm
[1] -19.82846
> p.val.rm <- pt(abs(t.cal.rm), length(s1)-1, lower.tail = F) * 2
> p.val.rm
[1] 4.836389e-74
> t.test(s1, s2, paired = T)

	Paired t-test

data:  s1 and s2
t = -19.828, df = 999, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -10.073732  -8.259379
sample estimates:
mean difference 
      -9.166555 

> 
> # create multiple histogram
> s.all <- c(s1,s2)
> mean(s.all)
[1] 105.2971
> hist(s1, col='grey', breaks=50, xlim=c(50, 150))
> hist(s2, col='darkgreen', breaks=50, add=TRUE)
> abline(v=c(mean(s.all)),
+        col=c("red"), lwd=3)
> abline(v=c(mean(s1), mean(s2)),
+        col=c("black", "green"), lwd=3)
> 
> comb <- data.frame(s1,s2)
> dat <- stack(comb)
> head(dat)
     values ind
1  93.17788  s1
2 103.00254  s1
3 104.53388  s1
4  88.59698  s1
5 105.67789  s1
6 112.72657  s1
> 
> m.tot <- mean(s.all)
> m.s1 <- mean(s1)
> m.s2 <- mean(s2)
> 
> ss.tot <- ss(s.all)
> ss.s1 <- ss(s1)
> ss.s2 <- ss(s2)
> 
> df.tot <- length(s.all)-1
> df.s1 <- length(s1)-1
> df.s2 <- length(s2)-1
> 
> ms.tot <- var(s.all)
> ms.tot
[1] 125.5892
> ss.tot/df.tot
[1] 125.5892
> 
> var(s1)
[1] 108.3973
> ss.s1 / df.s1
[1] 108.3973
> 
> var(s2)
[1] 100.8519
> ss.s2 / df.s2
[1] 100.8519
> 
> 
ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) > ss.b.s2 <- length(s2) * ((m.tot - m.s1)^2) > ss.bet <- ss.b.s1+ss.b.s2 > ss.bet [1] 42012.87 > > ss.wit <- ss.s1 + ss.s2 > ss.wit [1] 209039.9 > > ss.bet + ss.wit [1] 251052.8 > ss.tot [1] 251052.8 > > library(dplyr) 다음의 패키지를 부착합니다: ‘dplyr’ The following objects are masked from ‘package:stats’: filter, lag The following objects are masked from ‘package:base’: intersect, setdiff, setequal, union > # df.bet <- length(unique(dat)) - 1 > df.bet <- nlevels(dat$ind) - 1 > df.wit <- df.s1+df.s2 > df.bet [1] 1 > df.wit [1] 1998 > df.bet+df.wit [1] 1999 > df.tot [1] 1999 > > ms.bet <- ss.bet / df.bet > ms.wit <- ss.wit / df.wit > ms.bet [1] 42012.87 > ms.wit [1] 104.6246 > > f.cal <- ms.bet / ms.wit > f.cal [1] 401.5582 > pf(f.cal, df.bet, df.wit, lower.tail = F) [1] 1.522061e-81 > > > f.test <- aov(dat$values~ dat$ind, data = dat) > summary(f.test) Df Sum Sq Mean Sq F value Pr(>F) dat$ind 1 42013 42013 401.6 <2e-16 *** Residuals 1998 209040 105 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 > > sqrt(f.cal) [1] 20.03892 > t.cal.ts [1] -20.03892 > > # this is anova after all. >
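# Editor's aside (a sketch appended here, not part of the session above; it
# assumes the objects from the "T-test sum up" section, e.g. p1, are still in
# the workspace): a quick check of the Type I error rate. When both samples
# come from the same population, a two-sample t-test at alpha = .05 should
# reject in roughly 5% of replications. n.rep, alpha, and p.vals are
# illustrative names.
n.rep <- 1000
alpha <- .05
p.vals <- replicate(n.rep,
                    t.test(sample(p1, 50), sample(p1, 50), var.equal = T)$p.value)
mean(p.vals < alpha)   # should be close to .05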