Sampling Distribution and z-test
rm(list=ls())
# rnorm2: simulate n values with exactly the requested mean and sd
# (scale() standardizes the random draws, which are then rescaled)
rnorm2 <- function(n, mean, sd){
  mean + sd * scale(rnorm(n))
}
# se: standard error of the mean of a sample
se <- function(sample) {
  sd(sample) / sqrt(length(sample))
}
# ss: sum of squared deviations from the mean
ss <- function(x) {
  sum((x - mean(x))^2)
}
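# Quick sanity check of the helpers (a sketch; x.demo is a throwaway
# object, not used anywhere below):
x.demo <- c(2, 4, 6, 8)
se(x.demo)   # sd(x.demo)/sqrt(4) = 1.290994
ss(x.demo)   # (-3)^2 + (-1)^2 + 1^2 + 3^2 = 20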
################################
N.p <- 1000000
m.p <- 100
sd.p <- 10
p1 <- rnorm2(N.p, m.p, sd.p)
mean(p1)
sd(p1)
p2 <- rnorm2(N.p, m.p+5, sd.p)
mean(p2)
sd(p2)
m.p1 <- mean(p1)
sd.p1 <- sd(p1)
var(p1)
hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5),
     main = "histogram of p1 and p2")
abline(v=mean(p1), col="black", lwd=3)
hist(p2, add=T, breaks=50, col=rgb(1,1,.5,.5))
abline(v=mean(p2), col="red", lwd=3)
hist(p1, breaks=50, col=rgb(0,.5,.5,.5))
abline(v=mean(p1),lwd=2)
abline(v=mean(p1)-sd(p1), lwd=2)
abline(v=mean(p1)+sd(p1), lwd=2)
abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green')
# area between the black lines = ~68%
# between the red lines = ~95%
# between the green lines = ~99.7%
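# An optional empirical check (a sketch using the simulated p1 above):
# the observed proportions should be close to the theoretical values
# computed with pnorm() just below.
mean(p1 > m.p1 - sd.p1 & p1 < m.p1 + sd.p1)
mean(p1 > m.p1 - 2*sd.p1 & p1 < m.p1 + 2*sd.p1)
mean(p1 > m.p1 - 3*sd.p1 & p1 < m.p1 + 3*sd.p1)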
pnorm(m.p1+sd.p1, m.p1, sd.p1)
pnorm(m.p1-sd.p1, m.p1, sd.p1)
pnorm(m.p1+sd.p1, m.p1, sd.p1) -
pnorm(m.p1-sd.p1, m.p1, sd.p1)
pnorm(m.p1+2*sd.p1, m.p1, sd.p1) -
pnorm(m.p1-2*sd.p1, m.p1, sd.p1)
pnorm(m.p1+3*sd.p1, m.p1, sd.p1) -
pnorm(m.p1-3*sd.p1, m.p1, sd.p1)
m.p1
sd.p1
(m.p1-m.p1)/sd.p1
((m.p1-sd.p1) - m.p1) / sd.p1
(120-100)/10
pnorm(1)-pnorm(-1)
pnorm(2)-pnorm(-2)
pnorm(3)-pnorm(-3)
1-pnorm(-2)*2
pnorm(2)-pnorm(-2)
pnorm(120, 100, 10)
pnorm(2)-pnorm(-2)
zscore <- (120-100)/10
pnorm(zscore)-pnorm(-zscore)
zscore
# reminder.
pnorm(-1)
pnorm(1, 0, 1, lower.tail = F)
pnorm(110, 100, 10, lower.tail = F)
zscore <- (110-100)/10
pnorm(zscore, lower.tail = F)
pnorm(118, 100, 10, lower.tail = F)
pnorm(18/10, lower.tail = F)
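# The general pattern behind the two calls above (a sketch): for any
# x, mu, and sigma, pnorm(x, mu, sigma) equals pnorm((x - mu)/sigma).
all.equal(pnorm(118, 100, 10), pnorm((118 - 100)/10))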
z.p1 <- (p1-mean(p1))/sd(p1)
mean(z.p1)
round(mean(z.p1),10)
sd(z.p1)
pnorm(1.8)-pnorm(-1.8)
hist(z.p1, breaks=50, col=rgb(1,0,0,0))
abline(v=c(m.p1, -1.8, 1.8), col='red')
1-(pnorm(1.8)-pnorm(-1.8))
pnorm(1)-pnorm(-1)
1-(pnorm(-1)*2)
pnorm(2)-pnorm(-2)
1-(pnorm(-2)*2)
1-(pnorm(-3)*2)
#
hist(p1, breaks=50, col=rgb(.9,.9,.9,.9))
abline(v=mean(p1),lwd=2)
abline(v=mean(p1)-sd(p1), lwd=2)
abline(v=mean(p1)+sd(p1), lwd=2)
abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green')
# 68%
a <- qnorm(.32/2)
b <- qnorm(1-.32/2)
c(a, b)
c(-1, 1)
# note that
.32/2
pnorm(-1)
qnorm(.32/2)
qnorm(pnorm(-1))
# 95%
c <- qnorm(.05/2)
d <- qnorm(1-.05/2)
c(c, d)
c(-2,2)
# 99%
e <- qnorm(.01/2)
f <- qnorm(1-.01/2)
c(e,f)
c(-3,3)
pnorm(b)-pnorm(a)
c(a, b)
pnorm(d)-pnorm(c)
c(c, d)
pnorm(f)-pnorm(e)
c(e, f)
qnorm(.5)
qnorm(1)
################################
s.size <- 50
means.temp <- c()
s1 <- sample(p1, s.size, replace = T)
mean(s1)
means.temp <- append(means.temp, mean(s1))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp
iter <- 1000000
# means <- c()
means <- rep(NA, iter)
for (i in 1:iter) {
# means <- append(means, mean(sample(p1, s.size, replace = T)))
means[i] <- mean(sample(p1, s.size, replace = T))
}
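# The same sampling distribution can be built more compactly with
# replicate(); a small sketch (means.rep is a new object, not used
# below; 1000 repetitions keep it fast):
means.rep <- replicate(1000, mean(sample(p1, s.size, replace = T)))
c(mean(means.rep), sd(means.rep))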
length(means)
mean(means)
sd(means)
se.s <- sd(means)
hist(means, breaks=50,
xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
col=rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
# now we want to get sd of this distribution
lo1 <- mean(means)-se.s
hi1 <- mean(means)+se.s
lo2 <- mean(means)-2*se.s
hi2 <- mean(means)+2*se.s
lo3 <- mean(means)-3*se.s
hi3 <- mean(means)+3*se.s
abline(v=mean(means), col="black", lwd=2)
# abline(v=mean(p2), colo='darkgreen', lwd=2)
abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
col=c("red","red", "blue", "blue", "orange", "orange"),
lwd=2)
# meanwhile . . . .
se.s
se.z <- sqrt(var(p1)/s.size)
se.z <- c(se.z)
se.z
se.z.adjusted <- sqrt(var(s1)/s.size)
se.z.adjusted
# the sd of the sample means (sd(means)) is what we call se.s
# as the number of iterations goes to infinity:
#   mean(means) = mean(p1)
#   sd(means)   = sd(p1) / sqrt(s.size)
# that is, se.s = se.z
# This is the CLT (Central Limit Theorem)
# see http://commres.net/wiki/cetral_limit_theorem
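# A small sketch of the same idea: the theoretical standard error
# sd(p1)/sqrt(n) shrinks as the sample size n grows (sizes.demo is a
# throwaway vector):
sizes.demo <- c(10, 50, 100, 500)
sd(p1)/sqrt(sizes.demo)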
mean(means)
mean(p1)
sd(means)
var(p1)
# recall the sample size we have been using: s.size = 50
sqrt(var(p1)/s.size)
se.z
sd(means)
se.s
se.z
# because CLT
loz1 <- mean(p1)-se.z
hiz1 <- mean(p1)+se.z
loz2 <- mean(p1)-2*se.z
hiz2 <- mean(p1)+2*se.z
loz3 <- mean(p1)-3*se.z
hiz3 <- mean(p1)+3*se.z
c(lo1, loz1)
c(lo2, loz2)
c(lo3, loz3)
c(hi1, hiz1)
c(hi2, hiz2)
c(hi3, hiz3)
hist(means, breaks=50,
xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
col = rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
# abline(v=mean(p2), colo='darkgreen', lwd=3)
abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
col=c("darkgreen","darkgreen", "blue", "blue", "orange", "orange"),
lwd=2)
round(c(lo1, hi1))
round(c(lo2, hi2))
round(c(lo3, hi3))
round(c(loz1, hiz1))
round(c(loz2, hiz2))
round(c(loz3, hiz3))
m.sample.i.got <- mean(means)+ 1.5*sd(means)
m.sample.i.got
hist(means, breaks=30,
xlim = c(mean(means)-7*sd(means), mean(means)+10*sd(means)),
col = rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
abline(v=m.sample.i.got, col='darkgreen', lwd=3)
# what is the probability of getting
# a sample mean greater than
# m.sample.i.got?
m.sample.i.got
pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F)
pnorm(m.sample.i.got, mean(p1), se.z, lower.tail = F)
# then, what is the probability of getting a sample mean
# greater than m.sample.i.got (the dark green line) or
# less than its mirror image below the mean,
# mean(means) - (m.sample.i.got - mean(means))
# (the red line)?
tmp <- mean(means) - (m.sample.i.got - mean(means))
abline(v=tmp, col='red', lwd=3)
2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F)
m.sample.i.got
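# The same two-tailed probability via a z score (a sketch; z.tmp is a
# temporary name):
z.tmp <- (m.sample.i.got - mean(p1)) / se.z
2 * pnorm(abs(z.tmp), lower.tail = F)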
### one more time
# this time, with a story
mean(p2)
sd(p2)
s.from.p2 <- sample(p2, s.size)
m.s.from.p2 <- mean(s.from.p2)
m.s.from.p2
se.s
se.z
sd(means)
tmp <- mean(means) - (m.s.from.p2 - mean(means))
tmp
hist(means, breaks=30,
xlim = c(tmp-4*sd(means), m.s.from.p2+4*sd(means)),
col = rgb(1, 1, 1, .5))
abline(v=mean(means), col="black", lwd=3)
abline(v=m.s.from.p2, col='blue', lwd=3)
# what is the probability of getting
# a sample mean greater than
# m.s.from.p2?
m.s.from.p2
pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
# then, what is the probability of getting a sample mean
# greater than m.s.from.p2 (the blue line) or
# less than its mirror image below the mean,
# mean(means) - (m.s.from.p2 - mean(means))
# (the red line)?
abline(v=tmp, col='red', lwd=3)
2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
se.z
sd(s.from.p2)/sqrt(s.size)
se.z.adjusted <- sqrt(var(s.from.p2)/s.size)
se.z.adjusted
2 * pnorm(m.s.from.p2, mean(p1), se.z.adjusted,
lower.tail = F)
z.cal <- (m.s.from.p2 - mean(p1))/se.z.adjusted
z.cal
pnorm(z.cal, lower.tail = F)*2
pt(z.cal, 49, lower.tail = F)*2
t.test(s.from.p2, mu=mean(p1), var.equal = T)
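# A minimal one-sample z-test helper that packages the steps above
# (a sketch under the same assumption of a known population sigma;
# z.test.os is our own name, not a base R function):
z.test.os <- function(x, mu, sigma) {
  z <- (mean(x) - mu) / (sigma / sqrt(length(x)))
  c(z = z, p.two.tailed = 2 * pnorm(abs(z), lower.tail = F))
}
z.test.os(s.from.p2, mean(p1), sd(p1))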
output
> rm(list=ls())
> rnorm2 <- function(n,mean,sd){
+ mean+sd*scale(rnorm(n))
+ }
>
> se <- function(sample) {
+ sd(sample)/sqrt(length(sample))
+ }
>
> ss <- function(x) {
+ sum((x-mean(x))^2)
+ }
>
> ################################
> N.p <- 1000000
> m.p <- 100
> sd.p <- 10
>
> p1 <- rnorm2(N.p, m.p, sd.p)
> mean(p1)
[1] 100
> sd(p1)
[1] 10
>
> p2 <- rnorm2(N.p, m.p+5, sd.p)
> mean(p2)
[1] 105
> sd(p2)
[1] 10
>
> m.p1 <- mean(p1)
> sd.p1 <- sd(p1)
> var(p1)
[,1]
[1,] 100
>
>
> hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5),
+ main = "histogram of p1 and p2",)
> abline(v=mean(p1), col="black", lwd=3)
> hist(p2, add=T, breaks=50, col=rgb(1,1,.5,.5))
> abline(v=mean(p2), col="red", lwd=3)
>
>
> hist(p1, breaks=50, col=rgb(0,.5,.5,.5))
> abline(v=mean(p1),lwd=2)
> abline(v=mean(p1)-sd(p1), lwd=2)
> abline(v=mean(p1)+sd(p1), lwd=2)
> abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
> abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green')
>
> # area between the black lines = ~68%
> # between the red lines = ~95%
> # between the green lines = ~99.7%
>
> pnorm(m.p1+sd.p1, m.p1, sd.p1)
[1] 0.8413447
> pnorm(m.p1-sd.p1, m.p1, sd.p1)
[1] 0.1586553
> pnorm(m.p1+sd.p1, m.p1, sd.p1) -
+ pnorm(m.p1-sd.p1, m.p1, sd.p1)
[1] 0.6826895
>
> pnorm(m.p1+2*sd.p1, m.p1, sd.p1) -
+ pnorm(m.p1-2*sd.p1, m.p1, sd.p1)
[1] 0.9544997
>
> pnorm(m.p1+3*sd.p1, m.p1, sd.p1) -
+ pnorm(m.p1-3*sd.p1, m.p1, sd.p1)
[1] 0.9973002
>
> m.p1
[1] 100
> sd.p1
[1] 10
>
> (m.p1-m.p1)/sd.p1
[1] 0
> ((m.p1-sd.p1) - m.p1) / sd.p1
[1] -1
> (120-100)/10
[1] 2
> pnorm(1)-pnorm(-1)
[1] 0.6826895
> pnorm(2)-pnorm(-2)
[1] 0.9544997
> pnorm(3)-pnorm(-3)
[1] 0.9973002
>
> 1-pnorm(-2)*2
[1] 0.9544997
> pnorm(2)-pnorm(-2)
[1] 0.9544997
>
> pnorm(120, 100, 10)
[1] 0.9772499
> pnorm(2)-pnorm(-2)
[1] 0.9544997
>
> zscore <- (120-100)/10
> pnorm(zscore)-pnorm(-zscore)
[1] 0.9544997
> zscore
[1] 2
>
> # reminder.
> pnorm(-1)
[1] 0.1586553
> pnorm(1, 0, 1, lower.tail = F)
[1] 0.1586553
> pnorm(110, 100, 10, lower.tail = F)
[1] 0.1586553
> zscore <- (110-100)/10
> pnorm(zscore, lower.tail = F)
[1] 0.1586553
>
> pnorm(118, 100, 10, lower.tail = F)
[1] 0.03593032
> pnorm(18/10, lower.tail = F)
[1] 0.03593032
>
> z.p1 <- (p1-mean(p1))/sd(p1)
> mean(z.p1)
[1] 8.189457e-18
> round(mean(z.p1),10)
[1] 0
> sd(z.p1)
[1] 1
> pnorm(1.8)-pnorm(-1.8)
[1] 0.9281394
>
> hist(z.p1, breaks=50, col=rgb(1,0,0,0))
> abline(v=c(m.p1, -1.8, 1.8), col='red')
> 1-(pnorm(1.8)-pnorm(-1.8))
[1] 0.07186064
>
>
> pnorm(1)-pnorm(-1)
[1] 0.6826895
> 1-(pnorm(-1)*2)
[1] 0.6826895
>
> pnorm(2)-pnorm(-2)
[1] 0.9544997
> 1-(pnorm(-2)*2)
[1] 0.9544997
>
> 1-(pnorm(-3)*2)
[1] 0.9973002
>
> #
> hist(p1, breaks=50, col=rgb(.9,.9,.9,.9))
> abline(v=mean(p1),lwd=2)
> abline(v=mean(p1)-sd(p1), lwd=2)
> abline(v=mean(p1)+sd(p1), lwd=2)
> abline(v=c(m.p1-2*sd.p1, m.p1+2*sd.p1), lwd=2, col='red')
> abline(v=c(m.p1-3*sd.p1, m.p1+3*sd.p1), lwd=2, col='green')
>
> # 68%
> a <- qnorm(.32/2)
> b <- qnorm(1-.32/2)
> c(a, b)
[1] -0.9944579 0.9944579
> c(-1, 1)
[1] -1 1
> # note that
> .32/2
[1] 0.16
> pnorm(-1)
[1] 0.1586553
> qnorm(.32/2)
[1] -0.9944579
> qnorm(pnorm(-1))
[1] -1
>
> # 95%
> c <- qnorm(.05/2)
> d <- qnorm(1-.05/2)
> c(c, d)
[1] -1.959964 1.959964
> c(-2,2)
[1] -2 2
>
> # 99%
> e <- qnorm(.01/2)
> f <- qnorm(1-.01/2)
> c(e,f)
[1] -2.575829 2.575829
> c(-3,3)
[1] -3 3
>
>
> pnorm(b)-pnorm(a)
[1] 0.68
> c(a, b)
[1] -0.9944579 0.9944579
> pnorm(d)-pnorm(c)
[1] 0.95
> c(c, d)
[1] -1.959964 1.959964
> pnorm(f)-pnorm(e)
[1] 0.99
> c(e, f)
[1] -2.575829 2.575829
>
> qnorm(.5)
[1] 0
> qnorm(1)
[1] Inf
>
>
> ################################
> s.size <- 50
>
> means.temp <- c()
> s1 <- sample(p1, s.size, replace = T)
> mean(s1)
[1] 98.76098
> means.temp <- append(means.temp, mean(s1))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp
[1] 98.76098 99.90935 99.29643 99.66014 101.93822
>
> iter <- 1000000
> # means <- c()
> means <- rep(NA, iter)
> for (i in 1:iter) {
+ # means <- append(means, mean(sample(p1, s.size, replace = T)))
+ means[i] <- mean(sample(p1, s.size, replace = T))
+ }
> length(means)
[1] 1000000
> mean(means)
[1] 99.99824
> sd(means)
[1] 1.414035
> se.s <- sd(means)
>
> hist(means, breaks=50,
+ xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
+ col=rgb(1, 1, 1, .5))
> abline(v=mean(means), col="black", lwd=3)
> # now we want to get sd of this distribution
> lo1 <- mean(means)-se.s
> hi1 <- mean(means)+se.s
> lo2 <- mean(means)-2*se.s
> hi2 <- mean(means)+2*se.s
> lo3 <- mean(means)-3*se.s
> hi3 <- mean(means)+3*se.s
> abline(v=mean(means), col="black", lwd=2)
> # abline(v=mean(p2), colo='darkgreen', lwd=2)
> abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
+ col=c("red","red", "blue", "blue", "orange", "orange"),
+ lwd=2)
>
> # meanwhile . . . .
> se.s
[1] 1.414035
>
> se.z <- sqrt(var(p1)/s.size)
> se.z <- c(se.z)
> se.z
[1] 1.414214
>
> se.z.adjusted <- sqrt(var(s1)/s.size)
> se.z.adjusted
[1] 1.370464
>
> # sd of sample means (sd(means))
> # = se.s
>
> # as the number of iterations goes to infinity:
> #   mean(means) = mean(p1)
> #   sd(means)   = sd(p1) / sqrt(s.size)
> # that is, se.s = se.z
> # This is called CLT (Central Limit Theorem)
> # see http://commres.net/wiki/cetral_limit_theorem
>
> mean(means)
[1] 99.99824
> mean(p1)
[1] 100
> sd(means)
[1] 1.414035
> var(p1)
[,1]
[1,] 100
> # recall the sample size we have been using: s.size = 50
> sqrt(var(p1)/s.size)
[,1]
[1,] 1.414214
> se.z
[1] 1.414214
>
> sd(means)
[1] 1.414035
> se.s
[1] 1.414035
> se.z
[1] 1.414214
>
> # because CLT
> loz1 <- mean(p1)-se.z
> hiz1 <- mean(p1)+se.z
> loz2 <- mean(p1)-2*se.z
> hiz2 <- mean(p1)+2*se.z
> loz3 <- mean(p1)-3*se.z
> hiz3 <- mean(p1)+3*se.z
>
> c(lo1, loz1)
[1] 98.58421 98.58579
> c(lo2, loz2)
[1] 97.17017 97.17157
> c(lo3, loz3)
[1] 95.75614 95.75736
>
> c(hi1, hiz1)
[1] 101.4123 101.4142
> c(hi2, hiz2)
[1] 102.8263 102.8284
> c(hi3, hiz3)
[1] 104.2403 104.2426
>
>
> hist(means, breaks=50,
+ xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
+ col = rgb(1, 1, 1, .5))
> abline(v=mean(means), col="black", lwd=3)
> # abline(v=mean(p2), colo='darkgreen', lwd=3)
> abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
+ col=c("darkgreen","darkgreen", "blue", "blue", "orange", "orange"),
+ lwd=2)
>
> round(c(lo1, hi1))
[1] 99 101
> round(c(lo2, hi2))
[1] 97 103
> round(c(lo3, hi3))
[1] 96 104
>
> round(c(loz1, hiz1))
[1] 99 101
> round(c(loz2, hiz2))
[1] 97 103
> round(c(loz3, hiz3))
[1] 96 104
>
> m.sample.i.got <- mean(means)+ 1.5*sd(means)
> m.sample.i.got
[1] 102.1193
>
> hist(means, breaks=30,
+ xlim = c(mean(means)-7*sd(means), mean(means)+10*sd(means)),
+ col = rgb(1, 1, 1, .5))
> abline(v=mean(means), col="black", lwd=3)
> abline(v=m.sample.i.got, col='darkgreen', lwd=3)
>
> # what is the probability of getting
> # a sample mean greater than
> # m.sample.i.got?
> m.sample.i.got
[1] 102.1193
> pnorm(m.sample.i.got, mean(means), sd(means), lower.tail = F)
[1] 0.0668072
> pnorm(m.sample.i.got, mean(p1), se.z, lower.tail = F)
[1] 0.0669929
>
> # then, what is the probability of getting a sample mean
> # greater than m.sample.i.got (the dark green line) or
> # less than its mirror image below the mean,
> # mean(means) - (m.sample.i.got - mean(means))
> # (the red line)?
> tmp <- mean(means) - (m.sample.i.got - mean(means))
> abline(v=tmp, col='red', lwd=3)
> 2 * pnorm(m.sample.i.got, mean(p1), sd(means), lower.tail = F)
[1] 0.1339368
> m.sample.i.got
[1] 102.1193
>
> ### one more time
> # this time, with a story
> mean(p2)
[1] 105
> sd(p2)
[1] 10
> s.from.p2 <- sample(p2, s.size)
> m.s.from.p2 <- mean(s.from.p2)
> m.s.from.p2
[1] 103.1283
>
> se.s
[1] 1.414035
> se.z
[1] 1.414214
> sd(means)
[1] 1.414035
>
> tmp <- mean(means) - (m.s.from.p2 - mean(means))
> tmp
[1] 96.86822
>
> hist(means, breaks=30,
+ xlim = c(tmp-4*sd(means), m.s.from.p2+4*sd(means)),
+ col = rgb(1, 1, 1, .5))
> abline(v=mean(means), col="black", lwd=3)
> abline(v=m.s.from.p2, col='blue', lwd=3)
>
> # what is the probability of getting
> # a sample mean greater than
> # m.s.from.p2?
> m.s.from.p2
[1] 103.1283
> pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
[1] 0.01348266
>
> # then, what is the probability of getting a sample mean
> # greater than m.s.from.p2 (the blue line) or
> # less than its mirror image below the mean,
> # mean(means) - (m.s.from.p2 - mean(means))
> # (the red line)?
> abline(v=tmp, col='red', lwd=3)
> 2 * pnorm(m.s.from.p2, mean(p1), se.z, lower.tail = F)
[1] 0.02696533
>
> se.z
[1] 1.414214
> sd(s.from.p2)/sqrt(s.size)
[1] 1.414296
> se.z.adjusted <- sqrt(var(s.from.p2)/s.size)
> se.z.adjusted
[1] 1.414296
> 2 * pnorm(m.s.from.p2, mean(p1), se.z.adjusted,
+ lower.tail = F)
[1] 0.02697421
>
> z.cal <- (m.s.from.p2 - mean(p1))/se.z.adjusted
> z.cal
[1] 2.211891
> pnorm(z.cal, lower.tail = F)*2
[1] 0.02697421
>
>
> pt(z.cal, 49, lower.tail = F)*2
[1] 0.03166797
> t.test(s.from.p2, mu=mean(p1), var.equal = T)
One Sample t-test
data: s.from.p2
t = 2.2119, df = 49, p-value = 0.03167
alternative hypothesis: true mean is not equal to 100
95 percent confidence interval:
100.2861 105.9704
sample estimates:
mean of x
103.1283
>
T-test sum up
rm(list=ls())
rnorm2 <- function(n,mean,sd){
mean+sd*scale(rnorm(n))
}
se <- function(sample) {
sd(sample)/sqrt(length(sample))
}
ss <- function(x) {
sum((x-mean(x))^2)
}
N.p <- 1000000
m.p <- 100
sd.p <- 10
p1 <- rnorm2(N.p, m.p, sd.p)
mean(p1)
sd(p1)
p2 <- rnorm2(N.p, m.p+10, sd.p)
mean(p2)
sd(p2)
hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5),
     main = "histogram of p1 and p2")
abline(v=mean(p1), col="black", lwd=3)
hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5))
abline(v=mean(p2), col="red", lwd=3)
s.size <- 1000
s2 <- sample(p2, s.size)
mean(s2)
sd(s2)
se.z <- sqrt(var(p1)/s.size)
se.z <- c(se.z)
se.z.range <- c(-2*se.z,2*se.z)
se.z.range
mean(p1)+se.z.range
mean(s2)
z.cal <- (mean(s2) - mean(p1)) / se.z
z.cal
pnorm(z.cal, lower.tail = F) * 2
z.cal
# principles . . .
# distribution of sample means
iter <- 100000
means <- c()
for (i in 1:iter) {
m.of.s <- mean(sample(p1, s.size))
means <- append(means, m.of.s)
}
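# (side note: growing a vector with append() inside a loop is slow;
# preallocating as in the first section, means <- rep(NA, iter), and
# filling by index is the faster, more idiomatic pattern)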
hist(means,
xlim = c(mean(means)-3*sd(means), mean(s2)+5),
col = rgb(1, 0, 0, 0.5))
abline(v=mean(p1), col="black", lwd=3)
abline(v=mean(s2),
col="blue", lwd=3)
lo1 <- mean(p1)-se.z
hi1 <- mean(p1)+se.z
lo2 <- mean(p1)-2*se.z
hi2 <- mean(p1)+2*se.z
abline(v=c(lo1, hi1, lo2, hi2),
col=c("green","green", "brown", "brown"),
lwd=2)
se.z
c(lo2, hi2)
pnorm(z.cal, lower.tail = F) * 2
# Note that we use sqrt(var(s2)/s.size), the standard error computed
# from the sample, instead of sqrt(var(p1)/s.size), which is based on
# the population variance. This is common practice in R; in fact,
# some user-written z.test() functions use the former rather than
# the latter.
sqrt(var(p1)/s.size)
se.z
sqrt(var(s2)/s.size)
se(s2)
t.cal.os <- (mean(s2) - mean(p1))/ se(s2)
z.cal <- (mean(s2) - mean(p1))/ se.z
t.cal.os
z.cal
df.s2 <- length(s2)-1
df.s2
p.t.os <- pt(abs(t.cal.os), df.s2, lower.tail = F) * 2
p.t.os
t.out <- t.test(s2, mu=mean(p1))
library(BSDA)
z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1))
z.out$statistic # z value computed with se(s2) instead of se.z
z.cal # z value computed with se.z (= sqrt(var(p1)/s.size))
t.out$statistic # t value computed with se(s2) in the denominator
t.cal.os # t value computed by hand using se(s2)
# But, after all, we use the t.test() method regardless of
# these small variations.
hist(means,
xlim = c(mean(means)-3*sd(means), mean(s2)+5),
col = rgb(1, 0, 0, 0.5))
abline(v=mean(p1), col="black", lwd=3)
abline(v=mean(s2),
col="blue", lwd=3)
lo1 <- mean(p1)-se.z
hi1 <- mean(p1)+se.z
lo2 <- mean(p1)-2*se.z
hi2 <- mean(p1)+2*se.z
abline(v=c(lo1, hi1, lo2, hi2),
col=c("green","green", "brown", "brown"),
lwd=2)
# the difference between the black and blue lines,
# divided by
# se(s2) (= the difference expected by random chance),
# is the t value
mean(s2)
mean(p1)
diff <- mean(s2)-mean(p1)
diff
se(s2)
diff/se(s2)
t.cal.os
########################
# 2 sample t-test
########################
# Assumption: we do not know the parameters of the
# populations from which the two samples below
# are drawn.
s1 <- sample(p1, s.size)
s2 <- sample(p2, s.size)
mean(s1)
mean(s2)
ss(s1)
ss(s2)
df.s1 <- length(s1)-1
df.s2 <- length(s2)-1
df.s1
df.s2
pooled.variance <- (ss(s1)+ss(s2))/(df.s1+df.s2)
pooled.variance
se.ts <- sqrt((pooled.variance/length(s1))+(pooled.variance/length(s2)))
se.ts
t.cal.ts <- (mean(s1)-mean(s2))/se.ts
t.cal.ts
p.val.ts <- pt(abs(t.cal.ts), df=df.s1+df.s2, lower.tail = F) * 2
p.val.ts
t.test(s1, s2, var.equal = T)
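# The 95% confidence interval reported by t.test() above can be
# rebuilt by hand from the pooled SE (a sketch; t.crit is a
# temporary name):
t.crit <- qt(.975, df = df.s1 + df.s2)
(mean(s1) - mean(s2)) + c(-1, 1) * t.crit * se.ts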
se(s1)
se(s2)
mean(s1)+c(-se(s1)*2, se(s1)*2)
mean(s2)+c(-se(s2)*2, se(s2)*2)
mean(p1)
mean(p2)
hist(s1, breaks=50,
col = rgb(1, 0, 0, 0.5))
hist(s2, breaks=50, add=T, col=rgb(0,0,1,1))
abline(v=mean(s1), col="green", lwd=3)
# hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5))
abline(v=mean(s2), col="lightblue", lwd=3)
diff <- mean(s1)-mean(s2)
se.ts
diff/se.ts
####
# repeated-measures t-test
# we can reuse the case above:
# population parameters unknown,
# two consecutive measurements
# on the same sample
t1 <- s1
t2 <- s2
mean(t1)
mean(t2)
diff.s <- t1 - t2
diff.s
t.cal.rm <- mean(diff.s)/se(diff.s)
t.cal.rm
p.val.rm <- pt(abs(t.cal.rm), length(s1)-1, lower.tail = F) * 2
p.val.rm
t.test(s1, s2, paired = T)
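# Equivalently, the paired test is just a one-sample t-test on the
# differences (a sketch; this should reproduce t.cal.rm and p.val.rm):
t.test(diff.s, mu = 0)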
# create overlaid histograms of s1 and s2
s.all <- c(s1,s2)
mean(s.all)
hist(s1, col='grey', breaks=50, xlim=c(50, 150))
hist(s2, col='darkgreen', breaks=50, add=TRUE)
abline(v=c(mean(s.all)),
col=c("red"), lwd=3)
abline(v=c(mean(s1), mean(s2)),
col=c("black", "green"), lwd=3)
comb <- data.frame(s1,s2)
dat <- stack(comb)
head(dat)
m.tot <- mean(s.all)
m.s1 <- mean(s1)
m.s2 <- mean(s2)
ss.tot <- ss(s.all)
ss.s1 <- ss(s1)
ss.s2 <- ss(s2)
df.tot <- length(s.all)-1
df.s1 <- length(s1)-1
df.s2 <- length(s2)-1
ms.tot <- var(s.all)
ms.tot
ss.tot/df.tot
var(s1)
ss.s1 / df.s1
var(s2)
ss.s2 / df.s2
ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2)
ss.b.s2 <- length(s2) * ((m.tot - m.s2)^2)
ss.bet <- ss.b.s1+ss.b.s2
ss.bet
ss.wit <- ss.s1 + ss.s2
ss.wit
ss.bet + ss.wit
ss.tot
library(dplyr)
# df.bet <- length(unique(dat)) - 1
df.bet <- nlevels(dat$ind) - 1
df.wit <- df.s1+df.s2
df.bet
df.wit
df.bet+df.wit
df.tot
ms.bet <- ss.bet / df.bet
ms.wit <- ss.wit / df.wit
ms.bet
ms.wit
f.cal <- ms.bet / ms.wit
f.cal
pf(f.cal, df.bet, df.wit, lower.tail = F)
f.test <- aov(dat$values~ dat$ind, data = dat)
summary(f.test)
sqrt(f.cal)
t.cal.ts
# this is anova after all.
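# A quick numeric cross-check of the t^2 = F relationship, using
# objects already defined above (a sketch):
t.cal.ts^2
f.cal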
output
> rm(list=ls())
> rnorm2 <- function(n,mean,sd){
+ mean+sd*scale(rnorm(n))
+ }
> se <- function(sample) {
+ sd(sample)/sqrt(length(sample))
+ }
> ss <- function(x) {
+ sum((x-mean(x))^2)
+ }
>
> N.p <- 1000000
> m.p <- 100
> sd.p <- 10
>
>
> p1 <- rnorm2(N.p, m.p, sd.p)
> mean(p1)
[1] 100
> sd(p1)
[1] 10
>
> p2 <- rnorm2(N.p, m.p+10, sd.p)
> mean(p2)
[1] 110
> sd(p2)
[1] 10
>
> hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5),
+ main = "histogram of p1 and p2",)
> abline(v=mean(p1), col="black", lwd=3)
> hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5))
> abline(v=mean(p2), col="red", lwd=3)
>
> s.size <- 1000
> s2 <- sample(p2, s.size)
> mean(s2)
[1] 110.4892
> sd(s2)
[1] 10.36614
>
> se.z <- sqrt(var(p1)/s.size)
> se.z <- c(se.z)
> se.z.range <- c(-2*se.z,2*se.z)
> se.z.range
[1] -0.6324555 0.6324555
>
> mean(p1)+se.z.range
[1] 99.36754 100.63246
> mean(s2)
[1] 110.4892
>
> z.cal <- (mean(s2) - mean(p1)) / se.z
> z.cal
[1] 33.16976
> pnorm(z.cal, lower.tail = F) * 2
[1] 2.93954e-241
>
> z.cal
[1] 33.16976
>
> # principles . . .
> # distribution of sample means
> iter <- 100000
> means <- c()
> for (i in 1:iter) {
+ m.of.s <- mean(sample(p1, s.size))
+ means <- append(means, m.of.s)
+ }
>
> hist(means,
+ xlim = c(mean(means)-3*sd(means), mean(s2)+5),
+ col = rgb(1, 0, 0, 0.5))
> abline(v=mean(p1), col="black", lwd=3)
> abline(v=mean(s2),
+ col="blue", lwd=3)
> lo1 <- mean(p1)-se.z
> hi1 <- mean(p1)+se.z
> lo2 <- mean(p1)-2*se.z
> hi2 <- mean(p1)+2*se.z
> abline(v=c(lo1, hi1, lo2, hi2),
+ col=c("green","green", "brown", "brown"),
+ lwd=2)
> se.z
[1] 0.3162278
> c(lo2, hi2)
[1] 99.36754 100.63246
> pnorm(z.cal, lower.tail = F) * 2
[1] 2.93954e-241
>
>
> # Note that we use sqrt(var(s2)/s.size), the standard error computed
> # from the sample, instead of sqrt(var(p1)/s.size), which is based on
> # the population variance. This is common practice in R; in fact,
> # some user-written z.test() functions use the former rather than
> # the latter.
>
> sqrt(var(p1)/s.size)
[,1]
[1,] 0.3162278
> se.z
[1] 0.3162278
>
> sqrt(var(s2)/s.size)
[1] 0.3278061
> se(s2)
[1] 0.3278061
>
> t.cal.os <- (mean(s2) - mean(p1))/ se(s2)
> z.cal <- (mean(s2) - mean(p1))/ se.z
> t.cal.os
[1] 31.99818
> z.cal
[1] 33.16976
>
> df.s2 <- length(s2)-1
> df.s2
[1] 999
> p.t.os <- pt(abs(t.cal.os), df.s2, lower.tail = F) * 2
> p.t.os
[1] 3.162139e-155
> t.out <- t.test(s2, mu=mean(p1))
>
> library(BSDA)
Loading required package: lattice
Attaching package: ‘BSDA’
The following object is masked from ‘package:datasets’:
Orange
Warning message:
package ‘BSDA’ was built under R version 4.3.3
> z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1))
>
> z.out$statistic # z value computed with se(s2) instead of se.z
z
31.9833
> z.cal # z value computed with se.z (= sqrt(var(p1)/s.size))
[1] 33.16976
>
> t.out$statistic # t value computed with se(s2) in the denominator
t
31.99818
> t.cal.os # t value computed by hand using se(s2)
[1] 31.99818
>
> # But, after all, we use the t.test() method regardless of
> # these small variations.
>
> hist(means,
+ xlim = c(mean(means)-3*sd(means), mean(s2)+5),
+ col = rgb(1, 0, 0, 0.5))
> abline(v=mean(p1), col="black", lwd=3)
> abline(v=mean(s2),
+ col="blue", lwd=3)
> lo1 <- mean(p1)-se.z
> hi1 <- mean(p1)+se.z
> lo2 <- mean(p1)-2*se.z
> hi2 <- mean(p1)+2*se.z
> abline(v=c(lo1, hi1, lo2, hi2),
+ col=c("green","green", "brown", "brown"),
+ lwd=2)
>
> # the difference between the black and blue lines,
> # divided by
> # se(s2) (= the difference expected by random chance),
> # is the t value
> mean(s2)
[1] 110.4892
> mean(p1)
[1] 100
> diff <- mean(s2)-mean(p1)
> diff
[1] 10.4892
> se(s2)
[1] 0.3278061
> diff/se(s2)
[1] 31.99818
>
> t.cal.os
[1] 31.99818
>
> ########################
> # 2 sample t-test
> ########################
> # Assumption: we do not know the parameters of the
> # populations from which the two samples below
> # are drawn.
> s1 <- sample(p1, s.size)
> s2 <- sample(p2, s.size)
>
> mean(s1)
[1] 100.7138
> mean(s2)
[1] 109.8804
> ss(s1)
[1] 108288.9
> ss(s2)
[1] 100751
> df.s1 <- length(s1)-1
> df.s2 <- length(s2)-1
> df.s1
[1] 999
> df.s2
[1] 999
>
> pooled.variance <- (ss(s1)+ss(s2))/(df.s1+df.s2)
> pooled.variance
[1] 104.6246
> se.ts <- sqrt((pooled.variance/length(s1))+(pooled.variance/length(s2)))
> se.ts
[1] 0.4574376
> t.cal.ts <- (mean(s1)-mean(s2))/se.ts
> t.cal.ts
[1] -20.03892
> p.val.ts <- pt(abs(t.cal.ts), df=df.s1+df.s2, lower.tail = F) * 2
> p.val.ts
[1] 1.522061e-81
>
> t.test(s1, s2, var.equal = T)
Two Sample t-test
data: s1 and s2
t = -20.039, df = 1998, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-10.06366 -8.26945
sample estimates:
mean of x mean of y
100.7138 109.8804
>
> se(s1)
[1] 0.3292374
> se(s2)
[1] 0.3175719
>
> mean(s1)+c(-se(s1)*2, se(s1)*2)
[1] 100.0553 101.3723
> mean(s2)+c(-se(s2)*2, se(s2)*2)
[1] 109.2452 110.5155
>
> mean(p1)
[1] 100
> mean(p2)
[1] 110
>
> hist(s1, breaks=50,
+ col = rgb(1, 0, 0, 0.5))
> hist(s2, breaks=50, add=T, col=rgb(0,0,1,1))
> abline(v=mean(s1), col="green", lwd=3)
> # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5))
> abline(v=mean(s2), col="lightblue", lwd=3)
>
> diff <- mean(s1)-mean(s2)
> se.ts
[1] 0.4574376
> diff/se.ts
[1] -20.03892
>
> ####
> # repeated-measures t-test
> # we can reuse the case above:
> # population parameters unknown,
> # two consecutive measurements
> # on the same sample
>
> t1 <- s1
> t2 <- s2
> mean(t1)
[1] 100.7138
> mean(t2)
[1] 109.8804
> diff.s <- t1 - t2
> diff.s
[1] -11.22445947 -3.01532631 -3.47460077 -14.77286374 -9.74734969 2.33544374 -1.53761723
[8] 5.16258644 -40.75608052 -4.94498515 13.05099703 -0.24847628 -6.26581498 -8.14116058
[15] 5.49152742 6.15799638 -4.65589524 -7.93104859 -15.28488198 -5.99182327 -15.35591992
[22] -1.00704696 18.27285355 -12.09936359 -4.72393289 -4.98301049 -20.95452153 1.62363928
[29] -10.58244069 -21.50608157 -53.61230898 -3.85198479 -42.61929736 -6.80266370 -22.92704580
[36] 3.01745740 -19.37131113 -27.82551902 -10.05485425 -25.12701225 -12.93162558 -7.55706006
[43] -19.16855657 -4.81878797 1.64397602 -28.64658004 16.36241227 8.73170802 -11.56090742
[50] 3.21799642 -39.37233043 -9.96051946 -19.11232333 -34.53077051 -4.85780005 -9.52501389
[57] -8.28743679 -38.33995044 -50.60884456 -3.43450084 -0.85381393 -13.30971467 10.13049966
[64] 8.65616917 -29.75453733 -25.40674843 -24.98197786 -12.92901371 15.80168803 -8.67599446
[71] -20.50728324 -19.37275012 -23.27866089 -11.74962053 -34.35317908 -26.10460199 8.59009957
[78] -24.79252799 3.09475727 -19.13505970 -11.72561867 -33.79775614 -6.00167910 -25.03263480
[85] -23.66447959 -8.54416282 -6.89905337 -10.45234583 -34.67182776 -37.20205500 6.60270378
[92] -21.22842221 -11.68774036 -8.71535203 -15.55746542 2.88009050 -5.51543509 1.21606420
[99] 19.63435733 -14.40578016 -11.24172079 10.21723258 -23.41564885 27.60247565 -8.28684078
[106] 17.72472594 -0.29977586 -14.84142327 -20.25713391 -37.99518419 -27.68545647 -19.78976153
[113] -10.23092427 0.40875267 -17.36077213 -24.53979674 -24.18070810 -24.13321556 -17.36615616
[120] -31.50478963 -2.47101725 -22.14003910 -33.63875270 -19.91485505 -3.64251563 -30.62407901
[127] -7.91406849 -6.25389594 4.40651820 -19.08031290 -14.01489366 -31.30542657 -27.05443597
[134] -6.60642443 1.29892762 -11.73908399 1.96878666 -7.53666283 -42.78007247 -9.04715952
[141] 14.05326537 -6.85724091 -16.02305236 -24.38581030 -9.91759245 4.75488243 -2.83181250
[148] -5.38371023 -19.62202451 -1.55000290 -14.86541899 2.95279749 3.82076747 -3.38868029
[155] -0.65101074 18.42861149 -20.55548459 3.02438240 21.23539589 3.32810800 -9.66847192
[162] 2.48687983 -5.40571073 -1.53265391 -12.93542011 19.87564176 -10.69228781 12.80629134
[169] 6.51132022 -16.96586244 10.88690774 -0.37382590 -14.69255590 -26.66941722 -3.67854181
[176] 6.29443209 -10.77182585 -25.65187453 0.09825251 9.03908176 10.25414786 -0.45340800
[183] -38.63379525 -16.58800530 -17.62672134 5.43887886 -4.95532070 -6.46372777 -3.14562140
[190] -22.25276559 2.05941880 -44.33676979 -3.34190343 -19.04858920 -8.21990394 -25.53625536
[197] -17.21120659 28.96404607 6.25767994 -6.17593254 -34.33503461 6.65350479 11.42897662
[204] -0.83715976 -28.46397824 -40.67262397 -6.01225907 -11.22598108 -10.92756008 -15.45671946
[211] -4.57060131 -27.19860432 5.68618678 -27.70257611 -27.77374648 -5.93312400 -9.37871992
[218] -24.41623403 16.94244832 -30.46760860 -4.91996788 2.89031604 5.27074167 -23.91666746
[225] -13.27091592 -7.99640540 15.26148582 -26.01138488 -28.57927092 -17.29274303 -17.07704891
[232] -11.52528966 -5.50387909 -0.66159232 -11.50347650 -19.90680762 -11.09595230 -11.02710712
[239] -13.34969773 -37.98584006 -0.95289265 -15.00431567 -1.22592809 7.40922588 11.45790664
[246] -24.07983488 4.54606079 -0.54863357 6.56528626 -4.04491250 -17.13525622 -22.85976576
[253] -12.30101864 7.01445235 -6.77058075 -10.69023765 -14.21289974 0.68743488 -22.13964282
[260] -4.93155960 -5.32992121 -9.24699990 -34.21542676 -28.10074867 -14.64483350 -21.94636738
[267] 0.57190289 -32.90838279 -23.39341251 -8.52122572 7.61461839 -12.60688433 -8.17161329
[274] -7.73981345 -4.86979671 14.97509924 -17.25493992 -48.85010339 10.16448581 -4.34608694
[281] -4.73924884 19.07076764 -5.23571728 -28.73076387 -1.01530521 -1.14387890 -6.96197277
[288] -10.10160879 -11.59352210 5.83294359 -10.03967522 -9.59761019 -15.88867160 -7.13643475
[295] -7.29391701 -31.93027109 -15.56526408 13.22678162 -11.18996097 -25.00719650 -12.64524490
[302] -35.18133234 13.41791211 -10.87228845 -31.95732546 -18.32496165 -17.76804212 -14.54640557
[309] 21.57859526 8.22556833 -11.51111550 -7.19669828 -28.98417620 -8.16801696 -3.70794736
[316] -6.13751897 8.57292816 0.83289680 -11.25989874 11.25387495 -21.86409747 8.11413189
[323] -31.81200194 -12.67414744 -7.24837917 -9.76383734 -9.95068555 -17.43835347 -21.88852882
[330] -7.57724957 -39.97109963 -18.22405067 -3.35304894 -2.01422861 12.65855868 19.68084692
[337] -17.60471709 0.17064467 -2.41707219 -8.99719317 -6.91454803 6.10676201 -6.24933210
[344] -10.52672545 -8.28580388 -21.74741007 -20.10217608 -27.73408273 -15.76758951 -16.05537057
[351] 7.28801425 14.00094463 -32.21492728 -26.60734855 5.34256687 -2.73678515 -11.37626522
[358] -23.18199896 -18.58488442 -26.09162223 -8.11379506 -16.24314767 -7.63202196 -7.70819353
[365] 15.27344158 -6.89298171 -14.69008686 -10.61871285 -15.97716535 2.04244654 -20.72377059
[372] -3.33710996 -8.70719328 1.11210610 17.29240572 3.03254095 -0.17471362 -0.03964601
[379] -37.25886118 1.80635404 -18.13062850 -12.08353080 3.17736630 11.60663212 14.40230421
[386] -10.17057116 -11.45984282 -11.29130871 -15.60613249 3.94447987 21.39005539 38.66131508
[393] -21.77681110 12.35832123 20.97222557 -30.89221977 26.68094540 13.28471640 5.74956285
[400] -7.60656480 17.47154758 2.14422376 -25.38609321 -7.75483605 12.58051687 -0.12179751
[407] -17.52668230 1.92572455 -0.40532799 -11.06357792 -11.11874742 -10.35835894 -18.01080311
[414] -14.30821541 -10.88427284 3.35560021 -13.46512417 6.04047559 -14.68666762 1.68951313
[421] 4.31329242 7.50742041 -26.79115307 -32.01096580 28.21960400 3.60371811 14.05530287
[428] -23.35919604 4.65486577 -42.05164227 5.90447867 -22.06140361 -36.42899364 -37.85288612
[435] -7.63484440 -7.99917501 5.73410720 -17.24124704 25.18142781 -22.27250215 3.31690400
[442] -19.75974381 13.54264541 -18.86771849 -37.61540899 1.88862667 -10.85162959 -15.90454635
[449] -2.21038773 -1.64057323 -5.05984647 -11.33623396 -3.23993046 -7.07393112 -12.97757803
[456] 2.81189108 -14.48709633 -26.49136729 -27.92148265 -3.32407602 -32.81165304 12.60241883
[463] -19.77773032 -7.21862902 -30.05644966 -34.95205067 11.36207603 -1.88732702 -0.76534355
[470] -2.96997060 -26.06871359 -35.94190403 15.37335724 -29.72925340 -16.82830601 -32.14796579
[477] -7.49787668 -18.90903047 -4.95224428 -15.73171387 13.33385011 6.12192173 -12.11076372
[484] -18.62856464 -25.58573774 -11.07888550 -16.77332405 -0.02979250 0.28045921 6.30825053
[491] 2.66879530 -12.26308670 -3.34380103 0.57984817 -6.07862084 -0.47454107 0.18277721
[498] -1.49326079 -10.71424083 -18.55815468 -12.12523481 -10.32886876 1.21618496 -26.03417587
[505] 4.21652961 -7.19317598 -6.49276222 -24.15078462 -16.62200664 18.65907236 -14.55468004
[512] 12.03275099 13.63985493 1.94955005 4.24574372 -1.87352631 -9.90476053 4.92904187
[519] -27.58434557 -3.22307117 -17.00433045 -24.92647628 -9.86853681 -3.12201969 -3.23781294
[526] -44.00039083 -11.40638340 -10.42772214 11.56731373 34.18935566 0.22493781 -21.75192741
[533] -27.11548294 -26.69534077 11.86361692 -18.87587506 -19.76960989 -6.72305933 -16.37844743
[540] 28.42329924 -9.05042270 -5.94334753 0.80277394 -10.71835822 -7.39049563 -2.78087519
[547] -18.87000360 -2.56690042 -2.31189557 -15.13438755 2.47712830 -7.13675100 -0.16789790
[554] -17.69850124 -11.92887098 -7.41497303 -12.51272098 8.45361690 -30.63879338 -22.19654933
[561] 0.37289814 -27.30901665 -42.04696582 -12.04972054 -20.74642541 -7.29054494 -11.52182026
[568] 8.38372199 -18.57905859 7.62453900 -15.52892307 -1.83154260 -16.24286276 -5.74980635
[575] 5.80843070 5.89082648 -12.38211486 -30.94945977 -34.27141760 -16.68862227 -12.74228148
[582] -12.87197389 -27.14630137 -14.53291112 -8.24059195 -5.93523740 3.67320111 -14.73114416
[589] 7.12333661 -20.54493654 -16.23259278 -7.23953837 13.66853283 -13.35719133 -3.19469244
[596] -8.63453073 -4.38937208 -4.25683530 -3.01229288 -9.46716386 -14.49889042 -4.80320886
[603] -5.93786510 -5.58582436 -38.52292640 -7.31871320 -10.10261534 3.55750183 -0.27859803
[610] 13.47634673 -29.68104705 -4.28539812 3.67080445 -5.84424964 13.18400307 -7.89086592
[617] -1.33920025 -0.95308433 -29.91743460 -18.66903033 -20.15406028 -0.89348972 -12.41231804
[624] 8.08680667 -34.26161156 -33.98947051 -7.80666121 -10.20912967 -28.95896039 -5.89738802
[631] 2.32928389 -38.93521867 -15.56868160 -6.70412659 -2.57471692 2.46451660 -12.81558476
[638] 19.76309298 2.12194484 24.84734206 -13.55620029 -17.87794609 -19.54477100 -19.73182713
[645] -42.02480771 -13.42077241 9.19843679 -15.49829521 -35.16885995 -17.77559877 -2.28384147
[652] -17.68303368 6.17812431 0.66249967 5.00542555 -2.26766815 -11.73064973 -6.75727090
[659] -19.48957914 -28.88885682 -10.59583907 -32.20537872 -11.95661953 -23.77887711 -23.51551361
[666] -17.32443690 -10.86310515 -8.51040349 -27.57610667 -26.85875525 -3.70329222 -20.50863173
[673] -12.41671968 -5.15745402 -19.62849573 4.85082387 -29.88557391 -20.24914582 -27.73847105
[680] -0.12605203 -9.20839167 -51.56244236 3.17764075 13.96786787 -12.74398310 6.86534410
[687] -21.75000477 14.10169236 -24.69667641 -20.26619614 -11.67028168 -5.14496708 -21.84000650
[694] -34.30010114 -4.30214907 -15.81158253 -2.54412477 0.17601622 -22.22290730 -6.51460318
[701] -19.46561809 9.62212347 -5.62354822 16.70312068 -7.16879691 -5.77420998 -2.36157455
[708] -27.91638644 -12.61381331 7.45329002 -1.78749631 10.24888993 -2.76665687 -6.47189694
[715] -20.55376627 1.03372077 -6.75380336 -21.29024889 -16.12342903 -36.81337018 1.75482644
[722] -14.38944775 -4.27006397 -10.21581755 1.97016866 1.50969462 32.31451580 3.32233756
[729] -7.85868267 8.83356066 -3.54004596 -17.21481071 -29.58350979 -22.72248706 -27.86169027
[736] -20.25705972 -1.67627671 -23.02237081 -20.13752529 6.07661361 -0.84839297 19.31619624
[743] -13.32818441 -1.51206927 -16.05469364 -18.19320869 21.19248327 -26.85398142 2.82896396
[750] 1.90853566 -10.76451371 -0.16368097 -5.02703204 -23.15483742 -5.12822113 -4.84245502
[757] -19.16011286 -13.91801221 -11.66649472 15.04676653 -20.54651422 32.67150526 -11.37626079
[764] -14.75337241 -0.46891630 -12.09854921 -10.75658868 -18.03655441 10.95871312 27.68695247
[771] -1.22676076 -15.78897397 -13.68374038 16.98138996 -15.57048042 -17.53983895 -32.33929466
[778] -34.01869977 -2.46227514 -6.56500408 -17.04103052 -17.24440339 -6.68381805 -8.43674456
[785] -3.88372407 -29.28134174 -40.86613320 -18.64259952 -2.78880196 11.13938536 14.40875929
[792] -6.52858972 -38.92485161 -23.57441915 -25.59877817 19.51852353 -20.06650023 -4.32313864
[799] 4.62056706 -5.18094527 -0.76429848 -2.98392902 -18.50300318 -29.63778693 -23.63235389
[806] 5.68017122 14.33220812 -13.31317005 -10.81641168 5.22445430 -35.46250519 1.56327962
[813] -14.60867384 10.29147319 -13.28593538 6.63825469 -14.52606348 17.45903705 -38.05095694
[820] -13.93270232 -20.21993468 -1.96308711 1.80444271 12.16855255 9.52956342 -14.88194384
[827] -29.04193544 -24.04102844 -14.01878071 -2.03269506 2.67151865 -5.20017572 -41.14943705
[834] 8.96661691 28.12018815 -2.37196235 -13.46669223 -15.34687871 -19.92157033 16.10283716
[841] 5.71060454 -1.87210810 17.82634786 6.46299261 -23.56325888 -9.31538158 13.08900119
[848] 8.81863004 -16.87823373 2.57469446 -19.81326240 -1.08297141 -15.99656489 -12.78570251
[855] -8.53943328 -37.51286174 -5.80934175 29.94051347 -12.29916397 -0.84174744 -5.89053659
[862] -30.93593593 -6.24638974 6.71567898 0.33777483 -6.43007412 -6.10032287 -18.90969351
[869] 6.22885535 2.29565188 -25.72416278 -4.48305502 -5.77922453 -13.55585021 -23.84825362
[876] -14.65449874 -25.51320775 -35.73124575 2.27482359 -23.67720440 7.67981459 -30.63388731
[883] -2.12532769 -6.06248123 -14.67967251 2.92695069 -32.55242308 -19.80182640 -12.70340060
[890] -0.36473422 -24.33804299 -33.53505272 6.38777505 -7.65940679 -45.22813407 -5.91512961
[897] 4.82722129 -32.55034135 -5.64002137 -1.85377692 -24.82298659 -11.91899896 5.90226019
[904] -6.67799556 -18.39929702 -14.70709248 20.16465530 -2.37785503 -19.40544013 -43.64259489
[911] -4.80310727 -20.67267587 -6.12960286 14.76051916 -24.94995895 -21.55367734 3.51347606
[918] -21.82098554 4.68892318 22.32281743 3.01554647 -14.22391287 -28.44488042 0.32000549
[925] -26.29548705 -28.39677088 -6.06084948 -2.71491964 1.69227810 -8.71016310 -6.16547536
[932] -11.67413566 -11.59680714 9.40984647 -9.93669428 -17.84745893 2.04601218 -19.80104095
[939] -0.49341925 6.14760676 -22.21183010 13.50485022 -5.22057307 -17.82539558 -24.46518962
[946] 13.48666595 -11.80484500 -20.85173165 -31.08852302 -4.75860232 -36.88054918 2.98370161
[953] -0.37405972 17.58168372 0.48972051 9.79846880 -45.38152568 -25.01098967 -0.10839639
[960] 0.14270290 10.28628008 21.54101027 -17.49673999 -12.24470665 -11.99999059 -24.05651849
[967] -18.49661342 -12.30226946 -6.43942483 -23.07187196 -1.29013458 -15.53084359 0.86159642
[974] -11.26368153 -4.91450784 -9.73711163 -14.70304307 -4.39913543 -21.76712550 -7.49898974
[981] -25.17421015 -10.35273557 -9.64669400 -14.19622872 -13.91668603 -24.13258717 -15.08519499
[988] 1.35746984 10.40157841 -2.47480562 -35.30199191 -25.52554695 -2.31850569 -10.24616931
[995] -22.27223290 -4.57167529 -7.75456863 -2.13869306 -17.30982789 -24.04030147
> t.cal.rm <- mean(diff.s)/se(diff.s)
> t.cal.rm
[1] -19.82846
> p.val.rm <- pt(abs(t.cal.rm), length(s1)-1, lower.tail = F) * 2
> p.val.rm
[1] 4.836389e-74
> t.test(s1, s2, paired = T)
Paired t-test
data: s1 and s2
t = -19.828, df = 999, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-10.073732 -8.259379
sample estimates:
mean difference
-9.166555
>
> # create overlaid histograms of s1 and s2
> s.all <- c(s1,s2)
> mean(s.all)
[1] 105.2971
> hist(s1, col='grey', breaks=50, xlim=c(50, 150))
> hist(s2, col='darkgreen', breaks=50, add=TRUE)
> abline(v=c(mean(s.all)),
+ col=c("red"), lwd=3)
> abline(v=c(mean(s1), mean(s2)),
+ col=c("black", "green"), lwd=3)
>
> comb <- data.frame(s1,s2)
> dat <- stack(comb)
> head(dat)
values ind
1 93.17788 s1
2 103.00254 s1
3 104.53388 s1
4 88.59698 s1
5 105.67789 s1
6 112.72657 s1
>
> m.tot <- mean(s.all)
> m.s1 <- mean(s1)
> m.s2 <- mean(s2)
>
> ss.tot <- ss(s.all)
> ss.s1 <- ss(s1)
> ss.s2 <- ss(s2)
>
> df.tot <- length(s.all)-1
> df.s1 <- length(s1)-1
> df.s2 <- length(s2)-1
>
> ms.tot <- var(s.all)
> ms.tot
[1] 125.5892
> ss.tot/df.tot
[1] 125.5892
>
> var(s1)
[1] 108.3973
> ss.s1 / df.s1
[1] 108.3973
>
> var(s2)
[1] 100.8519
> ss.s2 / df.s2
[1] 100.8519
>
> ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2)
> ss.b.s2 <- length(s2) * ((m.tot - m.s2)^2)
> ss.bet <- ss.b.s1+ss.b.s2
> ss.bet
[1] 42012.87
>
> ss.wit <- ss.s1 + ss.s2
> ss.wit
[1] 209039.9
>
> ss.bet + ss.wit
[1] 251052.8
> ss.tot
[1] 251052.8
>
> library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
> # df.bet <- length(unique(dat)) - 1
> df.bet <- nlevels(dat$ind) - 1
> df.wit <- df.s1+df.s2
> df.bet
[1] 1
> df.wit
[1] 1998
> df.bet+df.wit
[1] 1999
> df.tot
[1] 1999
>
> ms.bet <- ss.bet / df.bet
> ms.wit <- ss.wit / df.wit
> ms.bet
[1] 42012.87
> ms.wit
[1] 104.6246
>
> f.cal <- ms.bet / ms.wit
> f.cal
[1] 401.5582
> pf(f.cal, df.bet, df.wit, lower.tail = F)
[1] 1.522061e-81
>
>
> f.test <- aov(dat$values~ dat$ind, data = dat)
> summary(f.test)
Df Sum Sq Mean Sq F value Pr(>F)
dat$ind 1 42013 42013 401.6 <2e-16 ***
Residuals 1998 209040 105
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
>
> sqrt(f.cal)
[1] 20.03892
> t.cal.ts
[1] -20.03892
>
> # this is anova after all.
>