r:confidence_interval:code01
This is an old revision of the document!
rnorm2 <- function(n,mean,sd) {
mean + sd * scale(rnorm(n))
}
p1 <- rnorm2 (1000000, mean=70, sd=10)
s.size <- 16
means.temp <- c()
s1 <- sample(p1, s.size, replace = T)
mean(s1)
means.temp <- append(means.temp, mean(s1))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp
iter <- 1000000
# means <- c()
means <- rep(NA, iter)
for (i in 1:iter) {
# the below method is slow
# means <- append(means, mean(sample(p1, s.size, replace = T)))
# so we use the means[] method.
means[i] <- mean(sample(p1, s.size, replace = T))
}
length(means)
m.sm <- mean(means)
sd.sm <- sd(means)
m.sm
sd.sm
se.s <- sd.sm
# but, we already learned that
# m.sm and sd.sm follow CLT
m.sm2 <- mean(p1)
sd.sm2 <- sqrt(var(p1)/s.size) # or
sd(p1)/sqrt(s.size)
m.sm2
sd.sm2
se.z <- sd.sm2
hist(means, breaks=50,
xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
col=rgb(1, 1, 1, .5))
abline(v=mean(p1), col="black", lwd=3)
# now we want to get sd of this distribution
lo1 <- mean(p1)-1*se.z
hi1 <- mean(p1)+1*se.z
lo2 <- mean(p1)-2*se.z
hi2 <- mean(p1)+2*se.z
lo3 <- mean(p1)-3*se.z
hi3 <- mean(p1)+3*se.z
abline(v=mean(p1), col="black", lwd=2)
# abline(v=mean(p2), colo='darkgreen', lwd=2)
abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
col=c("red","red", "blue", "blue", "orange", "orange"),
lwd=2)
# 95% confidence interval
# sample means will be found in
# between blue vertical line
# which is
c(lo2, hi2)
# textbook says
# one sample statistics inferential to the pop
# we do not know population parameter
s.size <- 16
set.seed(101)
s1 <- sample(p1, s.size)
mean(s1)
c(var(s1))
m.sm3 <- mean(s1) #
se3 <- sqrt(var(s1)/s.size)
m.sm3
se3
# with the above information
# we infer that pop mean = m.s1s
# se of sample means (n=s.size) = sd.s1s
lohi <- c(m.sm3 - 2*se3, m.sm3 + 2*se3)
curve(dnorm(x, m.sm3, se3),
from = m.sm3 - 4 * se3,
to = m.sm3 + 4*se3,
main = paste("CLT dist", "\n", "mean =",
round(m.sm3,2),
"\n",
"se = ",
round(se3,2)))
abline(v=c(lohi))
lohi
r/confidence_interval/code01.1763507668.txt.gz · Last modified: by hkimscil
