rnorm2 <- function(n,mean,sd) { mean + sd * scale(rnorm(n)) } p1 <- rnorm2 (1000000, mean=70, sd=10) s.size <- 16 means.temp <- c() s1 <- sample(p1, s.size, replace = T) mean(s1) means.temp <- append(means.temp, mean(s1)) means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T))) means.temp iter <- 1000000 # means <- c() means <- rep(NA, iter) for (i in 1:iter) { # the below method is slow # means <- append(means, mean(sample(p1, s.size, replace = T))) # so we use the means[] method. means[i] <- mean(sample(p1, s.size, replace = T)) } length(means) m.sm <- mean(means) sd.sm <- sd(means) m.sm sd.sm se.s <- sd.sm # but, we already learned that # m.sm and sd.sm follow CLT m.sm2 <- mean(p1) sd.sm2 <- sqrt(var(p1)/s.size) # or sd(p1)/sqrt(s.size) m.sm2 sd.sm2 se.z <- sd.sm2 hist(means, breaks=50, xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)), col=rgb(1, 1, 1, .5)) abline(v=mean(p1), col="black", lwd=3) # now we want to get sd of this distribution lo1 <- mean(p1)-1*se.z hi1 <- mean(p1)+1*se.z lo2 <- mean(p1)-2*se.z hi2 <- mean(p1)+2*se.z lo3 <- mean(p1)-3*se.z hi3 <- mean(p1)+3*se.z abline(v=mean(p1), col="black", lwd=2) # abline(v=mean(p2), colo='darkgreen', lwd=2) abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3), col=c("red","red", "blue", "blue", "orange", "orange"), lwd=2) # 95% confidence interval # sample means will be found in # between blue vertical line # which is c(lo2, hi2) # textbook says # one sample statistics inferential to the pop # we do not know population parameter s.size <- 16 set.seed(101) s1 <- sample(p1, s.size) mean(s1) c(var(s1)) m.sm3 <- mean(s1) # se3 <- sqrt(var(s1)/s.size) m.sm3 se3 # with the above information # we infer that pop mean = m.s1s # se of sample means (n=s.size) = sd.s1s lohi <- c(m.sm3 - 2*se3, m.sm3 + 2*se3) curve(dnorm(x, m.sm3, se3), from = m.sm3 - 4 * se3, to = m.sm3 + 4*se3, main = paste("CLT dist", "\n", "mean =", round(m.sm3,2), "\n", "se = ", round(se3,2))) abline(v=c(lohi)) lohi