COMMunication
RESearch.NET

This is an old revision of the document!
rnorm2 <- function(n,mean,sd) { 
  mean + sd * scale(rnorm(n)) 
}

p1 <- rnorm2 (1000000, mean=70, sd=10)

s.size <- 16

means.temp <- c()
s1 <- sample(p1, s.size, replace = T)
mean(s1)
means.temp <- append(means.temp, mean(s1))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
means.temp

iter <- 1000000
# means <- c()
means <- rep(NA, iter)
for (i in 1:iter) {
  # the below method is slow
  # means <- append(means, mean(sample(p1, s.size, replace = T))) 
  # so we use the means[] method.
  means[i] <- mean(sample(p1, s.size, replace = T))
}
length(means)
m.sm <- mean(means)
sd.sm <- sd(means)
m.sm
sd.sm
se.s <- sd.sm
# but, we already learned that 
# m.sm and sd.sm follow CLT
m.sm2 <- mean(p1)
sd.sm2 <- sqrt(var(p1)/s.size) # or 
sd(p1)/sqrt(s.size)
m.sm2
sd.sm2
se.z <- sd.sm2

hist(means, breaks=50, 
     xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)), 
     col=rgb(1, 1, 1, .5))
abline(v=mean(p1), col="black", lwd=3)
# now we want to get sd of this distribution
lo1 <- mean(p1)-1*se.z
hi1 <- mean(p1)+1*se.z
lo2 <- mean(p1)-2*se.z
hi2 <- mean(p1)+2*se.z
lo3 <- mean(p1)-3*se.z
hi3 <- mean(p1)+3*se.z
abline(v=mean(p1), col="black", lwd=2)
# abline(v=mean(p2), colo='darkgreen', lwd=2)
abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3), 
       col=c("red","red", "blue", "blue", "orange", "orange"), 
       lwd=2)

# 95% confidence interval 
# sample means will be found in 
# between blue vertical line
# which is 
c(lo2, hi2)

# textbook says
# one sample statistics inferential to the pop
# we do not know population parameter
s.size <- 16
set.seed(101)
s1 <- sample(p1, s.size)
mean(s1)
c(var(s1))

m.sm3 <- mean(s1) # 
se3 <- sqrt(var(s1)/s.size)
m.sm3
se3

# with the above information
# we infer that pop mean = m.s1s
# se of sample means (n=s.size) = sd.s1s
lohi <- c(m.sm3 - 2*se3, m.sm3 + 2*se3)

curve(dnorm(x, m.sm3, se3),
      from = m.sm3 - 4 * se3, 
      to = m.sm3 + 4*se3,
      main = paste("CLT dist", "\n", "mean =", 
                   round(m.sm3,2), 
                   "\n", 
                   "se = ", 
                   round(se3,2)))
abline(v=c(lohi))
lohi