r:confidence_interval
- code01
- output01
Loading...
> rnorm2 <- function(n,mean,sd) {
+ mean + sd * scale(rnorm(n))
+ }
>
> p1 <- rnorm2 (1000000, mean=70, sd=10)
>
> s.size <- 16
>
> means.temp <- c()
> s1 <- sample(p1, s.size, replace = T)
> mean(s1)
[1] 71.9093
> means.temp <- append(means.temp, mean(s1))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp
[1] 71.90930 73.23840 67.96878 68.24461 69.00211
>
> iter <- 1000000
> # means <- c()
> means <- rep(NA, iter)
> for (i in 1:iter) {
+ # the below method is slow
+ # means <- append(means, mean(sample(p1, s.size, replace = T)))
+ # so we use the means[] method.
+ means[i] <- mean(sample(p1, s.size, replace = T))
+ }
> length(means)
[1] 1000000
> m.sm <- mean(means)
> sd.sm <- sd(means)
> m.sm
[1] 69.99913
> sd.sm
[1] 2.498727
> se.s <- sd.sm
> # but, we already learned that
> # m.sm and sd.sm follow CLT
> m.sm2 <- mean(p1)
> sd.sm2 <- sqrt(var(p1)/s.size) # or
> sd(p1)/sqrt(s.size)
[1] 2.5
> m.sm2
[1] 70
> sd.sm2
[,1]
[1,] 2.5
> se.z <- sd.sm2
>
> hist(means, breaks=50,
+ xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)),
+ col=rgb(1, 1, 1, .5))
> abline(v=mean(p1), col="black", lwd=3)
> # now we want to get sd of this distribution
> lo1 <- mean(p1)-1*se.z
> hi1 <- mean(p1)+1*se.z
> lo2 <- mean(p1)-2*se.z
> hi2 <- mean(p1)+2*se.z
> lo3 <- mean(p1)-3*se.z
> hi3 <- mean(p1)+3*se.z
> abline(v=mean(p1), col="black", lwd=2)
> # abline(v=mean(p2), colo='darkgreen', lwd=2)
> abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3),
+ col=c("red","red", "blue", "blue", "orange", "orange"),
+ lwd=2)
>
> # 95% confidence interval
> # sample means will be found in
> # between blue vertical line
> # which is
> c(lo2, hi2)
[1] 65 75
>
> # textbook says
> # one sample statistics inferential to the pop
> # we do not know population parameter
> s.size <- 16
> set.seed(101)
> s1 <- sample(p1, s.size)
> mean(s1)
[1] 74.10868
> c(var(s1))
[1] 71.94214
>
> m.sm3 <- mean(s1) #
> se3 <- sqrt(var(s1)/s.size)
> m.sm3
[1] 74.10868
> se3
[1] 2.120468
>
> # with the above information
> # we infer that pop mean = m.s1s
> # se of sample means (n=s.size) = sd.s1s
> lohi <- c(m.sm3 - 2*se3, m.sm3 + 2*se3)
>
> curve(dnorm(x, m.sm3, se3),
+ from = m.sm3 - 4 * se3,
+ to = m.sm3 + 4*se3,
+ main = paste("CLT dist", "\n", "mean =",
+ round(m.sm3,2),
+ "\n",
+ "se = ",
+ round(se3,2)))
> abline(v=c(lohi))
> lohi
[1] 69.86774 78.34961
>
r/confidence_interval.txt · Last modified: by hkimscil


