User Tools

Site Tools


r:confidence_interval

  • code01
  • output01
Loading...
> rnorm2 <- function(n,mean,sd) { 
+   mean + sd * scale(rnorm(n)) 
+ }
> 
> p1 <- rnorm2 (1000000, mean=70, sd=10)
> 
> s.size <- 16
> 
> means.temp <- c()
> s1 <- sample(p1, s.size, replace = T)
> mean(s1)
[1] 71.9093
> means.temp <- append(means.temp, mean(s1))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp <- append(means.temp, mean(sample(p1, s.size, replace = T)))
> means.temp
[1] 71.90930 73.23840 67.96878 68.24461 69.00211
> 
> iter <- 1000000
> # means <- c()
> means <- rep(NA, iter)
> for (i in 1:iter) {
+   # the below method is slow
+   # means <- append(means, mean(sample(p1, s.size, replace = T))) 
+   # so we use the means[] method.
+   means[i] <- mean(sample(p1, s.size, replace = T))
+ }
> length(means)
[1] 1000000
> m.sm <- mean(means)
> sd.sm <- sd(means)
> m.sm
[1] 69.99913
> sd.sm
[1] 2.498727
> se.s <- sd.sm
> # but, we already learned that 
> # m.sm and sd.sm follow CLT
> m.sm2 <- mean(p1)
> sd.sm2 <- sqrt(var(p1)/s.size) # or 
> sd(p1)/sqrt(s.size)
[1] 2.5
> m.sm2
[1] 70
> sd.sm2
     [,1]
[1,]  2.5
> se.z <- sd.sm2
> 
> hist(means, breaks=50, 
+      xlim = c(mean(means)-5*sd(means), mean(means)+10*sd(means)), 
+      col=rgb(1, 1, 1, .5))
> abline(v=mean(p1), col="black", lwd=3)
> # now we want to get sd of this distribution
> lo1 <- mean(p1)-1*se.z
> hi1 <- mean(p1)+1*se.z
> lo2 <- mean(p1)-2*se.z
> hi2 <- mean(p1)+2*se.z
> lo3 <- mean(p1)-3*se.z
> hi3 <- mean(p1)+3*se.z
> abline(v=mean(p1), col="black", lwd=2)
> # abline(v=mean(p2), colo='darkgreen', lwd=2)
> abline(v=c(lo1, hi1, lo2, hi2, lo3, hi3), 
+        col=c("red","red", "blue", "blue", "orange", "orange"), 
+        lwd=2)
> 
> # 95% confidence interval 
> # sample means will be found in 
> # between blue vertical line
> # which is 
> c(lo2, hi2)
[1] 65 75
> 
> # textbook says
> # one sample statistics inferential to the pop
> # we do not know population parameter
> s.size <- 16
> set.seed(101)
> s1 <- sample(p1, s.size)
> mean(s1)
[1] 74.10868
> c(var(s1))
[1] 71.94214
> 
> m.sm3 <- mean(s1) # 
> se3 <- sqrt(var(s1)/s.size)
> m.sm3
[1] 74.10868
> se3
[1] 2.120468
> 
> # with the above information
> # we infer that pop mean = m.s1s
> # se of sample means (n=s.size) = sd.s1s
> lohi <- c(m.sm3 - 2*se3, m.sm3 + 2*se3)
> 
> curve(dnorm(x, m.sm3, se3),
+       from = m.sm3 - 4 * se3, 
+       to = m.sm3 + 4*se3,
+       main = paste("CLT dist", "\n", "mean =", 
+                    round(m.sm3,2), 
+                    "\n", 
+                    "se = ", 
+                    round(se3,2)))
> abline(v=c(lohi))
> lohi
[1] 69.86774 78.34961
> 

r/confidence_interval.txt · Last modified: by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki