sampling_distribution_in_r
This is an old revision of the document!
Sampling distribution in R: Example 1
# Sampling distribution of the mean: draw repeated samples of several sizes
# from one large population, plot the distributions of the sample means, and
# compute the theoretical standard error sqrt(var / n) for each sample size.

# Population parameters.
n.ajstu <- 100000
mean.ajstu <- 70
sd.ajstu <- 10

set.seed(1024)
# NOTE(review): rnorm2() is not base R and is not defined in this section; it
# must already be defined when this runs. Presumably it returns n values
# rescaled to the requested mean and sd -- confirm against its definition.
ajstu <- rnorm2(n.ajstu, mean = mean.ajstu, sd = sd.ajstu)
mean(ajstu)
sd(ajstu)

iter <- 10000
sss <- c(4, 25, 100, 400, 900, 1600, 2500)

# Means of `reps` random samples (drawn without replacement) of `size` values.
sample_means <- function(population, size, reps) {
  vapply(
    seq_len(reps),
    function(k) mean(sample(population, size)),
    numeric(1)
  )
}

means4 <- sample_means(ajstu, 4, iter)
means25 <- sample_means(ajstu, 25, iter)
means100 <- sample_means(ajstu, 100, iter)
means400 <- sample_means(ajstu, 400, iter)
means900 <- sample_means(ajstu, 900, iter)
means1600 <- sample_means(ajstu, 1600, iter)
means2500 <- sample_means(ajstu, 2500, iter)

# hist() draws each histogram and returns the object reused for the overlay.
h4 <- hist(means4)
h25 <- hist(means25)
h100 <- hist(means100)
h400 <- hist(means400)
h900 <- hist(means900)
h1600 <- hist(means1600)
h2500 <- hist(means2500)

# Overlay on a fixed 0-3000 y-axis. Only the first five sample sizes are
# overlaid; h1600 and h2500 are computed above but not added here.
plot(h4, ylim = c(0, 3000), col = "red")
plot(h25, add = TRUE, col = "blue")
plot(h100, add = TRUE, col = "green")
plot(h400, add = TRUE, col = "grey")
plot(h900, add = TRUE, col = "yellow")

# Standard error of the mean for every sample size, computed in one
# vectorised step. as.vector() drops any dim attribute so the division by
# `sss` also works if rnorm2() hands back a one-column matrix.
pop.var <- as.vector(var(ajstu))
ses <- sqrt(pop.var / sss)

# Per-size standard errors kept under their individual names.
se4 <- ses[1]
se25 <- ses[2]
se100 <- ses[3]
se400 <- ses[4]
se900 <- ses[5]
se1600 <- ses[6]
se2500 <- ses[7]

# Range of mean +/- 2 standard errors for each sample size.
se.1 <- ses
se.2 <- 2 * ses
lower.part.2 <- mean(ajstu) - se.2
upper.part.2 <- mean(ajstu) + se.2
data.frame(lower.part.2, upper.part.2)
Sampling distribution of a proportion in R
# Sampling distribution of a sample proportion. The population is 100,000
# draws of 0/1 with success probability 0.5, so the mean of a sample is the
# proportion of 1s in that sample.
pop <- rbinom(100000, size = 1, prob = 0.5)

# 2 x 2 grid: one histogram per sample size. par() returns the previous
# setting, which is restored at the end.
opar <- par(mfrow = c(2, 2))

iter <- 10000
for (n in c(5, 25, 100, 900)) {
  # `iter` sample proportions, each from a fresh sample of size n.
  means <- vapply(seq_len(iter), function(k) mean(sample(pop, n)), numeric(1))
  # Explicit print(): values are not auto-printed inside a loop.
  print(mean(means))
  hist(means, xlim = c(0, 1), main = n)
}

# Spread of the sampling distribution for the last sample size (n = 900).
sd(means)
var(means)

par(opar)
# Sampling distribution of a sample proportion for several sample sizes,
# drawn from a population of 100,000 0/1 values with success probability 0.4.
set.seed(2020)
pop <- rbinom(100000, size = 1, prob = 0.4)

# 2 x 2 grid: one histogram per sample size. par() returns the previous
# setting, which is restored at the end.
opar <- par(mfrow = c(2, 2))

iter <- 1000
ns <- c(25, 100, 400, 900)
l.ns <- length(ns)

for (i in seq_len(l.ns)) {
  # Build a fresh vector of exactly `iter` sample proportions for this size,
  # so the summary and histogram below never include values left over from a
  # previously existing `means` object.
  means <- vapply(
    seq_len(iter),
    function(k) mean(sample(pop, ns[i])),
    numeric(1)
  )
  # Explicit print(): values are not auto-printed inside a loop.
  print(mean(means))
  print(sd(means))
  # Title each panel with the sample size it actually shows.
  hist(means, xlim = c(0, 1), main = ns[i])
}

par(opar)
비율(proportion)이 0.5인 모집단에 대한 여론 조사를 위해서 900명의 샘플을 취하고, 이를 이용하여 모집단 비율의 위치를 추정하자.
# Estimate the population proportion from a single sample of 900.
# `pop` is not created here; it must already exist when this runs.
n <- 900
samp <- sample(pop, n)

# Sample proportion of 1s and its complement.
p <- mean(samp)
p
q <- 1 - p

# Standard error of the sample proportion, and twice that value.
ser <- sqrt(p * q / n)
ser2 <- ser * 2

# Lower and upper bounds: sample proportion -/+ 2 standard errors.
p - ser2
p + ser2
sampling_distribution_in_r.1710890599.txt.gz · Last modified: 2024/03/20 08:23 by hkimscil