> rm(list=ls()) > > rnorm2 <- function(n,mean,sd){ + mean+sd*scale(rnorm(n)) + } > > n.p <- 100000 > m.p <- 100 > sd.p <- 10 > p1 <- rnorm2(n.p, m.p, sd.p) > m.p1 <- mean(p1) > sd.p1 <- sd(p1) > > p2 <- rnorm2(n.p, m.p+4, sd.p) > m.p2 <- mean(p2) > sd.p2 <- sd(p2) > > n.s <- 25 > se.z1 <- c(sqrt(var(p1)/n.s)) > se.z2 <- c(sqrt(var(p2)/n.s)) > se.z1 [1] 2 > se.z2 [1] 2 > > x.p1 <- seq(mean(p1)-5*se.z1, + mean(p2)+5*se.z1, + length.out = 500) > x.p2 <- seq(mean(p2)-5*se.z1, + mean(p2)+5*se.z1, + length.out = 500) > > # Calculate the probability > # density for a normal distribution > y.p1 <- dnorm(x.p1, mean(p1), se.z1) > y.p2 <- dnorm(x.p2, mean(p2), se.z2) > > # Plot the theoretical PDF > plot(x.p1, y.p1, type = "l", + lwd=3, + main = paste0("sample (n = ",n.s, ") means from p1 and p2 (unknown)"), + xlab = "Value", ylab = "Density") > lines(x.p2, y.p2, lty=2, lwd=3) > > > m.p1 <- mean(p1) > se1 <- c(m.p1-se.z1, m.p1+se.z1) > se2 <- c(m.p1-2*se.z1, m.p1+2*se.z1) > se3 <- c(m.p1-3*se.z1, m.p1+3*se.z1) > abline(v=c(m.p1,se1,se2,se3), + col=c('black', 'orange', 'orange', + 'green', 'green', + 'blue', 'blue'), + lwd=1) > > treated.s <- sample(p2, n.s) > m.treated.s <- mean(treated.s) > abline(v=m.treated.s, col='red', lwd=2) > > diff <- m.treated.s-mean(p1) > diff/se.z1 [1] 2.756739 > zscore <- abs(diff/se.z1) > pnorm(zscore, lower.tail = F)*2 [1] 0.005838089 > tscore <- zscore > pt(tscore, df=n.s-1, lower.tail = F)*2 [1] 0.01097571 > > # usual way - using sample's variance > # instead of p1's variance to get > # standard error value > se.s <- sqrt(var(treated.s)/n.s) > se.s [1] 2.046128 > tscore <- diff/se.s > tscore [1] 2.694592 > > > se1 <- c(m.p1-se.s, m.p1+se.s) > se2 <- c(m.p1-2*se.s, m.p1+2*se.s) > se3 <- c(m.p1-3*se.s, m.p1+3*se.s) > abline(v=c(se1,se2,se3), + col=c('darkorange', 'darkorange', + 'darkgreen', 'darkgreen', + 'darkblue', 'darkblue'), + lwd=2) > > > > > plot(x.p1, y.p1, type = "l", + lwd=3, + main = "Sample means from p1 and p2 (imaginary)", + xlab = "Value", ylab = "Density") > lines(x.p2, y.p2, lty=2, lwd=3) > > > m.p1 <- mean(p1) > se1 <- c(m.p1-se.s, m.p1+se.s) > se2 <- c(m.p1-2*se.s, m.p1+2*se.s) > se3 <- c(m.p1-3*se.s, m.p1+3*se.s) > abline(v=c(m.p1,se1,se2,se3), + col=c('black', 'darkorange', 'darkorange', + 'darkgreen', 'darkgreen', + 'darkblue', 'darkblue'), + lwd=2) > abline(v=m.treated.s, col='red', lwd=3) > se.s [1] 2.046128 > se.z1 [1] 2 > > c(m.treated.s-2*se.s, m.treated.s+2*se.s) [1] 101.4212 109.6057 > c <- qt(0.975, n.s-1) > c [1] 2.063899 > c(m.treated.s-c*se.s, m.treated.s+c*se.s) [1] 101.2905 109.7365 > m.p2 [1] 104 > > x.p.est <- seq(m.treated.s-5*se.s, + m.treated.s+5*se.z1, + length.out = 500) > y.p.est <- dnorm(x.p.est, m.treated.s, se.s) > > plot(x.p.est, y.p.est, type = "l", + lwd=3, + main = paste0("mu (", m.p2, ") estimation \n", "from a sample ", "mean=", round(m.treated.s,2)), + xlab = paste0("Value"), ylab = "Density") > se1 <- c(m.treated.s-se.s, m.treated.s+se.s) > se2 <- c(m.treated.s-2*se.s, m.treated.s+2*se.s) > se3 <- c(m.treated.s-3*se.s, m.treated.s+3*se.s) > abline(v=c(m.treated.s,se1,se2,se3), + col=c('black', 'darkorange', 'darkorange', + 'darkgreen', 'darkgreen', + 'darkblue', 'darkblue'), + lwd=2) > > tscore <- abs(diff/se.s) > pt(tscore, df=n.s-1, lower.tail = F) * 2 [1] 0.01266267 > t.test(treated.s, mu=m.p1, var.equal = T) One Sample t-test data: treated.s t = 2.6946, df = 24, p-value = 0.01266 alternative hypothesis: true mean is not equal to 100 95 percent confidence interval: 101.2905 109.7365 sample estimates: mean of x 105.5135 > {{.:pasted:20251130-225514.png}} {{.:pasted:20251130-225520.png}} {{.:pasted:20251130-225528.png}}