User Tools

Site Tools


summary_of_hypothesis_testing

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Next revision
Previous revision
summary_of_hypothesis_testing [2024/12/02 08:42] – created hkimscil
summary_of_hypothesis_testing [2025/09/13 21:45] (current) – [Hypothesis testing] hkimscil
Line 1: Line 1:
 +====== Hypothesis testing ======
 +see also [[:types of error]]
 +====== Basic ======
 +see first [[:sampling distribution and z-test]]
 +
 +====== Hypothesis testing, exp ======
 +<code>
 +# Remove every object listed by ls() so the script starts from an empty
 +# workspace. NOTE(review): this also discards anything else already loaded
 +# in the session.
 +rm(list=ls())
 +
 +rnorm2 <- function(n,mean,sd){ 
 +  mean+sd*scale(rnorm(n)) 
 +}
 +
 +# Size, mean and standard deviation used to build the simulated populations.
 +n.p <- 10000
 +m.p <- 100
 +sd.p <- 10
 +# p1: population built by rnorm2 with mean m.p and sd sd.p.
 +p1 <- rnorm2(n.p, m.p, sd.p)
 +m.p1 <- mean(p1)
 +sd.p1 <- sd(p1)
 +
 +# p2: same size and sd as p1, with the mean shifted up by 10.
 +p2 <- rnorm2(n.p, m.p+10, sd.p)
 +m.p2 <- mean(p2)
 +sd.p2 <- sd(p2)
 +
 +# n.s is the sample size. se.z1 / se.z2 are the standard errors of the mean
 +# for samples of size n.s, computed from each population's variance.
 +# var() of the one-column matrix returned by rnorm2 is a 1x1 matrix;
 +# c() reduces it to a plain number.
 +n.s <- 100
 +se.z1 <- c(sqrt(var(p1)/n.s))
 +se.z2 <- c(sqrt(var(p2)/n.s))
 +
 +# x grids for the two density curves, 500 points each.
 +# x.p1 runs from 5 SE below mean(p1) up to 5 SE above mean(p2) -
 +# presumably so that a plot drawn from it has room for both curves.
 +x.p1 <- seq(mean(p1)-5*se.z1, 
 +                mean(p2)+5*se.z1, 
 +                length.out = 500)
 +# NOTE(review): this grid uses se.z1 rather than se.z2; the two are equal
 +# here because both populations are built with sd.p - confirm intent.
 +x.p2 <- seq(mean(p2)-5*se.z1, 
 +            mean(p2)+5*se.z1, 
 +            length.out = 500)
 +
 +# Calculate the probability density of a normal distribution 
 +# centered at each population mean, with the standard error as its sd
 +y.p1 <- dnorm(x.p1, mean(p1), se.z1)
 +y.p2 <- dnorm(x.p2, mean(p2), se.z2)
 +
 +# Plot the theoretical PDF
 +plot(x.p1, y.p1, type = "l", 
 +     lwd=3, 
 +     main = "Sample means from p1 and p2 (imaginary)",
 +     xlab = "Value", ylab = "Density")
 +lines(x.p2, y.p2, lty=2, lwd=3, add=T)
 +
 +
 +m.p1 <- mean(p1)
 +se1 <- c(m.p1-se.z1, m.p1+se.z1)
 +se2 <- c(m.p1-2*se.z1, m.p1+2*se.z1)
 +se3 <- c(m.p1-3*se.z1, m.p1+3*se.z1)
 +abline(v=c(m.p1,se1,se2,se3), 
 +       col=c('black', 'orange', 'orange', 
 +             'green', 'green', 
 +             'blue', 'blue'), 
 +       lwd=1)
 +
 +treated.s <- sample(p2, n.s)
 +m.treated.s <- mean(treated.s)
 +abline(v=m.treated.s, col='red', lwd=2)
 +
 +se.z1
 +
 +diff <- m.treated.s-mean(p1)
 +diff/se.z1
 +
 +# usual way - using sample's variance 
 +# instead of p1's variance to get
 +# standard error value
 +se.s <- sqrt(var(treated.s)/n.s)
 +se.s
 +diff/se.s
 +
 +pt(diff/se.s, df=n.s-1, lower.tail = F) * 2
 +t.test(treated.s, mu=m.p1, var.equal = T)
 +
 +</code>
 +===== output =====
 +<code>
 +
 +
 +> rm(list=ls())
 +
 +> rnorm2 <- function(n,mean,sd){ 
 ++   mean+sd*scale(rnorm(n)) 
 ++ }
 +
 +> n.p <- 10000
 +> m.p <- 100
 +> sd.p <- 10
 +> p1 <- rnorm2(n.p, m.p, sd.p)
 +> m.p1 <- mean(p1)
 +> sd.p1 <- sd(p1)
 +
 +> p2 <- rnorm2(n.p, m.p+10, sd.p)
 +> m.p2 <- mean(p2)
 +> sd.p2 <- sd(p2)
 +
 +> n.s <- 100
 +> se.z1 <- c(sqrt(var(p1)/n.s))
 +> se.z2 <- c(sqrt(var(p2)/n.s))
 +
 +> x.p1 <- seq(mean(p1)-5*se.z1, 
 ++                 mean(p2)+5*se.z1, 
 ++                 length.out = 500)
 +> x.p2 <- seq(mean(p2)-5*se.z1, 
 ++             mean(p2)+5*se.z1, 
 ++             length.out = 500)
 +
 +> # Calculate the probability 
 +> # density for a normal distribution
 +> y.p1 <- dnorm(x.p1, mean(p1), se.z1)
 +> y.p2 <- dnorm(x.p2, mean(p2), se.z2)
 +
 +> # Plot the theoretical PDF
 +> plot(x.p1, y.p1, type = "l", 
 ++      lwd=3, 
 ++      main = "Sample means from p1 and p2 (imaginary)",
 ++      xlab = "Value", ylab = "Density")
 +> lines(x.p2, y.p2, lty=2, lwd=3)
 +
 +
 +> m.p1 <- mean(p1)
 +> se1 <- c(m.p1-se.z1, m.p1+se.z1)
 +> se2 <- c(m.p1-2*se.z1, m.p1+2*se.z1)
 +> se3 <- c(m.p1-3*se.z1, m.p1+3*se.z1)
 +> abline(v=c(m.p1,se1,se2,se3), 
 ++        col=c('black', 'orange', 'orange', 
 ++              'green', 'green', 
 ++              'blue', 'blue'), 
 ++        lwd=1)
 +
 +> treated.s <- sample(p2, n.s)
 +> m.treated.s <- mean(treated.s)
 +> abline(v=m.treated.s, col='red', lwd=2)
 +
 +</code>
 +{{:pasted:20250913-184943.png}}
 +
 +<code>
 +> se.z1
 +[1] 1
 +
 +> diff <- m.treated.s-mean(p1)
 +> diff/se.z1
 +[1] 9.057418
 +
 +> # usual way - using sample's variance 
 +> # instead of p1's variance to get
 +> # standard error value
 +> se.s <- sqrt(var(treated.s)/n.s)
 +> se.s
 +[1] 1.015243
 +> diff/se.s
 +[1] 8.921425
 +
 +> pt(diff/se.s, df=n.s-1, lower.tail = F) * 2
 +[1] 2.455388e-14
 +> t.test(treated.s, mu=m.p1, var.equal = T)
 +
 + One Sample t-test
 +
 +data:  treated.s
 +t = 8.9214, df = 99, p-value = 2.455e-14
 +alternative hypothesis: true mean is not equal to 100
 +95 percent confidence interval:
 + 107.0430 111.0719
 +sample estimates:
 +mean of x 
 + 109.0574 
 +
 +
 +</code>
 +====== se value and sample size ======
 +
 <code> <code>
 n.ajstu <- 100000 n.ajstu <- 100000
Line 109: Line 285:
 data.frame(cbind(sss, ses, lower.s2, upper.s2)) data.frame(cbind(sss, ses, lower.s2, upper.s2))
  
-n <- 200+# 12/2 lecture  
 +# note that the test statistic is calculated 
 +# as "diff/se" = "diff/random_error" 
 +n <- 80
 mean.sample <- 103 mean.sample <- 103
-diff <- mean.sample - mean.ajstu  + 
-se <- sd.ajstu / sqrt(n) +sample <- rnorm2(n, mean.sample, sd.ajstu) 
-diff/se+mean(sample) 
 +sd(sample) 
 + 
 +diff <- mean.sample - mean.ajstu # this is actual difference 
 +se <- sd.ajstu / sqrt(n) # this is random error  
 +t.cal <- diff/se 
 +t.cal
 qnorm(0.025, lower.tail = F) qnorm(0.025, lower.tail = F)
 qnorm(0.01/2, lower.tail = F) qnorm(0.01/2, lower.tail = F)
 +qt(0.05/2, n-1, lower.tail=F)
 +
 +t.test(sample, mu=mean.ajstu)
  
 +# or we obtain the exact p value
 +p.value <- pt(t.cal, n-1, lower.tail = F)
 +p.value*2
  
  
  
 </code> </code>
summary_of_hypothesis_testing.1733096556.txt.gz · Last modified: 2024/12/02 08:42 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki