summary_of_hypothesis_testing
Differences
This shows you the differences between two versions of the page.
| Next revision | Previous revision | ||
| summary_of_hypothesis_testing [2024/12/02 08:42] – created hkimscil | summary_of_hypothesis_testing [2025/09/13 21:45] (current) – [Hypothesis testing] hkimscil | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| + | ====== Hypothesis testing ====== | ||
| + | see also [[:types of error]] | ||
| + | ====== Basic ====== | ||
| + | see first [[:sampling distribution and z-test]] | ||
| + | |||
| + | ====== Hypothesis testing, exp ====== | ||
| + | < | ||
| + | rm(list=ls()) | ||
| + | |||
| + | rnorm2 <- function(n, | ||
| + | mean+sd*scale(rnorm(n)) | ||
| + | } | ||
| + | |||
| + | n.p <- 10000 | ||
| + | m.p <- 100 | ||
| + | sd.p <- 10 | ||
| + | p1 <- rnorm2(n.p, m.p, sd.p) | ||
| + | m.p1 <- mean(p1) | ||
| + | sd.p1 <- sd(p1) | ||
| + | |||
| + | p2 <- rnorm2(n.p, m.p+10, sd.p) | ||
| + | m.p2 <- mean(p2) | ||
| + | sd.p2 <- sd(p2) | ||
| + | |||
| + | n.s <- 100 | ||
| + | se.z1 <- c(sqrt(var(p1)/ | ||
| + | se.z2 <- c(sqrt(var(p2)/ | ||
| + | |||
| + | x.p1 <- seq(mean(p1)-5*se.z1, | ||
| + | mean(p2)+5*se.z1, | ||
| + | length.out = 500) | ||
| + | x.p2 <- seq(mean(p2)-5*se.z1, | ||
| + | mean(p2)+5*se.z1, | ||
| + | length.out = 500) | ||
| + | |||
| + | # Calculate the probability | ||
| + | # density for a normal distribution | ||
| + | y.p1 <- dnorm(x.p1, mean(p1), se.z1) | ||
| + | y.p2 <- dnorm(x.p2, mean(p2), se.z2) | ||
| + | |||
| + | # Plot the theoretical PDF | ||
| + | plot(x.p1, y.p1, type = " | ||
| + | | ||
| + | main = " | ||
| + | xlab = " | ||
| + | lines(x.p2, y.p2, lty=2, lwd=3, add=T) | ||
| + | |||
| + | |||
| + | m.p1 <- mean(p1) | ||
| + | se1 <- c(m.p1-se.z1, | ||
| + | se2 <- c(m.p1-2*se.z1, | ||
| + | se3 <- c(m.p1-3*se.z1, | ||
| + | abline(v=c(m.p1, | ||
| + | | ||
| + | ' | ||
| + | ' | ||
| + | | ||
| + | |||
| + | treated.s <- sample(p2, n.s) | ||
| + | m.treated.s <- mean(treated.s) | ||
| + | abline(v=m.treated.s, | ||
| + | |||
| + | se.z1 | ||
| + | |||
| + | diff <- m.treated.s-mean(p1) | ||
| + | diff/se.z1 | ||
| + | |||
| + | # usual way - using sample's variance | ||
| + | # instead of p1's variance to get | ||
| + | # standard error value | ||
| + | se.s <- sqrt(var(treated.s)/ | ||
| + | se.s | ||
| + | diff/se.s | ||
| + | |||
| + | pt(diff/ | ||
| + | t.test(treated.s, | ||
| + | |||
| + | </ | ||
| + | ===== output ===== | ||
| + | < | ||
| + | > | ||
| + | > | ||
| + | > rm(list=ls()) | ||
| + | > | ||
| + | > rnorm2 <- function(n, | ||
| + | + | ||
| + | + } | ||
| + | > | ||
| + | > n.p <- 10000 | ||
| + | > m.p <- 100 | ||
| + | > sd.p <- 10 | ||
| + | > p1 <- rnorm2(n.p, m.p, sd.p) | ||
| + | > m.p1 <- mean(p1) | ||
| + | > sd.p1 <- sd(p1) | ||
| + | > | ||
| + | > p2 <- rnorm2(n.p, m.p+10, sd.p) | ||
| + | > m.p2 <- mean(p2) | ||
| + | > sd.p2 <- sd(p2) | ||
| + | > | ||
| + | > n.s <- 100 | ||
| + | > se.z1 <- c(sqrt(var(p1)/ | ||
| + | > se.z2 <- c(sqrt(var(p2)/ | ||
| + | > | ||
| + | > x.p1 <- seq(mean(p1)-5*se.z1, | ||
| + | + | ||
| + | + | ||
| + | > x.p2 <- seq(mean(p2)-5*se.z1, | ||
| + | + | ||
| + | + | ||
| + | > | ||
| + | > # Calculate the probability | ||
| + | > # density for a normal distribution | ||
| + | > y.p1 <- dnorm(x.p1, mean(p1), se.z1) | ||
| + | > y.p2 <- dnorm(x.p2, mean(p2), se.z2) | ||
| + | > | ||
| + | > # Plot the theoretical PDF | ||
| + | > plot(x.p1, y.p1, type = " | ||
| + | + lwd=3, | ||
| + | + main = " | ||
| + | + xlab = " | ||
| + | > lines(x.p2, y.p2, lty=2, lwd=3) | ||
| + | > | ||
| + | > | ||
| + | > m.p1 <- mean(p1) | ||
| + | > se1 <- c(m.p1-se.z1, | ||
| + | > se2 <- c(m.p1-2*se.z1, | ||
| + | > se3 <- c(m.p1-3*se.z1, | ||
| + | > abline(v=c(m.p1, | ||
| + | + col=c(' | ||
| + | + ' | ||
| + | + ' | ||
| + | + lwd=1) | ||
| + | > | ||
| + | > treated.s <- sample(p2, n.s) | ||
| + | > m.treated.s <- mean(treated.s) | ||
| + | > abline(v=m.treated.s, | ||
| + | > | ||
| + | </ | ||
| + | {{: | ||
| + | |||
| + | < | ||
| + | > se.z1 | ||
| + | [1] 1 | ||
| + | > | ||
| + | > diff <- m.treated.s-mean(p1) | ||
| + | > diff/se.z1 | ||
| + | [1] 9.057418 | ||
| + | > | ||
| + | > # usual way - using sample's variance | ||
| + | > # instead of p1's variance to get | ||
| + | > # standard error value | ||
| + | > se.s <- sqrt(var(treated.s)/ | ||
| + | > se.s | ||
| + | [1] 1.015243 | ||
| + | > diff/se.s | ||
| + | [1] 8.921425 | ||
| + | > | ||
| + | > pt(diff/ | ||
| + | [1] 2.455388e-14 | ||
| + | > t.test(treated.s, | ||
| + | |||
| + | One Sample t-test | ||
| + | |||
| + | data: treated.s | ||
| + | t = 8.9214, df = 99, p-value = 2.455e-14 | ||
| + | alternative hypothesis: true mean is not equal to 100 | ||
| + | 95 percent confidence interval: | ||
| + | | ||
| + | sample estimates: | ||
| + | mean of x | ||
| + | | ||
| + | |||
| + | > | ||
| + | </ | ||
| + | ====== se value and sample size ====== | ||
| + | |||
| < | < | ||
| n.ajstu <- 100000 | n.ajstu <- 100000 | ||
| Line 109: | Line 285: | ||
| data.frame(cbind(sss, | data.frame(cbind(sss, | ||
| - | n <- 200 | + | # 12/2 lecture |
| + | # note that we draw the statistical calculation | ||
| + | # by " | ||
| + | n <- 80 | ||
| mean.sample <- 103 | mean.sample <- 103 | ||
| - | diff <- mean.sample - mean.ajstu | + | |
| - | se <- sd.ajstu / sqrt(n) | + | sample <- rnorm2(n, mean.sample, |
| - | diff/se | + | mean(sample) |
| + | sd(sample) | ||
| + | |||
| + | diff <- mean.sample - mean.ajstu | ||
| + | se <- sd.ajstu / sqrt(n) | ||
| + | t.cal <- diff/se | ||
| + | t.cal | ||
| qnorm(0.025, | qnorm(0.025, | ||
| qnorm(0.01/ | qnorm(0.01/ | ||
| + | qt(0.05/2, n-1, lower.tail=F) | ||
| + | |||
| + | t.test(sample, | ||
| + | # or we obtain the exact p value | ||
| + | p.value <- pt(t.cal, n-1, lower.tail = F) | ||
| + | p.value*2 | ||
| </ | </ | ||
summary_of_hypothesis_testing.1733096556.txt.gz · Last modified: by hkimscil
