note.w02
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revision | |||
| note.w02 [2025/09/18 10:40] – [output] hkimscil | note.w02 [2025/09/18 10:40] (current) – [T-test sum up] hkimscil | ||
|---|---|---|---|
| Line 1004: | Line 1004: | ||
| </ | </ | ||
| </ | </ | ||
| - | |||
| - | ====== T-test sum up ====== | ||
| - | < | ||
| - | |||
| - | rm(list=ls()) | ||
| - | rnorm2 <- function(n, | ||
| - | mean+sd*scale(rnorm(n)) | ||
| - | } | ||
| - | se <- function(sample) { | ||
| - | sd(sample)/ | ||
| - | } | ||
| - | ss <- function(x) { | ||
| - | sum((x-mean(x))^2) | ||
| - | } | ||
| - | |||
| - | N.p <- 1000000 | ||
| - | m.p <- 100 | ||
| - | sd.p <- 10 | ||
| - | |||
| - | |||
| - | p1 <- rnorm2(N.p, m.p, sd.p) | ||
| - | mean(p1) | ||
| - | sd(p1) | ||
| - | |||
| - | p2 <- rnorm2(N.p, m.p+10, sd.p) | ||
| - | mean(p2) | ||
| - | sd(p2) | ||
| - | |||
| - | hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
| - | main = " | ||
| - | abline(v=mean(p1), | ||
| - | hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
| - | abline(v=mean(p2), | ||
| - | |||
| - | s.size <- 1000 | ||
| - | s2 <- sample(p2, s.size) | ||
| - | mean(s2) | ||
| - | sd(s2) | ||
| - | |||
| - | se.z <- sqrt(var(p1)/ | ||
| - | se.z <- c(se.z) | ||
| - | se.z.range <- c(-2*se.z, | ||
| - | se.z.range | ||
| - | |||
| - | mean(p1)+se.z.range | ||
| - | mean(s2) | ||
| - | |||
| - | z.cal <- (mean(s2) - mean(p1)) / se.z | ||
| - | z.cal | ||
| - | pnorm(z.cal, | ||
| - | |||
| - | z.cal | ||
| - | |||
| - | # principles . . . | ||
| - | # distribution of sample means | ||
| - | iter <- 100000 | ||
| - | means <- c() | ||
| - | for (i in 1:iter) { | ||
| - | m.of.s <- mean(sample(p1, | ||
| - | means <- append(means, | ||
| - | } | ||
| - | |||
| - | hist(means, | ||
| - | xlim = c(mean(means)-3*sd(means), | ||
| - | col = rgb(1, 0, 0, 0.5)) | ||
| - | abline(v=mean(p1), | ||
| - | abline(v=mean(s2), | ||
| - | | ||
| - | lo1 <- mean(p1)-se.z | ||
| - | hi1 <- mean(p1)+se.z | ||
| - | lo2 <- mean(p1)-2*se.z | ||
| - | hi2 <- mean(p1)+2*se.z | ||
| - | abline(v=c(lo1, | ||
| - | | ||
| - | | ||
| - | se.z | ||
| - | c(lo2, hi2) | ||
| - | pnorm(z.cal, | ||
| - | |||
| - | |||
| - | # Note that we use sqrt(var(s2)/ | ||
| - | # as our se value instread of | ||
| - | # sqrt(var(p1)/ | ||
| - | # This is a common practice for R | ||
| - | # In fact, some z.test (made by someone) | ||
| - | # function uses the former rather than | ||
| - | # latter. | ||
| - | |||
| - | sqrt(var(p1)/ | ||
| - | se.z | ||
| - | |||
| - | sqrt(var(s2)/ | ||
| - | se(s2) | ||
| - | |||
| - | t.cal.os <- (mean(s2) - mean(p1))/ se(s2) | ||
| - | z.cal <- (mean(s2) - mean(p1))/ se.z | ||
| - | t.cal.os | ||
| - | z.cal | ||
| - | |||
| - | df.s2 <- length(s2)-1 | ||
| - | df.s2 | ||
| - | p.t.os <- pt(abs(t.cal.os), | ||
| - | p.t.os | ||
| - | t.out <- t.test(s2, mu=mean(p1)) | ||
| - | |||
| - | library(BSDA) | ||
| - | z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) | ||
| - | |||
| - | z.out$statistic # se.z 대신에 se(s2) 값으로 구한 z 값 | ||
| - | z.cal # se.z으로 (sqrt(var(p1)/ | ||
| - | |||
| - | t.out$statistic # se(s2)를 분모로 하여 구한 t 값 | ||
| - | t.cal.os # se(s2)를 이용하여 손으로 구한 t 값 | ||
| - | |||
| - | # But, after all, we use t.test method regardless of | ||
| - | # variation | ||
| - | |||
| - | hist(means, | ||
| - | xlim = c(mean(means)-3*sd(means), | ||
| - | col = rgb(1, 0, 0, 0.5)) | ||
| - | abline(v=mean(p1), | ||
| - | abline(v=mean(s2), | ||
| - | | ||
| - | lo1 <- mean(p1)-se.z | ||
| - | hi1 <- mean(p1)+se.z | ||
| - | lo2 <- mean(p1)-2*se.z | ||
| - | hi2 <- mean(p1)+2*se.z | ||
| - | abline(v=c(lo1, | ||
| - | | ||
| - | | ||
| - | |||
| - | # difference between black and blue line | ||
| - | # divided by | ||
| - | # se(s2) (= random difference) | ||
| - | # t.value | ||
| - | mean(s2) | ||
| - | mean(p1) | ||
| - | diff <- mean(s2)-mean(p1) | ||
| - | diff | ||
| - | se(s2) | ||
| - | diff/se(s2) | ||
| - | |||
| - | t.cal.os | ||
| - | |||
| - | ######################## | ||
| - | # 2 sample t-test | ||
| - | ######################## | ||
| - | # 가정. 아래에서 추출하는 두 | ||
| - | # 샘플의 모집단의 파라미터를 | ||
| - | # 모른다. | ||
| - | s1 <- sample(p1, s.size) | ||
| - | s2 <- sample(p2, s.size) | ||
| - | |||
| - | mean(s1) | ||
| - | mean(s2) | ||
| - | ss(s1) | ||
| - | ss(s2) | ||
| - | df.s1 <- length(s1)-1 | ||
| - | df.s2 <- length(s2)-1 | ||
| - | df.s1 | ||
| - | df.s2 | ||
| - | |||
| - | pooled.variance <- (ss(s1)+ss(s2))/ | ||
| - | pooled.variance | ||
| - | se.ts <- sqrt((pooled.variance/ | ||
| - | se.ts | ||
| - | t.cal.ts <- (mean(s1)-mean(s2))/ | ||
| - | t.cal.ts | ||
| - | p.val.ts <- pt(abs(t.cal.ts), | ||
| - | p.val.ts | ||
| - | |||
| - | t.test(s1, s2, var.equal = T) | ||
| - | |||
| - | se(s1) | ||
| - | se(s2) | ||
| - | |||
| - | mean(s1)+c(-se(s1)*2, | ||
| - | mean(s2)+c(-se(s2)*2, | ||
| - | |||
| - | mean(p1) | ||
| - | mean(p2) | ||
| - | |||
| - | hist(s1, breaks=50, | ||
| - | col = rgb(1, 0, 0, 0.5)) | ||
| - | hist(s2, breaks=50, add=T, col=rgb(0, | ||
| - | abline(v=mean(s1), | ||
| - | # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
| - | abline(v=mean(s2), | ||
| - | |||
| - | diff <- mean(s1)-mean(s2) | ||
| - | se.ts | ||
| - | diff/se.ts | ||
| - | |||
| - | #### | ||
| - | # repeated measure t-test | ||
| - | # we can use the above case | ||
| - | # pop paramter unknown | ||
| - | # two consecutive measurement | ||
| - | # for the same sample | ||
| - | |||
| - | t1 <- s1 | ||
| - | t2 <- s2 | ||
| - | mean(t1) | ||
| - | mean(t2) | ||
| - | diff.s <- t1 - t2 | ||
| - | diff.s | ||
| - | t.cal.rm <- mean(diff.s)/ | ||
| - | t.cal.rm | ||
| - | p.val.rm <- pt(abs(t.cal.rm), | ||
| - | p.val.rm | ||
| - | t.test(s1, s2, paired = T) | ||
| - | |||
| - | # create multiple histogram | ||
| - | s.all <- c(s1,s2) | ||
| - | mean(s.all) | ||
| - | hist(s1, col=' | ||
| - | hist(s2, col=' | ||
| - | abline(v=c(mean(s.all)), | ||
| - | | ||
| - | abline(v=c(mean(s1), | ||
| - | | ||
| - | |||
| - | comb <- data.frame(s1, | ||
| - | dat <- stack(comb) | ||
| - | head(dat) | ||
| - | |||
| - | m.tot <- mean(s.all) | ||
| - | m.s1 <- mean(s1) | ||
| - | m.s2 <- mean(s2) | ||
| - | |||
| - | ss.tot <- ss(s.all) | ||
| - | ss.s1 <- ss(s1) | ||
| - | ss.s2 <- ss(s2) | ||
| - | |||
| - | df.tot <- length(s.all)-1 | ||
| - | df.s1 <- length(s1)-1 | ||
| - | df.s2 <- length(s2)-1 | ||
| - | |||
| - | ms.tot <- var(s.all) | ||
| - | ms.tot | ||
| - | ss.tot/ | ||
| - | |||
| - | var(s1) | ||
| - | ss.s1 / df.s1 | ||
| - | |||
| - | var(s2) | ||
| - | ss.s2 / df.s2 | ||
| - | |||
| - | ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) | ||
| - | ss.b.s2 <- length(s2) * ((m.tot - m.s1)^2) | ||
| - | ss.bet <- ss.b.s1+ss.b.s2 | ||
| - | ss.bet | ||
| - | |||
| - | ss.wit <- ss.s1 + ss.s2 | ||
| - | ss.wit | ||
| - | |||
| - | ss.bet + ss.wit | ||
| - | ss.tot | ||
| - | |||
| - | library(dplyr) | ||
| - | # df.bet <- length(unique(dat)) - 1 | ||
| - | df.bet <- nlevels(dat$ind) - 1 | ||
| - | df.wit <- df.s1+df.s2 | ||
| - | df.bet | ||
| - | df.wit | ||
| - | df.bet+df.wit | ||
| - | df.tot | ||
| - | |||
| - | ms.bet <- ss.bet / df.bet | ||
| - | ms.wit <- ss.wit / df.wit | ||
| - | ms.bet | ||
| - | ms.wit | ||
| - | |||
| - | f.cal <- ms.bet / ms.wit | ||
| - | f.cal | ||
| - | pf(f.cal, df.bet, df.wit, lower.tail = F) | ||
| - | |||
| - | |||
| - | f.test <- aov(dat$values~ dat$ind, data = dat) | ||
| - | summary(f.test) | ||
| - | |||
| - | sqrt(f.cal) | ||
| - | t.cal.ts | ||
| - | |||
| - | # this is anova after all. | ||
| - | </ | ||
note.w02.1758192015.txt.gz · Last modified: by hkimscil
