note.w02
Differences
This shows you the differences between two versions of the page.
| note.w02 [2025/09/12 10:38] – [9] hkimscil | note.w02 [2025/09/18 10:40] (current) – [T-test sum up] hkimscil | ||
|---|---|---|---|
| Line 23: | Line 23: | ||
| sd(p1) | sd(p1) | ||
| - | p2 <- rnorm2(N.p, m.p+5, sd.p) | + | p2 <- rnorm2(N.p, m.p+20, sd.p) |
| mean(p2) | mean(p2) | ||
| sd(p2) | sd(p2) | ||
| Line 31: | Line 31: | ||
| var(p1) | var(p1) | ||
| + | hist(p1) | ||
| hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), | hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), | ||
| main = " | main = " | ||
| Line 41: | Line 41: | ||
| hist(p1, breaks=50, col=rgb(0, | hist(p1, breaks=50, col=rgb(0, | ||
| abline(v=mean(p1), | abline(v=mean(p1), | ||
| - | abline(v=mean(p1)-sd(p1), lwd=2) | + | abline(v=m.p1-sd.p1, lwd=2) |
| abline(v=mean(p1)+sd(p1), | abline(v=mean(p1)+sd(p1), | ||
| abline(v=c(m.p1-2*sd.p1, | abline(v=c(m.p1-2*sd.p1, | ||
| Line 60: | Line 60: | ||
| pnorm(m.p1+3*sd.p1, | pnorm(m.p1+3*sd.p1, | ||
| pnorm(m.p1-3*sd.p1, | pnorm(m.p1-3*sd.p1, | ||
| + | |||
| + | pnorm(121, 100, 10) - pnorm(85, 100, 10) | ||
| m.p1 | m.p1 | ||
| Line 69: | Line 71: | ||
| pnorm(1)-pnorm(-1) | pnorm(1)-pnorm(-1) | ||
| pnorm(2)-pnorm(-2) | pnorm(2)-pnorm(-2) | ||
| - | pnorm(3)-pnorm(3) | + | pnorm(3)-pnorm(-3) |
| 1-pnorm(-2)*2 | 1-pnorm(-2)*2 | ||
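For reference, the unchanged lines above are the 68-95-99.7 rule, and the added line computes the same kind of area for the N(100, 10) population used on this page. A small sketch with the expected values (the comments give approximate results):
<code>
pnorm(1) - pnorm(-1)                        # about 0.683
pnorm(2) - pnorm(-2)                        # about 0.954
pnorm(3) - pnorm(-3)                        # about 0.997
pnorm(121, 100, 10) - pnorm(85, 100, 10)    # about 0.915 for N(100, 10)
</code>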
| Line 154: | Line 156: | ||
| ################################ | ################################ | ||
| - | s.size <- 50 | + | s.size <- 10 |
| means.temp <- c() | means.temp <- c() | ||
| Line 199: | Line 201: | ||
| se.z <- sqrt(var(p1)/ | se.z <- sqrt(var(p1)/ | ||
| + | se.z | ||
| se.z <- c(se.z) | se.z <- c(se.z) | ||
| se.z | se.z | ||
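The revised block prints se.z before flattening it with c(). The c(se.z) step is presumably there because rnorm2() is built on scale(), which returns a one-column matrix, so var(p1) and therefore se.z start out as 1 x 1 matrices. A minimal illustration (m is just a throwaway example object):
<code>
m <- scale(rnorm(5))     # scale() returns a 5 x 1 matrix
var(m)                   # a 1 x 1 matrix, not a plain number
c(sqrt(var(m) / 5))      # c() drops the matrix structure, leaving a length-1 numeric
</code>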
| Line 244: | Line 247: | ||
| c(hi2, hiz2) | c(hi2, hiz2) | ||
| c(hi3, hiz3) | c(hi3, hiz3) | ||
| - | |||
| hist(means, breaks=50, | hist(means, breaks=50, | ||
| Line 270: | Line 272: | ||
| col = rgb(1, 1, 1, .5)) | col = rgb(1, 1, 1, .5)) | ||
| abline(v=mean(means), | abline(v=mean(means), | ||
| - | abline(v=m.sample.i.got, | + | abline(v=c(lo1, |
| + | | ||
| + | | ||
| + | abline(v=m.sample.i.got, | ||
| # what is the probability of getting | # what is the probability of getting | ||
| Line 301: | Line 306: | ||
| sd(means) | sd(means) | ||
| - | tmp <- mean(means) - (m.s.from.p2 - mean(means)) | + | m.k <- mean(s.from.p2) |
| + | se.k <- sd(s.from.p2)/ | ||
| + | |||
| + | |||
| + | tmp <- mean(means) - (m.s.from.p2 | ||
| + | | ||
| tmp | tmp | ||
| Line 308: | Line 318: | ||
| col = rgb(1, 1, 1, .5)) | col = rgb(1, 1, 1, .5)) | ||
| abline(v=mean(means), | abline(v=mean(means), | ||
| + | abline(v=c(lo1, | ||
| + | | ||
| + | | ||
| abline(v=m.s.from.p2, | abline(v=m.s.from.p2, | ||
| Line 315: | Line 328: | ||
| m.s.from.p2 | m.s.from.p2 | ||
| pnorm(m.s.from.p2, | pnorm(m.s.from.p2, | ||
| + | pnorm(m.s.from.p2, | ||
| # then, what is the probability of getting | # then, what is the probability of getting | ||
| # greater than m.sample.i.got and | # greater than m.sample.i.got and | ||
| Line 323: | Line 336: | ||
| abline(v=tmp, | abline(v=tmp, | ||
| 2 * pnorm(m.s.from.p2, | 2 * pnorm(m.s.from.p2, | ||
| + | |||
| + | 2 * pnorm(m.s.from.p2, | ||
| + | |||
| se.z | se.z | ||
| Line 338: | Line 354: | ||
| pt(z.cal, 49, lower.tail = F)*2 | pt(z.cal, 49, lower.tail = F)*2 | ||
| t.test(s.from.p2, | t.test(s.from.p2, | ||
| + | |||
| + | |||
| + | |||
| </ | </ | ||
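The block above ends by computing a two-tailed p-value by hand, first with pnorm() and the population-based standard error, then with a standard error based on the sample sd, and finally checking the result against pt() and t.test(). A self-contained sketch of that comparison, using made-up names (x, mu0, sigma0) rather than the objects defined earlier:
<code>
set.seed(1)
x      <- rnorm(50, mean = 105, sd = 10)   # an illustrative sample, n = 50
mu0    <- 100                              # hypothesized population mean
sigma0 <- 10                               # population sd, assumed known for the z version

se.z <- sigma0 / sqrt(length(x))           # se based on the population sd
se.t <- sd(x)  / sqrt(length(x))           # se based on the sample sd

z.cal <- (mean(x) - mu0) / se.z
t.cal <- (mean(x) - mu0) / se.t

2 * pnorm(abs(z.cal), lower.tail = FALSE)                    # two-tailed z p-value
2 * pt(abs(t.cal), df = length(x) - 1, lower.tail = FALSE)   # two-tailed t p-value
t.test(x, mu = mu0)                        # should reproduce the t version
</code>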
| Line 753: | Line 772: | ||
| <WRAP column half> | <WRAP column half> | ||
| ........................................................................... | ........................................................................... | ||
| + | [[:central limit theorem]] | ||
| + | $$\overline{X} \sim \displaystyle \text{N} \left(\mu, \dfrac{\sigma^{2}}{n} \right)$$ | ||
| + | [[: | ||
| + | [[:types of error]] | ||
| + | [[:r:types of error]] | ||
| + | |||
| </ | </ | ||
| </ | </ | ||
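The central limit theorem formula added above says the sample mean is distributed (approximately) as N(mu, sigma^2/n). A quick empirical check, with illustrative numbers only:
<code>
set.seed(2)
pop   <- rnorm(1e6, mean = 100, sd = 10)            # an illustrative "population"
n     <- 25
means <- replicate(10000, mean(sample(pop, n)))     # 10,000 sample means

mean(means)   # close to 100  (mu)
sd(means)     # close to 10 / sqrt(25) = 2  (sigma / sqrt(n))
hist(means, breaks = 50)
</code>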
| Line 870: | Line 895: | ||
| <WRAP column half> | <WRAP column half> | ||
| ........................................................................... | ........................................................................... | ||
| + | {{: | ||
| </ | </ | ||
| </ | </ | ||
| Line 887: | Line 913: | ||
| > m.s.from.p2 <- mean(s.from.p2) | > m.s.from.p2 <- mean(s.from.p2) | ||
| > m.s.from.p2 | > m.s.from.p2 | ||
| - | [1] 119.0929 | + | [1] 116.0821 |
| > | > | ||
| > se.s | > se.s | ||
| - | [1] 3.163228 | + | [1] 3.166623 |
| > se.z | > se.z | ||
| [1] 3.162278 | [1] 3.162278 | ||
| > sd(means) | > sd(means) | ||
| - | [1] 3.163228 | + | [1] 3.166623 |
| > | > | ||
| > m.k <- mean(s.from.p2) | > m.k <- mean(s.from.p2) | ||
| Line 901: | Line 927: | ||
| > | > | ||
| > tmp <- mean(means) - (m.s.from.p2 | > tmp <- mean(means) - (m.s.from.p2 | ||
| - | + | + | + |
| > tmp | > tmp | ||
| - | [1] 80.90409 | + | [1] 83.92356 |
| > | > | ||
| > hist(means, breaks=30, | > hist(means, breaks=30, | ||
| Line 909: | Line 935: | ||
| + col = rgb(1, 1, 1, .5)) | + col = rgb(1, 1, 1, .5)) | ||
| > abline(v=mean(means), | > abline(v=mean(means), | ||
| + | > abline(v=c(lo1, | ||
| + | + col=c(" | ||
| + | + lwd=2) | ||
| > abline(v=m.s.from.p2, | > abline(v=m.s.from.p2, | ||
| > | > | ||
| Line 915: | Line 944: | ||
| > # m.sample.i.got? | > # m.sample.i.got? | ||
| > m.s.from.p2 | > m.s.from.p2 | ||
| - | [1] 119.0929 | + | [1] 116.0821 |
| > pnorm(m.s.from.p2, | > pnorm(m.s.from.p2, | ||
| - | [1] 7.816511e-10 | + | [1] 1.832221e-07 |
| > pnorm(m.s.from.p2, | > pnorm(m.s.from.p2, | ||
| [1] 0.5 | [1] 0.5 | ||
| Line 927: | Line 956: | ||
| > abline(v=tmp, | > abline(v=tmp, | ||
| > 2 * pnorm(m.s.from.p2, | > 2 * pnorm(m.s.from.p2, | ||
| - | [1] 1.563302e-09 | + | [1] 3.664442e-07 |
| > | > | ||
| > 2 * pnorm(m.s.from.p2, | > 2 * pnorm(m.s.from.p2, | ||
| Line 936: | Line 965: | ||
| [1] 3.162278 | [1] 3.162278 | ||
| > sd(s.from.p2)/ | > sd(s.from.p2)/ | ||
| - | [1] 3.35771 | + | [1] 3.209216 |
| > se.z.adjusted <- sqrt(var(s.from.p2)/ | > se.z.adjusted <- sqrt(var(s.from.p2)/ | ||
| > se.z.adjusted | > se.z.adjusted | ||
| - | [1] 3.35771 | + | [1] 3.209216 |
| > 2 * pnorm(m.s.from.p2, | > 2 * pnorm(m.s.from.p2, | ||
| + | + | ||
| - | [1] 1.298387e-08 | + | [1] 5.408382e-07 |
| > | > | ||
| > z.cal <- (m.s.from.p2 - mean(p1))/ | > z.cal <- (m.s.from.p2 - mean(p1))/ | ||
| > z.cal | > z.cal | ||
| - | [1] 5.686277 | + | [1] 5.011228 |
| > pnorm(z.cal, | > pnorm(z.cal, | ||
| - | [1] 1.298387e-08 | + | [1] 5.408382e-07 |
| > | > | ||
| > | > | ||
| > pt(z.cal, 49, lower.tail = F)*2 | > pt(z.cal, 49, lower.tail = F)*2 | ||
| - | [1] 7.095934e-07 | + | [1] 7.443756e-06 |
| > t.test(s.from.p2, | > t.test(s.from.p2, | ||
| Line 958: | Line 987: | ||
| data: s.from.p2 | data: s.from.p2 | ||
| - | t = 5.6863, df = 9, p-value = 0.0002995 | + | t = 5.0112, df = 9, p-value = 0.0007277 |
| alternative hypothesis: true mean is not equal to 100 | alternative hypothesis: true mean is not equal to 100 | ||
| 95 percent confidence interval: | 95 percent confidence interval: | ||
| - | 111.4972 126.6885 | + | 108.8224 123.3419 |
| sample estimates: | sample estimates: | ||
| mean of x | mean of x | ||
| - | 119.0929 | + | 116.0821 |
| - | + | > | |
| - | > | + | |
| > | > | ||
| </ | </ | ||
| Line 973: | Line 1001: | ||
| ........................................................................... | ........................................................................... | ||
| {{: | {{: | ||
| + | {{: | ||
| </ | </ | ||
| </ | </ | ||
| - | ====== T-test sum up ====== | ||
| - | < | ||
| - | |||
| - | rm(list=ls()) | ||
| - | rnorm2 <- function(n, | ||
| - | mean+sd*scale(rnorm(n)) | ||
| - | } | ||
| - | se <- function(sample) { | ||
| - | sd(sample)/ | ||
| - | } | ||
| - | ss <- function(x) { | ||
| - | sum((x-mean(x))^2) | ||
| - | } | ||
| - | |||
| - | N.p <- 1000000 | ||
| - | m.p <- 100 | ||
| - | sd.p <- 10 | ||
| - | |||
| - | |||
| - | p1 <- rnorm2(N.p, m.p, sd.p) | ||
| - | mean(p1) | ||
| - | sd(p1) | ||
| - | |||
| - | p2 <- rnorm2(N.p, m.p+10, sd.p) | ||
| - | mean(p2) | ||
| - | sd(p2) | ||
| - | |||
| - | hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
| - | main = " | ||
| - | abline(v=mean(p1), | ||
| - | hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
| - | abline(v=mean(p2), | ||
| - | |||
| - | s.size <- 1000 | ||
| - | s2 <- sample(p2, s.size) | ||
| - | mean(s2) | ||
| - | sd(s2) | ||
| - | |||
| - | se.z <- sqrt(var(p1)/ | ||
| - | se.z <- c(se.z) | ||
| - | se.z.range <- c(-2*se.z, | ||
| - | se.z.range | ||
| - | |||
| - | mean(p1)+se.z.range | ||
| - | mean(s2) | ||
| - | |||
| - | z.cal <- (mean(s2) - mean(p1)) / se.z | ||
| - | z.cal | ||
| - | pnorm(z.cal, | ||
| - | |||
| - | z.cal | ||
| - | |||
| - | # principles . . . | ||
| - | # distribution of sample means | ||
| - | iter <- 100000 | ||
| - | means <- c() | ||
| - | for (i in 1:iter) { | ||
| - | m.of.s <- mean(sample(p1, | ||
| - | means <- append(means, | ||
| - | } | ||
| - | |||
| - | hist(means, | ||
| - | xlim = c(mean(means)-3*sd(means), | ||
| - | col = rgb(1, 0, 0, 0.5)) | ||
| - | abline(v=mean(p1), | ||
| - | abline(v=mean(s2), | ||
| - | | ||
| - | lo1 <- mean(p1)-se.z | ||
| - | hi1 <- mean(p1)+se.z | ||
| - | lo2 <- mean(p1)-2*se.z | ||
| - | hi2 <- mean(p1)+2*se.z | ||
| - | abline(v=c(lo1, | ||
| - | | ||
| - | | ||
| - | se.z | ||
| - | c(lo2, hi2) | ||
| - | pnorm(z.cal, | ||
| - | |||
| - | |||
| - | # Note that we use sqrt(var(s2)/ | ||
- | # as our se value instead of | ||
| - | # sqrt(var(p1)/ | ||
| - | # This is a common practice for R | ||
| - | # In fact, some z.test (made by someone) | ||
| - | # function uses the former rather than | ||
| - | # latter. | ||
| - | |||
| - | sqrt(var(p1)/ | ||
| - | se.z | ||
| - | |||
| - | sqrt(var(s2)/ | ||
| - | se(s2) | ||
| - | |||
| - | t.cal.os <- (mean(s2) - mean(p1))/ se(s2) | ||
| - | z.cal <- (mean(s2) - mean(p1))/ se.z | ||
| - | t.cal.os | ||
| - | z.cal | ||
| - | |||
| - | df.s2 <- length(s2)-1 | ||
| - | df.s2 | ||
| - | p.t.os <- pt(abs(t.cal.os), | ||
| - | p.t.os | ||
| - | t.out <- t.test(s2, mu=mean(p1)) | ||
| - | |||
| - | library(BSDA) | ||
| - | z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) | ||
| - | |||
- | z.out$statistic # z value computed with se(s2) instead of se.z | ||
- | z.cal # z value computed by hand with se.z (sqrt(var(p1)/ | ||
| - | |||
- | t.out$statistic # t value computed with se(s2) in the denominator | ||
- | t.cal.os # t value computed by hand using se(s2) | ||
| - | |||
| - | # But, after all, we use t.test method regardless of | ||
| - | # variation | ||
| - | |||
| - | hist(means, | ||
| - | xlim = c(mean(means)-3*sd(means), | ||
| - | col = rgb(1, 0, 0, 0.5)) | ||
| - | abline(v=mean(p1), | ||
| - | abline(v=mean(s2), | ||
| - | | ||
| - | lo1 <- mean(p1)-se.z | ||
| - | hi1 <- mean(p1)+se.z | ||
| - | lo2 <- mean(p1)-2*se.z | ||
| - | hi2 <- mean(p1)+2*se.z | ||
| - | abline(v=c(lo1, | ||
| - | | ||
| - | | ||
| - | |||
| - | # difference between black and blue line | ||
| - | # divided by | ||
| - | # se(s2) (= random difference) | ||
| - | # t.value | ||
| - | mean(s2) | ||
| - | mean(p1) | ||
| - | diff <- mean(s2)-mean(p1) | ||
| - | diff | ||
| - | se(s2) | ||
| - | diff/se(s2) | ||
| - | |||
| - | t.cal.os | ||
| - | |||
| - | ######################## | ||
| - | # 2 sample t-test | ||
| - | ######################## | ||
- | # Assumption: the population | ||
- | # parameters of the two samples | ||
- | # drawn below are unknown. | ||
| - | s1 <- sample(p1, s.size) | ||
| - | s2 <- sample(p2, s.size) | ||
| - | |||
| - | mean(s1) | ||
| - | mean(s2) | ||
| - | ss(s1) | ||
| - | ss(s2) | ||
| - | df.s1 <- length(s1)-1 | ||
| - | df.s2 <- length(s2)-1 | ||
| - | df.s1 | ||
| - | df.s2 | ||
| - | |||
| - | pooled.variance <- (ss(s1)+ss(s2))/ | ||
| - | pooled.variance | ||
| - | se.ts <- sqrt((pooled.variance/ | ||
| - | se.ts | ||
| - | t.cal.ts <- (mean(s1)-mean(s2))/ | ||
| - | t.cal.ts | ||
| - | p.val.ts <- pt(abs(t.cal.ts), | ||
| - | p.val.ts | ||
| - | |||
| - | t.test(s1, s2, var.equal = T) | ||
| - | |||
| - | se(s1) | ||
| - | se(s2) | ||
| - | |||
| - | mean(s1)+c(-se(s1)*2, | ||
| - | mean(s2)+c(-se(s2)*2, | ||
| - | |||
| - | mean(p1) | ||
| - | mean(p2) | ||
| - | |||
| - | hist(s1, breaks=50, | ||
| - | col = rgb(1, 0, 0, 0.5)) | ||
| - | hist(s2, breaks=50, add=T, col=rgb(0, | ||
| - | abline(v=mean(s1), | ||
| - | # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
| - | abline(v=mean(s2), | ||
| - | |||
| - | diff <- mean(s1)-mean(s2) | ||
| - | se.ts | ||
| - | diff/se.ts | ||
| - | |||
| - | #### | ||
| - | # repeated measure t-test | ||
| - | # we can use the above case | ||
- | # pop parameter unknown | ||
| - | # two consecutive measurement | ||
| - | # for the same sample | ||
| - | |||
| - | t1 <- s1 | ||
| - | t2 <- s2 | ||
| - | mean(t1) | ||
| - | mean(t2) | ||
| - | diff.s <- t1 - t2 | ||
| - | diff.s | ||
| - | t.cal.rm <- mean(diff.s)/ | ||
| - | t.cal.rm | ||
| - | p.val.rm <- pt(abs(t.cal.rm), | ||
| - | p.val.rm | ||
| - | t.test(s1, s2, paired = T) | ||
| - | |||
| - | # create multiple histogram | ||
| - | s.all <- c(s1,s2) | ||
| - | mean(s.all) | ||
| - | hist(s1, col=' | ||
| - | hist(s2, col=' | ||
| - | abline(v=c(mean(s.all)), | ||
| - | | ||
| - | abline(v=c(mean(s1), | ||
| - | | ||
| - | |||
| - | comb <- data.frame(s1, | ||
| - | dat <- stack(comb) | ||
| - | head(dat) | ||
| - | |||
| - | m.tot <- mean(s.all) | ||
| - | m.s1 <- mean(s1) | ||
| - | m.s2 <- mean(s2) | ||
| - | |||
| - | ss.tot <- ss(s.all) | ||
| - | ss.s1 <- ss(s1) | ||
| - | ss.s2 <- ss(s2) | ||
| - | |||
| - | df.tot <- length(s.all)-1 | ||
| - | df.s1 <- length(s1)-1 | ||
| - | df.s2 <- length(s2)-1 | ||
| - | |||
| - | ms.tot <- var(s.all) | ||
| - | ms.tot | ||
| - | ss.tot/ | ||
| - | |||
| - | var(s1) | ||
| - | ss.s1 / df.s1 | ||
| - | |||
| - | var(s2) | ||
| - | ss.s2 / df.s2 | ||
| - | |||
| - | ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) | ||
- | ss.b.s2 <- length(s2) * ((m.tot - m.s2)^2) | ||
| - | ss.bet <- ss.b.s1+ss.b.s2 | ||
| - | ss.bet | ||
| - | |||
| - | ss.wit <- ss.s1 + ss.s2 | ||
| - | ss.wit | ||
| - | |||
| - | ss.bet + ss.wit | ||
| - | ss.tot | ||
| - | |||
| - | library(dplyr) | ||
| - | # df.bet <- length(unique(dat)) - 1 | ||
| - | df.bet <- nlevels(dat$ind) - 1 | ||
| - | df.wit <- df.s1+df.s2 | ||
| - | df.bet | ||
| - | df.wit | ||
| - | df.bet+df.wit | ||
| - | df.tot | ||
| - | |||
| - | ms.bet <- ss.bet / df.bet | ||
| - | ms.wit <- ss.wit / df.wit | ||
| - | ms.bet | ||
| - | ms.wit | ||
| - | |||
| - | f.cal <- ms.bet / ms.wit | ||
| - | f.cal | ||
| - | pf(f.cal, df.bet, df.wit, lower.tail = F) | ||
| - | |||
| - | |||
| - | f.test <- aov(dat$values~ dat$ind, data = dat) | ||
| - | summary(f.test) | ||
| - | |||
| - | sqrt(f.cal) | ||
| - | t.cal.ts | ||
| - | |||
| - | # this is anova after all. | ||
| - | </ | ||
| - | |||
| - | ====== output ====== | ||
| - | < | ||
| - | > rm(list=ls()) | ||
| - | > rnorm2 <- function(n, | ||
| - | + | ||
| - | + } | ||
| - | > se <- function(sample) { | ||
| - | + | ||
| - | + } | ||
| - | > ss <- function(x) { | ||
| - | + | ||
| - | + } | ||
| - | > | ||
| - | > N.p <- 1000000 | ||
| - | > m.p <- 100 | ||
| - | > sd.p <- 10 | ||
| - | > | ||
| - | > | ||
| - | > p1 <- rnorm2(N.p, m.p, sd.p) | ||
| - | > mean(p1) | ||
| - | [1] 100 | ||
| - | > sd(p1) | ||
| - | [1] 10 | ||
| - | > | ||
| - | > p2 <- rnorm2(N.p, m.p+10, sd.p) | ||
| - | > mean(p2) | ||
| - | [1] 110 | ||
| - | > sd(p2) | ||
| - | [1] 10 | ||
| - | > | ||
| - | > hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
| - | + main = " | ||
| - | > abline(v=mean(p1), | ||
| - | > hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
| - | > abline(v=mean(p2), | ||
| - | > | ||
| - | > s.size <- 1000 | ||
| - | > s2 <- sample(p2, s.size) | ||
| - | > mean(s2) | ||
| - | [1] 110.4892 | ||
| - | > sd(s2) | ||
| - | [1] 10.36614 | ||
| - | > | ||
| - | > se.z <- sqrt(var(p1)/ | ||
| - | > se.z <- c(se.z) | ||
| - | > se.z.range <- c(-2*se.z, | ||
| - | > se.z.range | ||
| - | [1] -0.6324555 | ||
| - | > | ||
| - | > mean(p1)+se.z.range | ||
| - | [1] 99.36754 100.63246 | ||
| - | > mean(s2) | ||
| - | [1] 110.4892 | ||
| - | > | ||
| - | > z.cal <- (mean(s2) - mean(p1)) / se.z | ||
| - | > z.cal | ||
| - | [1] 33.16976 | ||
| - | > pnorm(z.cal, | ||
| - | [1] 2.93954e-241 | ||
| - | > | ||
| - | > z.cal | ||
| - | [1] 33.16976 | ||
| - | > | ||
| - | > # principles . . . | ||
| - | > # distribution of sample means | ||
| - | > iter <- 100000 | ||
| - | > means <- c() | ||
| - | > for (i in 1:iter) { | ||
| - | + | ||
| - | + means <- append(means, | ||
| - | + } | ||
| - | > | ||
| - | > hist(means, | ||
| - | + xlim = c(mean(means)-3*sd(means), | ||
| - | + col = rgb(1, 0, 0, 0.5)) | ||
| - | > abline(v=mean(p1), | ||
| - | > abline(v=mean(s2), | ||
| - | + col=" | ||
| - | > lo1 <- mean(p1)-se.z | ||
| - | > hi1 <- mean(p1)+se.z | ||
| - | > lo2 <- mean(p1)-2*se.z | ||
| - | > hi2 <- mean(p1)+2*se.z | ||
| - | > abline(v=c(lo1, | ||
| - | + col=c(" | ||
| - | + lwd=2) | ||
| - | > se.z | ||
| - | [1] 0.3162278 | ||
| - | > c(lo2, hi2) | ||
| - | [1] 99.36754 100.63246 | ||
| - | > pnorm(z.cal, | ||
| - | [1] 2.93954e-241 | ||
| - | > | ||
| - | > | ||
| - | > # Note that we use sqrt(var(s2)/ | ||
- | > # as our se value instead of | ||
| - | > # sqrt(var(p1)/ | ||
| - | > # This is a common practice for R | ||
| - | > # In fact, some z.test (made by someone) | ||
| - | > # function uses the former rather than | ||
| - | > # latter. | ||
| - | > | ||
| - | > sqrt(var(p1)/ | ||
| - | [,1] | ||
| - | [1,] 0.3162278 | ||
| - | > se.z | ||
| - | [1] 0.3162278 | ||
| - | > | ||
| - | > sqrt(var(s2)/ | ||
| - | [1] 0.3278061 | ||
| - | > se(s2) | ||
| - | [1] 0.3278061 | ||
| - | > | ||
| - | > t.cal.os <- (mean(s2) - mean(p1))/ se(s2) | ||
| - | > z.cal <- (mean(s2) - mean(p1))/ se.z | ||
| - | > t.cal.os | ||
| - | [1] 31.99818 | ||
| - | > z.cal | ||
| - | [1] 33.16976 | ||
| - | > | ||
| - | > df.s2 <- length(s2)-1 | ||
| - | > df.s2 | ||
| - | [1] 999 | ||
| - | > p.t.os <- pt(abs(t.cal.os), | ||
| - | > p.t.os | ||
| - | [1] 3.162139e-155 | ||
| - | > t.out <- t.test(s2, mu=mean(p1)) | ||
| - | > | ||
| - | > library(BSDA) | ||
- | Loading required package: | ||
| - | |||
- | Attaching package: | ||
| - | |||
| - | The following object is masked from ‘package: | ||
| - | |||
| - | Orange | ||
| - | |||
- | Warning message(s): | ||
- | package ‘BSDA’ was built under R version 4.3.3 | ||
| - | > z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) | ||
| - | > | ||
- | > z.out$statistic # z value computed with se(s2) instead of se.z | ||
| - | z | ||
| - | 31.9833 | ||
- | > z.cal # z value computed by hand with se.z (sqrt(var(p1)/ | ||
| - | [1] 33.16976 | ||
| - | > | ||
- | > t.out$statistic # t value computed with se(s2) in the denominator | ||
| - | | ||
| - | 31.99818 | ||
- | > t.cal.os # t value computed by hand using se(s2) | ||
| - | [1] 31.99818 | ||
| - | > | ||
| - | > # But, after all, we use t.test method regardless of | ||
| - | > # variation | ||
| - | > | ||
| - | > hist(means, | ||
| - | + xlim = c(mean(means)-3*sd(means), | ||
| - | + col = rgb(1, 0, 0, 0.5)) | ||
| - | > abline(v=mean(p1), | ||
| - | > abline(v=mean(s2), | ||
| - | + col=" | ||
| - | > lo1 <- mean(p1)-se.z | ||
| - | > hi1 <- mean(p1)+se.z | ||
| - | > lo2 <- mean(p1)-2*se.z | ||
| - | > hi2 <- mean(p1)+2*se.z | ||
| - | > abline(v=c(lo1, | ||
| - | + col=c(" | ||
| - | + lwd=2) | ||
| - | > | ||
| - | > # difference between black and blue line | ||
| - | > # divided by | ||
| - | > # se(s2) (= random difference) | ||
| - | > # t.value | ||
| - | > mean(s2) | ||
| - | [1] 110.4892 | ||
| - | > mean(p1) | ||
| - | [1] 100 | ||
| - | > diff <- mean(s2)-mean(p1) | ||
| - | > diff | ||
| - | [1] 10.4892 | ||
| - | > se(s2) | ||
| - | [1] 0.3278061 | ||
| - | > diff/se(s2) | ||
| - | [1] 31.99818 | ||
| - | > | ||
| - | > t.cal.os | ||
| - | [1] 31.99818 | ||
| - | > | ||
| - | > ######################## | ||
| - | > # 2 sample t-test | ||
| - | > ######################## | ||
- | > # Assumption: the population | ||
- | > # parameters of the two samples | ||
- | > # drawn below are unknown. | ||
| - | > s1 <- sample(p1, s.size) | ||
| - | > s2 <- sample(p2, s.size) | ||
| - | > | ||
| - | > mean(s1) | ||
| - | [1] 100.7138 | ||
| - | > mean(s2) | ||
| - | [1] 109.8804 | ||
| - | > ss(s1) | ||
| - | [1] 108288.9 | ||
| - | > ss(s2) | ||
| - | [1] 100751 | ||
| - | > df.s1 <- length(s1)-1 | ||
| - | > df.s2 <- length(s2)-1 | ||
| - | > df.s1 | ||
| - | [1] 999 | ||
| - | > df.s2 | ||
| - | [1] 999 | ||
| - | > | ||
| - | > pooled.variance <- (ss(s1)+ss(s2))/ | ||
| - | > pooled.variance | ||
| - | [1] 104.6246 | ||
| - | > se.ts <- sqrt((pooled.variance/ | ||
| - | > se.ts | ||
| - | [1] 0.4574376 | ||
| - | > t.cal.ts <- (mean(s1)-mean(s2))/ | ||
| - | > t.cal.ts | ||
| - | [1] -20.03892 | ||
| - | > p.val.ts <- pt(abs(t.cal.ts), | ||
| - | > p.val.ts | ||
| - | [1] 1.522061e-81 | ||
| - | > | ||
| - | > t.test(s1, s2, var.equal = T) | ||
| - | |||
| - | Two Sample t-test | ||
| - | |||
| - | data: s1 and s2 | ||
| - | t = -20.039, df = 1998, p-value < 2.2e-16 | ||
| - | alternative hypothesis: true difference in means is not equal to 0 | ||
| - | 95 percent confidence interval: | ||
| - | | ||
| - | sample estimates: | ||
| - | mean of x mean of y | ||
| - | | ||
| - | |||
| - | > | ||
| - | > se(s1) | ||
| - | [1] 0.3292374 | ||
| - | > se(s2) | ||
| - | [1] 0.3175719 | ||
| - | > | ||
| - | > mean(s1)+c(-se(s1)*2, | ||
| - | [1] 100.0553 101.3723 | ||
| - | > mean(s2)+c(-se(s2)*2, | ||
| - | [1] 109.2452 110.5155 | ||
| - | > | ||
| - | > mean(p1) | ||
| - | [1] 100 | ||
| - | > mean(p2) | ||
| - | [1] 110 | ||
| - | > | ||
| - | > hist(s1, breaks=50, | ||
| - | + col = rgb(1, 0, 0, 0.5)) | ||
| - | > hist(s2, breaks=50, add=T, col=rgb(0, | ||
| - | > abline(v=mean(s1), | ||
| - | > # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
| - | > abline(v=mean(s2), | ||
| - | > | ||
| - | > diff <- mean(s1)-mean(s2) | ||
| - | > se.ts | ||
| - | [1] 0.4574376 | ||
| - | > diff/se.ts | ||
| - | [1] -20.03892 | ||
| - | > | ||
| - | > #### | ||
| - | > # repeated measure t-test | ||
| - | > # we can use the above case | ||
- | > # pop parameter unknown | ||
| - | > # two consecutive measurement | ||
| - | > # for the same sample | ||
| - | > | ||
| - | > t1 <- s1 | ||
| - | > t2 <- s2 | ||
| - | > mean(t1) | ||
| - | [1] 100.7138 | ||
| - | > mean(t2) | ||
| - | [1] 109.8804 | ||
| - | > diff.s <- t1 - t2 | ||
| - | > diff.s | ||
| - | [1] -11.22445947 | ||
| - | | ||
| - | [15] | ||
| - | [22] -1.00704696 | ||
| - | [29] -10.58244069 -21.50608157 -53.61230898 | ||
| - | [36] | ||
| - | [43] -19.16855657 | ||
| - | [50] | ||
| - | [57] -8.28743679 -38.33995044 -50.60884456 | ||
| - | [64] | ||
| - | [71] -20.50728324 -19.37275012 -23.27866089 -11.74962053 -34.35317908 -26.10460199 | ||
| - | [78] -24.79252799 | ||
| - | [85] -23.66447959 | ||
| - | [92] -21.22842221 -11.68774036 | ||
| - | [99] 19.63435733 -14.40578016 -11.24172079 | ||
| - | | ||
| - | [113] -10.23092427 | ||
| - | [120] -31.50478963 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [183] -38.63379525 -16.58800530 -17.62672134 | ||
| - | [190] -22.25276559 | ||
| - | [197] -17.21120659 | ||
| - | | ||
| - | | ||
| - | [218] -24.41623403 | ||
| - | [225] -13.27091592 | ||
| - | [232] -11.52528966 | ||
| - | [239] -13.34969773 -37.98584006 | ||
| - | [246] -24.07983488 | ||
| - | [253] -12.30101864 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [288] -10.10160879 -11.59352210 | ||
| - | | ||
| - | [302] -35.18133234 | ||
| - | | ||
| - | | ||
| - | [323] -31.81200194 -12.67414744 | ||
| - | | ||
| - | [337] -17.60471709 | ||
| - | [344] -10.52672545 | ||
| - | | ||
| - | [358] -23.18199896 -18.58488442 -26.09162223 | ||
| - | | ||
| - | | ||
| - | [379] -37.25886118 | ||
| - | [386] -10.17057116 -11.45984282 -11.29130871 -15.60613249 | ||
| - | [393] -21.77681110 | ||
| - | | ||
| - | [407] -17.52668230 | ||
| - | [414] -14.30821541 -10.88427284 | ||
| - | | ||
| - | [428] -23.35919604 | ||
| - | | ||
| - | [442] -19.75974381 | ||
| - | | ||
| - | | ||
| - | [463] -19.77773032 | ||
| - | | ||
| - | | ||
| - | [484] -18.62856464 -25.58573774 -11.07888550 -16.77332405 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [519] -27.58434557 | ||
| - | [526] -44.00039083 -11.40638340 -10.42772214 | ||
| - | [533] -27.11548294 -26.69534077 | ||
| - | | ||
| - | [547] -18.87000360 | ||
| - | [554] -17.69850124 -11.92887098 | ||
| - | | ||
| - | | ||
| - | | ||
| - | [582] -12.87197389 -27.14630137 -14.53291112 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [645] -42.02480771 -13.42077241 | ||
| - | [652] -17.68303368 | ||
| - | [659] -19.48957914 -28.88885682 -10.59583907 -32.20537872 -11.95661953 -23.77887711 -23.51551361 | ||
| - | [666] -17.32443690 -10.86310515 | ||
| - | [673] -12.41671968 | ||
| - | | ||
| - | [687] -21.75000477 | ||
| - | [694] -34.30010114 | ||
| - | [701] -19.46561809 | ||
| - | [708] -27.91638644 -12.61381331 | ||
| - | [715] -20.55376627 | ||
| - | [722] -14.38944775 | ||
| - | | ||
| - | [736] -20.25705972 | ||
| - | [743] -13.32818441 | ||
| - | | ||
| - | [757] -19.16011286 -13.91801221 -11.66649472 | ||
| - | [764] -14.75337241 | ||
| - | | ||
| - | [778] -34.01869977 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [813] -14.60867384 | ||
| - | [820] -13.93270232 -20.21993468 | ||
| - | [827] -29.04193544 -24.04102844 -14.01878071 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [862] -30.93593593 | ||
| - | | ||
| - | [876] -14.65449874 -25.51320775 -35.73124575 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [918] -21.82098554 | ||
| - | [925] -26.29548705 -28.39677088 | ||
| - | [932] -11.67413566 -11.59680714 | ||
| - | | ||
| - | | ||
| - | | ||
| - | | ||
| - | [967] -18.49661342 -12.30226946 | ||
| - | [974] -11.26368153 | ||
| - | [981] -25.17421015 -10.35273557 | ||
| - | | ||
| - | [995] -22.27223290 | ||
| - | > t.cal.rm <- mean(diff.s)/ | ||
| - | > t.cal.rm | ||
| - | [1] -19.82846 | ||
| - | > p.val.rm <- pt(abs(t.cal.rm), | ||
| - | > p.val.rm | ||
| - | [1] 4.836389e-74 | ||
| - | > t.test(s1, s2, paired = T) | ||
| - | |||
| - | Paired t-test | ||
| - | |||
| - | data: s1 and s2 | ||
| - | t = -19.828, df = 999, p-value < 2.2e-16 | ||
| - | alternative hypothesis: true mean difference is not equal to 0 | ||
| - | 95 percent confidence interval: | ||
| - | | ||
| - | sample estimates: | ||
| - | mean difference | ||
| - | -9.166555 | ||
| - | |||
| - | > | ||
| - | > # create multiple histogram | ||
| - | > s.all <- c(s1,s2) | ||
| - | > mean(s.all) | ||
| - | [1] 105.2971 | ||
| - | > hist(s1, col=' | ||
| - | > hist(s2, col=' | ||
| - | > abline(v=c(mean(s.all)), | ||
| - | + col=c(" | ||
| - | > abline(v=c(mean(s1), | ||
| - | + col=c(" | ||
| - | > | ||
| - | > comb <- data.frame(s1, | ||
| - | > dat <- stack(comb) | ||
| - | > head(dat) | ||
| - | | ||
| - | 1 93.17788 | ||
| - | 2 103.00254 | ||
| - | 3 104.53388 | ||
| - | 4 88.59698 | ||
| - | 5 105.67789 | ||
| - | 6 112.72657 | ||
| - | > | ||
| - | > m.tot <- mean(s.all) | ||
| - | > m.s1 <- mean(s1) | ||
| - | > m.s2 <- mean(s2) | ||
| - | > | ||
| - | > ss.tot <- ss(s.all) | ||
| - | > ss.s1 <- ss(s1) | ||
| - | > ss.s2 <- ss(s2) | ||
| - | > | ||
| - | > df.tot <- length(s.all)-1 | ||
| - | > df.s1 <- length(s1)-1 | ||
| - | > df.s2 <- length(s2)-1 | ||
| - | > | ||
| - | > ms.tot <- var(s.all) | ||
| - | > ms.tot | ||
| - | [1] 125.5892 | ||
| - | > ss.tot/ | ||
| - | [1] 125.5892 | ||
| - | > | ||
| - | > var(s1) | ||
| - | [1] 108.3973 | ||
| - | > ss.s1 / df.s1 | ||
| - | [1] 108.3973 | ||
| - | > | ||
| - | > var(s2) | ||
| - | [1] 100.8519 | ||
| - | > ss.s2 / df.s2 | ||
| - | [1] 100.8519 | ||
| - | > | ||
| - | > ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) | ||
- | > ss.b.s2 <- length(s2) * ((m.tot - m.s2)^2) | ||
| - | > ss.bet <- ss.b.s1+ss.b.s2 | ||
| - | > ss.bet | ||
| - | [1] 42012.87 | ||
| - | > | ||
| - | > ss.wit <- ss.s1 + ss.s2 | ||
| - | > ss.wit | ||
| - | [1] 209039.9 | ||
| - | > | ||
| - | > ss.bet + ss.wit | ||
| - | [1] 251052.8 | ||
| - | > ss.tot | ||
| - | [1] 251052.8 | ||
| - | > | ||
| - | > library(dplyr) | ||
| - | |||
- | Attaching package: | ||
| - | |||
| - | The following objects are masked from ‘package: | ||
| - | |||
| - | filter, lag | ||
| - | |||
| - | The following objects are masked from ‘package: | ||
| - | |||
| - | intersect, setdiff, setequal, union | ||
| - | > # df.bet <- length(unique(dat)) - 1 | ||
| - | > df.bet <- nlevels(dat$ind) - 1 | ||
| - | > df.wit <- df.s1+df.s2 | ||
| - | > df.bet | ||
| - | [1] 1 | ||
| - | > df.wit | ||
| - | [1] 1998 | ||
| - | > df.bet+df.wit | ||
| - | [1] 1999 | ||
| - | > df.tot | ||
| - | [1] 1999 | ||
| - | > | ||
| - | > ms.bet <- ss.bet / df.bet | ||
| - | > ms.wit <- ss.wit / df.wit | ||
| - | > ms.bet | ||
| - | [1] 42012.87 | ||
| - | > ms.wit | ||
| - | [1] 104.6246 | ||
| - | > | ||
| - | > f.cal <- ms.bet / ms.wit | ||
| - | > f.cal | ||
| - | [1] 401.5582 | ||
| - | > pf(f.cal, df.bet, df.wit, lower.tail = F) | ||
| - | [1] 1.522061e-81 | ||
| - | > | ||
| - | > | ||
| - | > f.test <- aov(dat$values~ dat$ind, data = dat) | ||
| - | > summary(f.test) | ||
| - | Df Sum Sq Mean Sq F value Pr(> | ||
| - | dat$ind | ||
| - | Residuals | ||
| - | --- | ||
| - | Signif. codes: | ||
| - | > | ||
| - | > sqrt(f.cal) | ||
| - | [1] 20.03892 | ||
| - | > t.cal.ts | ||
| - | [1] -20.03892 | ||
| - | > | ||
| - | > # this is anova after all. | ||
| - | > | ||
| - | </ | ||