note.w02
Differences
This shows you the differences between two versions of the page.
Next revision | Previous revision | ||
note.w02 [2025/09/11 10:35] – created hkimscil | note.w02 [2025/09/11 10:40] (current) – [output] hkimscil | ||
---|---|---|---|
Line 339: | Line 339: | ||
t.test(s.from.p2, | t.test(s.from.p2, | ||
</ | </ | ||
+ | |||
+ | ====== output ====== | ||
+ | < | ||
+ | > rm(list=ls()) | ||
+ | > rnorm2 <- function(n, | ||
+ | + | ||
+ | + } | ||
+ | > | ||
+ | > se <- function(sample) { | ||
+ | + | ||
+ | + } | ||
+ | > | ||
+ | > ss <- function(x) { | ||
+ | + | ||
+ | + } | ||
+ | > | ||
+ | > ################################ | ||
+ | > N.p <- 1000000 | ||
+ | > m.p <- 100 | ||
+ | > sd.p <- 10 | ||
+ | > | ||
+ | > p1 <- rnorm2(N.p, m.p, sd.p) | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > sd(p1) | ||
+ | [1] 10 | ||
+ | > | ||
+ | > p2 <- rnorm2(N.p, m.p+5, sd.p) | ||
+ | > mean(p2) | ||
+ | [1] 105 | ||
+ | > sd(p2) | ||
+ | [1] 10 | ||
+ | > | ||
+ | > m.p1 <- mean(p1) | ||
+ | > sd.p1 <- sd(p1) | ||
+ | > var(p1) | ||
+ | [,1] | ||
+ | [1,] 100 | ||
+ | > | ||
+ | > | ||
+ | > hist(p1, breaks=50, col = rgb(1, 1, 1, 0.5), | ||
+ | + main = " | ||
+ | > abline(v=mean(p1), | ||
+ | > hist(p2, add=T, breaks=50, col=rgb(1, | ||
+ | > abline(v=mean(p2), | ||
+ | > | ||
+ | > | ||
+ | > hist(p1, breaks=50, col=rgb(0, | ||
+ | > abline(v=mean(p1), | ||
+ | > abline(v=mean(p1)-sd(p1), | ||
+ | > abline(v=mean(p1)+sd(p1), | ||
+ | > abline(v=c(m.p1-2*sd.p1, | ||
+ | > abline(v=c(m.p1-3*sd.p1, | ||
+ | > | ||
+ | > # area bet black = 68% | ||
+ | > # between red = 95% | ||
+ | > # between green = 99% | ||
+ | > | ||
+ | > pnorm(m.p1+sd.p1, | ||
+ | [1] 0.8413447 | ||
+ | > pnorm(m.p1-sd.p1, | ||
+ | [1] 0.1586553 | ||
+ | > pnorm(m.p1+sd.p1, | ||
+ | + | ||
+ | [1] 0.6826895 | ||
+ | > | ||
+ | > pnorm(m.p1+2*sd.p1, | ||
+ | + | ||
+ | [1] 0.9544997 | ||
+ | > | ||
+ | > pnorm(m.p1+3*sd.p1, | ||
+ | + | ||
+ | [1] 0.9973002 | ||
+ | > | ||
+ | > m.p1 | ||
+ | [1] 100 | ||
+ | > sd.p1 | ||
+ | [1] 10 | ||
+ | > | ||
+ | > (m.p1-m.p1)/ | ||
+ | [1] 0 | ||
+ | > ((m.p1-sd.p1) - m.p1) / sd.p1 | ||
+ | [1] -1 | ||
+ | > (120-100)/ | ||
+ | [1] 2 | ||
+ | > pnorm(1)-pnorm(-1) | ||
+ | [1] 0.6826895 | ||
+ | > pnorm(2)-pnorm(-2) | ||
+ | [1] 0.9544997 | ||
+ | > pnorm(3)-pnorm(3) | ||
+ | [1] 0 | ||
+ | > | ||
+ | > 1-pnorm(-2)*2 | ||
+ | [1] 0.9544997 | ||
+ | > pnorm(2)-pnorm(-2) | ||
+ | [1] 0.9544997 | ||
+ | > | ||
+ | > pnorm(120, 100, 10) | ||
+ | [1] 0.9772499 | ||
+ | > pnorm(2)-pnorm(-2) | ||
+ | [1] 0.9544997 | ||
+ | > | ||
+ | > zscore <- (120-100)/ | ||
+ | > pnorm(zscore)-pnorm(-zscore) | ||
+ | [1] 0.9544997 | ||
+ | > zscore | ||
+ | [1] 2 | ||
+ | > | ||
+ | > # reminder. | ||
+ | > pnorm(-1) | ||
+ | [1] 0.1586553 | ||
+ | > pnorm(1, 0, 1, lower.tail = F) | ||
+ | [1] 0.1586553 | ||
+ | > pnorm(110, 100, 10, lower.tail = F) | ||
+ | [1] 0.1586553 | ||
+ | > zscore <- (110-100)/ | ||
+ | > pnorm(zscore, | ||
+ | [1] 0.1586553 | ||
+ | > | ||
+ | > pnorm(118, 100, 10, lower.tail = F) | ||
+ | [1] 0.03593032 | ||
+ | > pnorm(18/ | ||
+ | [1] 0.03593032 | ||
+ | > | ||
+ | > z.p1 <- (p1-mean(p1))/ | ||
+ | > mean(z.p1) | ||
+ | [1] 8.189457e-18 | ||
+ | > round(mean(z.p1), | ||
+ | [1] 0 | ||
+ | > sd(z.p1) | ||
+ | [1] 1 | ||
+ | > pnorm(1.8)-pnorm(-1.8) | ||
+ | [1] 0.9281394 | ||
+ | > | ||
+ | > hist(z.p1, breaks=50, col=rgb(1, | ||
+ | > abline(v=c(m.p1, | ||
+ | > 1-(pnorm(1.8)-pnorm(-1.8)) | ||
+ | [1] 0.07186064 | ||
+ | > | ||
+ | > | ||
+ | > pnorm(1)-pnorm(-1) | ||
+ | [1] 0.6826895 | ||
+ | > 1-(pnorm(-1)*2) | ||
+ | [1] 0.6826895 | ||
+ | > | ||
+ | > pnorm(2)-pnorm(-2) | ||
+ | [1] 0.9544997 | ||
+ | > 1-(pnorm(-2)*2) | ||
+ | [1] 0.9544997 | ||
+ | > | ||
+ | > 1-(pnorm(-3)*2) | ||
+ | [1] 0.9973002 | ||
+ | > | ||
+ | > # | ||
+ | > hist(p1, breaks=50, col=rgb(.9, | ||
+ | > abline(v=mean(p1), | ||
+ | > abline(v=mean(p1)-sd(p1), | ||
+ | > abline(v=mean(p1)+sd(p1), | ||
+ | > abline(v=c(m.p1-2*sd.p1, | ||
+ | > abline(v=c(m.p1-3*sd.p1, | ||
+ | > | ||
+ | > # 68% | ||
+ | > a <- qnorm(.32/ | ||
+ | > b <- qnorm(1-.32/ | ||
+ | > c(a, b) | ||
+ | [1] -0.9944579 | ||
+ | > c(-1, 1) | ||
+ | [1] -1 1 | ||
+ | > # note that | ||
+ | > .32/2 | ||
+ | [1] 0.16 | ||
+ | > pnorm(-1) | ||
+ | [1] 0.1586553 | ||
+ | > qnorm(.32/ | ||
+ | [1] -0.9944579 | ||
+ | > qnorm(pnorm(-1)) | ||
+ | [1] -1 | ||
+ | > | ||
+ | > # 95% | ||
+ | > c <- qnorm(.05/ | ||
+ | > d <- qnorm(1-.05/ | ||
+ | > c(c, d) | ||
+ | [1] -1.959964 | ||
+ | > c(-2,2) | ||
+ | [1] -2 2 | ||
+ | > | ||
+ | > # 99% | ||
+ | > e <- qnorm(.01/ | ||
+ | > f <- qnorm(1-.01/ | ||
+ | > c(e,f) | ||
+ | [1] -2.575829 | ||
+ | > c(-3,3) | ||
+ | [1] -3 3 | ||
+ | > | ||
+ | > | ||
+ | > pnorm(b)-pnorm(a) | ||
+ | [1] 0.68 | ||
+ | > c(a, b) | ||
+ | [1] -0.9944579 | ||
+ | > pnorm(d)-pnorm(c) | ||
+ | [1] 0.95 | ||
+ | > c(c, d) | ||
+ | [1] -1.959964 | ||
+ | > pnorm(f)-pnorm(e) | ||
+ | [1] 0.99 | ||
+ | > c(e, f) | ||
+ | [1] -2.575829 | ||
+ | > | ||
+ | > qnorm(.5) | ||
+ | [1] 0 | ||
+ | > qnorm(1) | ||
+ | [1] Inf | ||
+ | > | ||
+ | > | ||
+ | > ################################ | ||
+ | > s.size <- 50 | ||
+ | > | ||
+ | > means.temp <- c() | ||
+ | > s1 <- sample(p1, s.size, replace = T) | ||
+ | > mean(s1) | ||
+ | [1] 98.76098 | ||
+ | > means.temp <- append(means.temp, | ||
+ | > means.temp <- append(means.temp, | ||
+ | > means.temp <- append(means.temp, | ||
+ | > means.temp <- append(means.temp, | ||
+ | > means.temp <- append(means.temp, | ||
+ | > means.temp | ||
+ | [1] 98.76098 | ||
+ | > | ||
+ | > iter <- 1000000 | ||
+ | > # means <- c() | ||
+ | > means <- rep(NA, iter) | ||
+ | > for (i in 1:iter) { | ||
+ | + # means <- append(means, | ||
+ | + | ||
+ | + } | ||
+ | > length(means) | ||
+ | [1] 1000000 | ||
+ | > mean(means) | ||
+ | [1] 99.99824 | ||
+ | > sd(means) | ||
+ | [1] 1.414035 | ||
+ | > se.s <- sd(means) | ||
+ | > | ||
+ | > hist(means, breaks=50, | ||
+ | + xlim = c(mean(means)-5*sd(means), | ||
+ | + col=rgb(1, 1, 1, .5)) | ||
+ | > abline(v=mean(means), | ||
+ | > # now we want to get sd of this distribution | ||
+ | > lo1 <- mean(means)-se.s | ||
+ | > hi1 <- mean(means)+se.s | ||
+ | > lo2 <- mean(means)-2*se.s | ||
+ | > hi2 <- mean(means)+2*se.s | ||
+ | > lo3 <- mean(means)-3*se.s | ||
+ | > hi3 <- mean(means)+3*se.s | ||
+ | > abline(v=mean(means), | ||
+ | > # abline(v=mean(p2), | ||
+ | > abline(v=c(lo1, | ||
+ | + col=c(" | ||
+ | + lwd=2) | ||
+ | > | ||
+ | > # meanwhile . . . . | ||
+ | > se.s | ||
+ | [1] 1.414035 | ||
+ | > | ||
+ | > se.z <- sqrt(var(p1)/ | ||
+ | > se.z <- c(se.z) | ||
+ | > se.z | ||
+ | [1] 1.414214 | ||
+ | > | ||
+ | > se.z.adjusted <- sqrt(var(s1)/ | ||
+ | > se.z.adjusted | ||
+ | [1] 1.370464 | ||
+ | > | ||
+ | > # sd of sample means (sd(means)) | ||
+ | > # = se.s | ||
+ | > | ||
+ | > # when iter value goes to | ||
+ | > # infinite value: | ||
+ | > # mean(means) = mean(p1) | ||
+ | > # and | ||
+ | > # sd(means) = sd(p1) / sqrt(s.size) | ||
+ | > # that is, se.s = se.z | ||
+ | > # This is called CLT (Central Limit Theorem) | ||
+ | > # see http:// | ||
+ | > | ||
+ | > mean(means) | ||
+ | [1] 99.99824 | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > sd(means) | ||
+ | [1] 1.414035 | ||
+ | > var(p1) | ||
+ | [,1] | ||
+ | [1,] 100 | ||
+ | > # remember we started talking about sample size 10 | ||
+ | > sqrt(var(p1)/ | ||
+ | [,1] | ||
+ | [1,] 1.414214 | ||
+ | > se.z | ||
+ | [1] 1.414214 | ||
+ | > | ||
+ | > sd(means) | ||
+ | [1] 1.414035 | ||
+ | > se.s | ||
+ | [1] 1.414035 | ||
+ | > se.z | ||
+ | [1] 1.414214 | ||
+ | > | ||
+ | > # because CLT | ||
+ | > loz1 <- mean(p1)-se.z | ||
+ | > hiz1 <- mean(p1)+se.z | ||
+ | > loz2 <- mean(p1)-2*se.z | ||
+ | > hiz2 <- mean(p1)+2*se.z | ||
+ | > loz3 <- mean(p1)-3*se.z | ||
+ | > hiz3 <- mean(p1)+3*se.z | ||
+ | > | ||
+ | > c(lo1, loz1) | ||
+ | [1] 98.58421 98.58579 | ||
+ | > c(lo2, loz2) | ||
+ | [1] 97.17017 97.17157 | ||
+ | > c(lo3, loz3) | ||
+ | [1] 95.75614 95.75736 | ||
+ | > | ||
+ | > c(hi1, hiz1) | ||
+ | [1] 101.4123 101.4142 | ||
+ | > c(hi2, hiz2) | ||
+ | [1] 102.8263 102.8284 | ||
+ | > c(hi3, hiz3) | ||
+ | [1] 104.2403 104.2426 | ||
+ | > | ||
+ | > | ||
+ | > hist(means, breaks=50, | ||
+ | + xlim = c(mean(means)-5*sd(means), | ||
+ | + col = rgb(1, 1, 1, .5)) | ||
+ | > abline(v=mean(means), | ||
+ | > # abline(v=mean(p2), | ||
+ | > abline(v=c(lo1, | ||
+ | + col=c(" | ||
+ | + lwd=2) | ||
+ | > | ||
+ | > round(c(lo1, | ||
+ | [1] 99 101 | ||
+ | > round(c(lo2, | ||
+ | [1] 97 103 | ||
+ | > round(c(lo3, | ||
+ | [1] 96 104 | ||
+ | > | ||
+ | > round(c(loz1, | ||
+ | [1] 99 101 | ||
+ | > round(c(loz2, | ||
+ | [1] 97 103 | ||
+ | > round(c(loz3, | ||
+ | [1] 96 104 | ||
+ | > | ||
+ | > m.sample.i.got <- mean(means)+ 1.5*sd(means) | ||
+ | > m.sample.i.got | ||
+ | [1] 102.1193 | ||
+ | > | ||
+ | > hist(means, breaks=30, | ||
+ | + xlim = c(mean(means)-7*sd(means), | ||
+ | + col = rgb(1, 1, 1, .5)) | ||
+ | > abline(v=mean(means), | ||
+ | > abline(v=m.sample.i.got, | ||
+ | > | ||
+ | > # what is the probability of getting | ||
+ | > # greater than | ||
+ | > # m.sample.i.got? | ||
+ | > m.sample.i.got | ||
+ | [1] 102.1193 | ||
+ | > pnorm(m.sample.i.got, | ||
+ | [1] 0.0668072 | ||
+ | > pnorm(m.sample.i.got, | ||
+ | [1] 0.0669929 | ||
+ | > | ||
+ | > # then, what is the probability of getting | ||
+ | > # greater than m.sample.i.got and | ||
+ | > # less than corresponding value, which is | ||
+ | > # mean(means) - m.sample.i.got - mean(means) | ||
+ | > # (green line) | ||
+ | > tmp <- mean(means) - (m.sample.i.got - mean(means)) | ||
+ | > abline(v=tmp, | ||
+ | > 2 * pnorm(m.sample.i.got, | ||
+ | [1] 0.1339368 | ||
+ | > m.sample.i.got | ||
+ | [1] 102.1193 | ||
+ | > | ||
+ | > ### one more time | ||
+ | > # this time, with a story | ||
+ | > mean(p2) | ||
+ | [1] 105 | ||
+ | > sd(p2) | ||
+ | [1] 10 | ||
+ | > s.from.p2 <- sample(p2, s.size) | ||
+ | > m.s.from.p2 <- mean(s.from.p2) | ||
+ | > m.s.from.p2 | ||
+ | [1] 103.1283 | ||
+ | > | ||
+ | > se.s | ||
+ | [1] 1.414035 | ||
+ | > se.z | ||
+ | [1] 1.414214 | ||
+ | > sd(means) | ||
+ | [1] 1.414035 | ||
+ | > | ||
+ | > tmp <- mean(means) - (m.s.from.p2 - mean(means)) | ||
+ | > tmp | ||
+ | [1] 96.86822 | ||
+ | > | ||
+ | > hist(means, breaks=30, | ||
+ | + xlim = c(tmp-4*sd(means), | ||
+ | + col = rgb(1, 1, 1, .5)) | ||
+ | > abline(v=mean(means), | ||
+ | > abline(v=m.s.from.p2, | ||
+ | > | ||
+ | > # what is the probability of getting | ||
+ | > # greater than | ||
+ | > # m.sample.i.got? | ||
+ | > m.s.from.p2 | ||
+ | [1] 103.1283 | ||
+ | > pnorm(m.s.from.p2, | ||
+ | [1] 0.01348266 | ||
+ | > | ||
+ | > # then, what is the probability of getting | ||
+ | > # greater than m.sample.i.got and | ||
+ | > # less than corresponding value, which is | ||
+ | > # mean(means) - m.sample.i.got - mean(means) | ||
+ | > # (green line) | ||
+ | > abline(v=tmp, | ||
+ | > 2 * pnorm(m.s.from.p2, | ||
+ | [1] 0.02696533 | ||
+ | > | ||
+ | > se.z | ||
+ | [1] 1.414214 | ||
+ | > sd(s.from.p2)/ | ||
+ | [1] 1.414296 | ||
+ | > se.z.adjusted <- sqrt(var(s.from.p2)/ | ||
+ | > se.z.adjusted | ||
+ | [1] 1.414296 | ||
+ | > 2 * pnorm(m.s.from.p2, | ||
+ | + | ||
+ | [1] 0.02697421 | ||
+ | > | ||
+ | > z.cal <- (m.s.from.p2 - mean(p1))/ | ||
+ | > z.cal | ||
+ | [1] 2.211891 | ||
+ | > pnorm(z.cal, | ||
+ | [1] 0.02697421 | ||
+ | > | ||
+ | > | ||
+ | > pt(z.cal, 49, lower.tail = F)*2 | ||
+ | [1] 0.03166797 | ||
+ | > t.test(s.from.p2, | ||
+ | |||
+ | One Sample t-test | ||
+ | |||
+ | data: s.from.p2 | ||
+ | t = 2.2119, df = 49, p-value = 0.03167 | ||
+ | alternative hypothesis: true mean is not equal to 100 | ||
+ | 95 percent confidence interval: | ||
+ | | ||
+ | sample estimates: | ||
+ | mean of x | ||
+ | | ||
+ | |||
+ | > | ||
+ | </ | ||
+ | |||
+ | ====== T-test sum up ====== | ||
+ | < | ||
+ | |||
+ | rm(list=ls()) | ||
+ | rnorm2 <- function(n, | ||
+ | mean+sd*scale(rnorm(n)) | ||
+ | } | ||
+ | se <- function(sample) { | ||
+ | sd(sample)/ | ||
+ | } | ||
+ | ss <- function(x) { | ||
+ | sum((x-mean(x))^2) | ||
+ | } | ||
+ | |||
+ | N.p <- 1000000 | ||
+ | m.p <- 100 | ||
+ | sd.p <- 10 | ||
+ | |||
+ | |||
+ | p1 <- rnorm2(N.p, m.p, sd.p) | ||
+ | mean(p1) | ||
+ | sd(p1) | ||
+ | |||
+ | p2 <- rnorm2(N.p, m.p+10, sd.p) | ||
+ | mean(p2) | ||
+ | sd(p2) | ||
+ | |||
+ | hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
+ | main = " | ||
+ | abline(v=mean(p1), | ||
+ | hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | abline(v=mean(p2), | ||
+ | |||
+ | s.size <- 1000 | ||
+ | s2 <- sample(p2, s.size) | ||
+ | mean(s2) | ||
+ | sd(s2) | ||
+ | |||
+ | se.z <- sqrt(var(p1)/ | ||
+ | se.z <- c(se.z) | ||
+ | se.z.range <- c(-2*se.z, | ||
+ | se.z.range | ||
+ | |||
+ | mean(p1)+se.z.range | ||
+ | mean(s2) | ||
+ | |||
+ | z.cal <- (mean(s2) - mean(p1)) / se.z | ||
+ | z.cal | ||
+ | pnorm(z.cal, | ||
+ | |||
+ | z.cal | ||
+ | |||
+ | # principles . . . | ||
+ | # distribution of sample means | ||
+ | iter <- 100000 | ||
+ | means <- c() | ||
+ | for (i in 1:iter) { | ||
+ | m.of.s <- mean(sample(p1, | ||
+ | means <- append(means, | ||
+ | } | ||
+ | |||
+ | hist(means, | ||
+ | xlim = c(mean(means)-3*sd(means), | ||
+ | col = rgb(1, 0, 0, 0.5)) | ||
+ | abline(v=mean(p1), | ||
+ | abline(v=mean(s2), | ||
+ | | ||
+ | lo1 <- mean(p1)-se.z | ||
+ | hi1 <- mean(p1)+se.z | ||
+ | lo2 <- mean(p1)-2*se.z | ||
+ | hi2 <- mean(p1)+2*se.z | ||
+ | abline(v=c(lo1, | ||
+ | | ||
+ | | ||
+ | se.z | ||
+ | c(lo2, hi2) | ||
+ | pnorm(z.cal, | ||
+ | |||
+ | |||
+ | # Note that we use sqrt(var(s2)/ | ||
+ | # as our se value instead of | ||
+ | # sqrt(var(p1)/ | ||
+ | # This is a common practice for R | ||
+ | # In fact, some z.test (made by someone) | ||
+ | # function uses the former rather than | ||
+ | # latter. | ||
+ | |||
+ | sqrt(var(p1)/ | ||
+ | se.z | ||
+ | |||
+ | sqrt(var(s2)/ | ||
+ | se(s2) | ||
+ | |||
+ | t.cal.os <- (mean(s2) - mean(p1))/ se(s2) | ||
+ | z.cal <- (mean(s2) - mean(p1))/ se.z | ||
+ | t.cal.os | ||
+ | z.cal | ||
+ | |||
+ | df.s2 <- length(s2)-1 | ||
+ | df.s2 | ||
+ | p.t.os <- pt(abs(t.cal.os), | ||
+ | p.t.os | ||
+ | t.out <- t.test(s2, mu=mean(p1)) | ||
+ | |||
+ | library(BSDA) | ||
+ | z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) | ||
+ | |||
+ | z.out$statistic # se.z 대신에 se(s2) 값으로 구한 z 값 | ||
+ | z.cal # se.z으로 (sqrt(var(p1)/ | ||
+ | |||
+ | t.out$statistic # se(s2)를 분모로 하여 구한 t 값 | ||
+ | t.cal.os # se(s2)를 이용하여 손으로 구한 t 값 | ||
+ | |||
+ | # But, after all, we use t.test method regardless of | ||
+ | # variation | ||
+ | |||
+ | hist(means, | ||
+ | xlim = c(mean(means)-3*sd(means), | ||
+ | col = rgb(1, 0, 0, 0.5)) | ||
+ | abline(v=mean(p1), | ||
+ | abline(v=mean(s2), | ||
+ | | ||
+ | lo1 <- mean(p1)-se.z | ||
+ | hi1 <- mean(p1)+se.z | ||
+ | lo2 <- mean(p1)-2*se.z | ||
+ | hi2 <- mean(p1)+2*se.z | ||
+ | abline(v=c(lo1, | ||
+ | | ||
+ | | ||
+ | |||
+ | # difference between black and blue line | ||
+ | # divided by | ||
+ | # se(s2) (= random difference) | ||
+ | # t.value | ||
+ | mean(s2) | ||
+ | mean(p1) | ||
+ | diff <- mean(s2)-mean(p1) | ||
+ | diff | ||
+ | se(s2) | ||
+ | diff/se(s2) | ||
+ | |||
+ | t.cal.os | ||
+ | |||
+ | ######################## | ||
+ | # 2 sample t-test | ||
+ | ######################## | ||
+ | # 가정. 아래에서 추출하는 두 | ||
+ | # 샘플의 모집단의 파라미터를 | ||
+ | # 모른다. | ||
+ | s1 <- sample(p1, s.size) | ||
+ | s2 <- sample(p2, s.size) | ||
+ | |||
+ | mean(s1) | ||
+ | mean(s2) | ||
+ | ss(s1) | ||
+ | ss(s2) | ||
+ | df.s1 <- length(s1)-1 | ||
+ | df.s2 <- length(s2)-1 | ||
+ | df.s1 | ||
+ | df.s2 | ||
+ | |||
+ | pooled.variance <- (ss(s1)+ss(s2))/ | ||
+ | pooled.variance | ||
+ | se.ts <- sqrt((pooled.variance/ | ||
+ | se.ts | ||
+ | t.cal.ts <- (mean(s1)-mean(s2))/ | ||
+ | t.cal.ts | ||
+ | p.val.ts <- pt(abs(t.cal.ts), | ||
+ | p.val.ts | ||
+ | |||
+ | t.test(s1, s2, var.equal = T) | ||
+ | |||
+ | se(s1) | ||
+ | se(s2) | ||
+ | |||
+ | mean(s1)+c(-se(s1)*2, | ||
+ | mean(s2)+c(-se(s2)*2, | ||
+ | |||
+ | mean(p1) | ||
+ | mean(p2) | ||
+ | |||
+ | hist(s1, breaks=50, | ||
+ | col = rgb(1, 0, 0, 0.5)) | ||
+ | hist(s2, breaks=50, add=T, col=rgb(0, | ||
+ | abline(v=mean(s1), | ||
+ | # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | abline(v=mean(s2), | ||
+ | |||
+ | diff <- mean(s1)-mean(s2) | ||
+ | se.ts | ||
+ | diff/se.ts | ||
+ | |||
+ | #### | ||
+ | # repeated measure t-test | ||
+ | # we can use the above case | ||
+ | # pop parameter unknown | ||
+ | # two consecutive measurement | ||
+ | # for the same sample | ||
+ | |||
+ | t1 <- s1 | ||
+ | t2 <- s2 | ||
+ | mean(t1) | ||
+ | mean(t2) | ||
+ | diff.s <- t1 - t2 | ||
+ | diff.s | ||
+ | t.cal.rm <- mean(diff.s)/ | ||
+ | t.cal.rm | ||
+ | p.val.rm <- pt(abs(t.cal.rm), | ||
+ | p.val.rm | ||
+ | t.test(s1, s2, paired = T) | ||
+ | |||
+ | # create multiple histogram | ||
+ | s.all <- c(s1,s2) | ||
+ | mean(s.all) | ||
+ | hist(s1, col=' | ||
+ | hist(s2, col=' | ||
+ | abline(v=c(mean(s.all)), | ||
+ | | ||
+ | abline(v=c(mean(s1), | ||
+ | | ||
+ | |||
+ | comb <- data.frame(s1, | ||
+ | dat <- stack(comb) | ||
+ | head(dat) | ||
+ | |||
+ | m.tot <- mean(s.all) | ||
+ | m.s1 <- mean(s1) | ||
+ | m.s2 <- mean(s2) | ||
+ | |||
+ | ss.tot <- ss(s.all) | ||
+ | ss.s1 <- ss(s1) | ||
+ | ss.s2 <- ss(s2) | ||
+ | |||
+ | df.tot <- length(s.all)-1 | ||
+ | df.s1 <- length(s1)-1 | ||
+ | df.s2 <- length(s2)-1 | ||
+ | |||
+ | ms.tot <- var(s.all) | ||
+ | ms.tot | ||
+ | ss.tot/ | ||
+ | |||
+ | var(s1) | ||
+ | ss.s1 / df.s1 | ||
+ | |||
+ | var(s2) | ||
+ | ss.s2 / df.s2 | ||
+ | |||
+ | ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) | ||
+ | ss.b.s2 <- length(s2) * ((m.tot - m.s1)^2) | ||
+ | ss.bet <- ss.b.s1+ss.b.s2 | ||
+ | ss.bet | ||
+ | |||
+ | ss.wit <- ss.s1 + ss.s2 | ||
+ | ss.wit | ||
+ | |||
+ | ss.bet + ss.wit | ||
+ | ss.tot | ||
+ | |||
+ | library(dplyr) | ||
+ | # df.bet <- length(unique(dat)) - 1 | ||
+ | df.bet <- nlevels(dat$ind) - 1 | ||
+ | df.wit <- df.s1+df.s2 | ||
+ | df.bet | ||
+ | df.wit | ||
+ | df.bet+df.wit | ||
+ | df.tot | ||
+ | |||
+ | ms.bet <- ss.bet / df.bet | ||
+ | ms.wit <- ss.wit / df.wit | ||
+ | ms.bet | ||
+ | ms.wit | ||
+ | |||
+ | f.cal <- ms.bet / ms.wit | ||
+ | f.cal | ||
+ | pf(f.cal, df.bet, df.wit, lower.tail = F) | ||
+ | |||
+ | |||
+ | f.test <- aov(dat$values~ dat$ind, data = dat) | ||
+ | summary(f.test) | ||
+ | |||
+ | sqrt(f.cal) | ||
+ | t.cal.ts | ||
+ | |||
+ | # this is anova after all. | ||
+ | </ | ||
+ | |||
+ | ====== output ====== | ||
+ | < | ||
+ | > rm(list=ls()) | ||
+ | > rnorm2 <- function(n, | ||
+ | + | ||
+ | + } | ||
+ | > se <- function(sample) { | ||
+ | + | ||
+ | + } | ||
+ | > ss <- function(x) { | ||
+ | + | ||
+ | + } | ||
+ | > | ||
+ | > N.p <- 1000000 | ||
+ | > m.p <- 100 | ||
+ | > sd.p <- 10 | ||
+ | > | ||
+ | > | ||
+ | > p1 <- rnorm2(N.p, m.p, sd.p) | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > sd(p1) | ||
+ | [1] 10 | ||
+ | > | ||
+ | > p2 <- rnorm2(N.p, m.p+10, sd.p) | ||
+ | > mean(p2) | ||
+ | [1] 110 | ||
+ | > sd(p2) | ||
+ | [1] 10 | ||
+ | > | ||
+ | > hist(p1, breaks=50, col = rgb(1, 0, 0, 0.5), | ||
+ | + main = " | ||
+ | > abline(v=mean(p1), | ||
+ | > hist(p2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | > abline(v=mean(p2), | ||
+ | > | ||
+ | > s.size <- 1000 | ||
+ | > s2 <- sample(p2, s.size) | ||
+ | > mean(s2) | ||
+ | [1] 110.4892 | ||
+ | > sd(s2) | ||
+ | [1] 10.36614 | ||
+ | > | ||
+ | > se.z <- sqrt(var(p1)/ | ||
+ | > se.z <- c(se.z) | ||
+ | > se.z.range <- c(-2*se.z, | ||
+ | > se.z.range | ||
+ | [1] -0.6324555 | ||
+ | > | ||
+ | > mean(p1)+se.z.range | ||
+ | [1] 99.36754 100.63246 | ||
+ | > mean(s2) | ||
+ | [1] 110.4892 | ||
+ | > | ||
+ | > z.cal <- (mean(s2) - mean(p1)) / se.z | ||
+ | > z.cal | ||
+ | [1] 33.16976 | ||
+ | > pnorm(z.cal, | ||
+ | [1] 2.93954e-241 | ||
+ | > | ||
+ | > z.cal | ||
+ | [1] 33.16976 | ||
+ | > | ||
+ | > # principles . . . | ||
+ | > # distribution of sample means | ||
+ | > iter <- 100000 | ||
+ | > means <- c() | ||
+ | > for (i in 1:iter) { | ||
+ | + | ||
+ | + means <- append(means, | ||
+ | + } | ||
+ | > | ||
+ | > hist(means, | ||
+ | + xlim = c(mean(means)-3*sd(means), | ||
+ | + col = rgb(1, 0, 0, 0.5)) | ||
+ | > abline(v=mean(p1), | ||
+ | > abline(v=mean(s2), | ||
+ | + col=" | ||
+ | > lo1 <- mean(p1)-se.z | ||
+ | > hi1 <- mean(p1)+se.z | ||
+ | > lo2 <- mean(p1)-2*se.z | ||
+ | > hi2 <- mean(p1)+2*se.z | ||
+ | > abline(v=c(lo1, | ||
+ | + col=c(" | ||
+ | + lwd=2) | ||
+ | > se.z | ||
+ | [1] 0.3162278 | ||
+ | > c(lo2, hi2) | ||
+ | [1] 99.36754 100.63246 | ||
+ | > pnorm(z.cal, | ||
+ | [1] 2.93954e-241 | ||
+ | > | ||
+ | > | ||
+ | > # Note that we use sqrt(var(s2)/ | ||
+ | > # as our se value instead of | ||
+ | > # sqrt(var(p1)/ | ||
+ | > # This is a common practice for R | ||
+ | > # In fact, some z.test (made by someone) | ||
+ | > # function uses the former rather than | ||
+ | > # latter. | ||
+ | > | ||
+ | > sqrt(var(p1)/ | ||
+ | [,1] | ||
+ | [1,] 0.3162278 | ||
+ | > se.z | ||
+ | [1] 0.3162278 | ||
+ | > | ||
+ | > sqrt(var(s2)/ | ||
+ | [1] 0.3278061 | ||
+ | > se(s2) | ||
+ | [1] 0.3278061 | ||
+ | > | ||
+ | > t.cal.os <- (mean(s2) - mean(p1))/ se(s2) | ||
+ | > z.cal <- (mean(s2) - mean(p1))/ se.z | ||
+ | > t.cal.os | ||
+ | [1] 31.99818 | ||
+ | > z.cal | ||
+ | [1] 33.16976 | ||
+ | > | ||
+ | > df.s2 <- length(s2)-1 | ||
+ | > df.s2 | ||
+ | [1] 999 | ||
+ | > p.t.os <- pt(abs(t.cal.os), | ||
+ | > p.t.os | ||
+ | [1] 3.162139e-155 | ||
+ | > t.out <- t.test(s2, mu=mean(p1)) | ||
+ | > | ||
+ | > library(BSDA) | ||
+ | 필요한 패키지를 로딩중입니다: | ||
+ | |||
+ | 다음의 패키지를 부착합니다: | ||
+ | |||
+ | The following object is masked from ‘package: | ||
+ | |||
+ | Orange | ||
+ | |||
+ | 경고메시지(들): | ||
+ | 패키지 ‘BSDA’는 R 버전 4.3.3에서 작성되었습니다 | ||
+ | > z.out <- z.test(s2, p1, sigma.x = sd(s2), sigma.y = sd(p1)) | ||
+ | > | ||
+ | > z.out$statistic # se.z 대신에 se(s2) 값으로 구한 z 값 | ||
+ | z | ||
+ | 31.9833 | ||
+ | > z.cal # se.z으로 (sqrt(var(p1)/ | ||
+ | [1] 33.16976 | ||
+ | > | ||
+ | > t.out$statistic # se(s2)를 분모로 하여 구한 t 값 | ||
+ | | ||
+ | 31.99818 | ||
+ | > t.cal.os # se(s2)를 이용하여 손으로 구한 t 값 | ||
+ | [1] 31.99818 | ||
+ | > | ||
+ | > # But, after all, we use t.test method regardless of | ||
+ | > # variation | ||
+ | > | ||
+ | > hist(means, | ||
+ | + xlim = c(mean(means)-3*sd(means), | ||
+ | + col = rgb(1, 0, 0, 0.5)) | ||
+ | > abline(v=mean(p1), | ||
+ | > abline(v=mean(s2), | ||
+ | + col=" | ||
+ | > lo1 <- mean(p1)-se.z | ||
+ | > hi1 <- mean(p1)+se.z | ||
+ | > lo2 <- mean(p1)-2*se.z | ||
+ | > hi2 <- mean(p1)+2*se.z | ||
+ | > abline(v=c(lo1, | ||
+ | + col=c(" | ||
+ | + lwd=2) | ||
+ | > | ||
+ | > # difference between black and blue line | ||
+ | > # divided by | ||
+ | > # se(s2) (= random difference) | ||
+ | > # t.value | ||
+ | > mean(s2) | ||
+ | [1] 110.4892 | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > diff <- mean(s2)-mean(p1) | ||
+ | > diff | ||
+ | [1] 10.4892 | ||
+ | > se(s2) | ||
+ | [1] 0.3278061 | ||
+ | > diff/se(s2) | ||
+ | [1] 31.99818 | ||
+ | > | ||
+ | > t.cal.os | ||
+ | [1] 31.99818 | ||
+ | > | ||
+ | > ######################## | ||
+ | > # 2 sample t-test | ||
+ | > ######################## | ||
+ | > # 가정. 아래에서 추출하는 두 | ||
+ | > # 샘플의 모집단의 파라미터를 | ||
+ | > # 모른다. | ||
+ | > s1 <- sample(p1, s.size) | ||
+ | > s2 <- sample(p2, s.size) | ||
+ | > | ||
+ | > mean(s1) | ||
+ | [1] 100.7138 | ||
+ | > mean(s2) | ||
+ | [1] 109.8804 | ||
+ | > ss(s1) | ||
+ | [1] 108288.9 | ||
+ | > ss(s2) | ||
+ | [1] 100751 | ||
+ | > df.s1 <- length(s1)-1 | ||
+ | > df.s2 <- length(s2)-1 | ||
+ | > df.s1 | ||
+ | [1] 999 | ||
+ | > df.s2 | ||
+ | [1] 999 | ||
+ | > | ||
+ | > pooled.variance <- (ss(s1)+ss(s2))/ | ||
+ | > pooled.variance | ||
+ | [1] 104.6246 | ||
+ | > se.ts <- sqrt((pooled.variance/ | ||
+ | > se.ts | ||
+ | [1] 0.4574376 | ||
+ | > t.cal.ts <- (mean(s1)-mean(s2))/ | ||
+ | > t.cal.ts | ||
+ | [1] -20.03892 | ||
+ | > p.val.ts <- pt(abs(t.cal.ts), | ||
+ | > p.val.ts | ||
+ | [1] 1.522061e-81 | ||
+ | > | ||
+ | > t.test(s1, s2, var.equal = T) | ||
+ | |||
+ | Two Sample t-test | ||
+ | |||
+ | data: s1 and s2 | ||
+ | t = -20.039, df = 1998, p-value < 2.2e-16 | ||
+ | alternative hypothesis: true difference in means is not equal to 0 | ||
+ | 95 percent confidence interval: | ||
+ | | ||
+ | sample estimates: | ||
+ | mean of x mean of y | ||
+ | | ||
+ | |||
+ | > | ||
+ | > se(s1) | ||
+ | [1] 0.3292374 | ||
+ | > se(s2) | ||
+ | [1] 0.3175719 | ||
+ | > | ||
+ | > mean(s1)+c(-se(s1)*2, | ||
+ | [1] 100.0553 101.3723 | ||
+ | > mean(s2)+c(-se(s2)*2, | ||
+ | [1] 109.2452 110.5155 | ||
+ | > | ||
+ | > mean(p1) | ||
+ | [1] 100 | ||
+ | > mean(p2) | ||
+ | [1] 110 | ||
+ | > | ||
+ | > hist(s1, breaks=50, | ||
+ | + col = rgb(1, 0, 0, 0.5)) | ||
+ | > hist(s2, breaks=50, add=T, col=rgb(0, | ||
+ | > abline(v=mean(s1), | ||
+ | > # hist(s2, breaks=50, add=TRUE, col = rgb(0, 0, 1, 0.5)) | ||
+ | > abline(v=mean(s2), | ||
+ | > | ||
+ | > diff <- mean(s1)-mean(s2) | ||
+ | > se.ts | ||
+ | [1] 0.4574376 | ||
+ | > diff/se.ts | ||
+ | [1] -20.03892 | ||
+ | > | ||
+ | > #### | ||
+ | > # repeated measure t-test | ||
+ | > # we can use the above case | ||
+ | > # pop parameter unknown | ||
+ | > # two consecutive measurement | ||
+ | > # for the same sample | ||
+ | > | ||
+ | > t1 <- s1 | ||
+ | > t2 <- s2 | ||
+ | > mean(t1) | ||
+ | [1] 100.7138 | ||
+ | > mean(t2) | ||
+ | [1] 109.8804 | ||
+ | > diff.s <- t1 - t2 | ||
+ | > diff.s | ||
+ | [1] -11.22445947 | ||
+ | | ||
+ | [15] | ||
+ | [22] -1.00704696 | ||
+ | [29] -10.58244069 -21.50608157 -53.61230898 | ||
+ | [36] | ||
+ | [43] -19.16855657 | ||
+ | [50] | ||
+ | [57] -8.28743679 -38.33995044 -50.60884456 | ||
+ | [64] | ||
+ | [71] -20.50728324 -19.37275012 -23.27866089 -11.74962053 -34.35317908 -26.10460199 | ||
+ | [78] -24.79252799 | ||
+ | [85] -23.66447959 | ||
+ | [92] -21.22842221 -11.68774036 | ||
+ | [99] 19.63435733 -14.40578016 -11.24172079 | ||
+ | | ||
+ | [113] -10.23092427 | ||
+ | [120] -31.50478963 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [183] -38.63379525 -16.58800530 -17.62672134 | ||
+ | [190] -22.25276559 | ||
+ | [197] -17.21120659 | ||
+ | | ||
+ | | ||
+ | [218] -24.41623403 | ||
+ | [225] -13.27091592 | ||
+ | [232] -11.52528966 | ||
+ | [239] -13.34969773 -37.98584006 | ||
+ | [246] -24.07983488 | ||
+ | [253] -12.30101864 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [288] -10.10160879 -11.59352210 | ||
+ | | ||
+ | [302] -35.18133234 | ||
+ | | ||
+ | | ||
+ | [323] -31.81200194 -12.67414744 | ||
+ | | ||
+ | [337] -17.60471709 | ||
+ | [344] -10.52672545 | ||
+ | | ||
+ | [358] -23.18199896 -18.58488442 -26.09162223 | ||
+ | | ||
+ | | ||
+ | [379] -37.25886118 | ||
+ | [386] -10.17057116 -11.45984282 -11.29130871 -15.60613249 | ||
+ | [393] -21.77681110 | ||
+ | | ||
+ | [407] -17.52668230 | ||
+ | [414] -14.30821541 -10.88427284 | ||
+ | | ||
+ | [428] -23.35919604 | ||
+ | | ||
+ | [442] -19.75974381 | ||
+ | | ||
+ | | ||
+ | [463] -19.77773032 | ||
+ | | ||
+ | | ||
+ | [484] -18.62856464 -25.58573774 -11.07888550 -16.77332405 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [519] -27.58434557 | ||
+ | [526] -44.00039083 -11.40638340 -10.42772214 | ||
+ | [533] -27.11548294 -26.69534077 | ||
+ | | ||
+ | [547] -18.87000360 | ||
+ | [554] -17.69850124 -11.92887098 | ||
+ | | ||
+ | | ||
+ | | ||
+ | [582] -12.87197389 -27.14630137 -14.53291112 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [645] -42.02480771 -13.42077241 | ||
+ | [652] -17.68303368 | ||
+ | [659] -19.48957914 -28.88885682 -10.59583907 -32.20537872 -11.95661953 -23.77887711 -23.51551361 | ||
+ | [666] -17.32443690 -10.86310515 | ||
+ | [673] -12.41671968 | ||
+ | | ||
+ | [687] -21.75000477 | ||
+ | [694] -34.30010114 | ||
+ | [701] -19.46561809 | ||
+ | [708] -27.91638644 -12.61381331 | ||
+ | [715] -20.55376627 | ||
+ | [722] -14.38944775 | ||
+ | | ||
+ | [736] -20.25705972 | ||
+ | [743] -13.32818441 | ||
+ | | ||
+ | [757] -19.16011286 -13.91801221 -11.66649472 | ||
+ | [764] -14.75337241 | ||
+ | | ||
+ | [778] -34.01869977 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [813] -14.60867384 | ||
+ | [820] -13.93270232 -20.21993468 | ||
+ | [827] -29.04193544 -24.04102844 -14.01878071 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [862] -30.93593593 | ||
+ | | ||
+ | [876] -14.65449874 -25.51320775 -35.73124575 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [918] -21.82098554 | ||
+ | [925] -26.29548705 -28.39677088 | ||
+ | [932] -11.67413566 -11.59680714 | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [967] -18.49661342 -12.30226946 | ||
+ | [974] -11.26368153 | ||
+ | [981] -25.17421015 -10.35273557 | ||
+ | | ||
+ | [995] -22.27223290 | ||
+ | > t.cal.rm <- mean(diff.s)/ | ||
+ | > t.cal.rm | ||
+ | [1] -19.82846 | ||
+ | > p.val.rm <- pt(abs(t.cal.rm), | ||
+ | > p.val.rm | ||
+ | [1] 4.836389e-74 | ||
+ | > t.test(s1, s2, paired = T) | ||
+ | |||
+ | Paired t-test | ||
+ | |||
+ | data: s1 and s2 | ||
+ | t = -19.828, df = 999, p-value < 2.2e-16 | ||
+ | alternative hypothesis: true mean difference is not equal to 0 | ||
+ | 95 percent confidence interval: | ||
+ | | ||
+ | sample estimates: | ||
+ | mean difference | ||
+ | -9.166555 | ||
+ | |||
+ | > | ||
+ | > # create multiple histogram | ||
+ | > s.all <- c(s1,s2) | ||
+ | > mean(s.all) | ||
+ | [1] 105.2971 | ||
+ | > hist(s1, col=' | ||
+ | > hist(s2, col=' | ||
+ | > abline(v=c(mean(s.all)), | ||
+ | + col=c(" | ||
+ | > abline(v=c(mean(s1), | ||
+ | + col=c(" | ||
+ | > | ||
+ | > comb <- data.frame(s1, | ||
+ | > dat <- stack(comb) | ||
+ | > head(dat) | ||
+ | | ||
+ | 1 93.17788 | ||
+ | 2 103.00254 | ||
+ | 3 104.53388 | ||
+ | 4 88.59698 | ||
+ | 5 105.67789 | ||
+ | 6 112.72657 | ||
+ | > | ||
+ | > m.tot <- mean(s.all) | ||
+ | > m.s1 <- mean(s1) | ||
+ | > m.s2 <- mean(s2) | ||
+ | > | ||
+ | > ss.tot <- ss(s.all) | ||
+ | > ss.s1 <- ss(s1) | ||
+ | > ss.s2 <- ss(s2) | ||
+ | > | ||
+ | > df.tot <- length(s.all)-1 | ||
+ | > df.s1 <- length(s1)-1 | ||
+ | > df.s2 <- length(s2)-1 | ||
+ | > | ||
+ | > ms.tot <- var(s.all) | ||
+ | > ms.tot | ||
+ | [1] 125.5892 | ||
+ | > ss.tot/ | ||
+ | [1] 125.5892 | ||
+ | > | ||
+ | > var(s1) | ||
+ | [1] 108.3973 | ||
+ | > ss.s1 / df.s1 | ||
+ | [1] 108.3973 | ||
+ | > | ||
+ | > var(s2) | ||
+ | [1] 100.8519 | ||
+ | > ss.s2 / df.s2 | ||
+ | [1] 100.8519 | ||
+ | > | ||
+ | > ss.b.s1 <- length(s1) * ((m.tot - m.s1)^2) | ||
+ | > ss.b.s2 <- length(s2) * ((m.tot - m.s1)^2) | ||
+ | > ss.bet <- ss.b.s1+ss.b.s2 | ||
+ | > ss.bet | ||
+ | [1] 42012.87 | ||
+ | > | ||
+ | > ss.wit <- ss.s1 + ss.s2 | ||
+ | > ss.wit | ||
+ | [1] 209039.9 | ||
+ | > | ||
+ | > ss.bet + ss.wit | ||
+ | [1] 251052.8 | ||
+ | > ss.tot | ||
+ | [1] 251052.8 | ||
+ | > | ||
+ | > library(dplyr) | ||
+ | |||
+ | 다음의 패키지를 부착합니다: | ||
+ | |||
+ | The following objects are masked from ‘package: | ||
+ | |||
+ | filter, lag | ||
+ | |||
+ | The following objects are masked from ‘package: | ||
+ | |||
+ | intersect, setdiff, setequal, union | ||
+ | > # df.bet <- length(unique(dat)) - 1 | ||
+ | > df.bet <- nlevels(dat$ind) - 1 | ||
+ | > df.wit <- df.s1+df.s2 | ||
+ | > df.bet | ||
+ | [1] 1 | ||
+ | > df.wit | ||
+ | [1] 1998 | ||
+ | > df.bet+df.wit | ||
+ | [1] 1999 | ||
+ | > df.tot | ||
+ | [1] 1999 | ||
+ | > | ||
+ | > ms.bet <- ss.bet / df.bet | ||
+ | > ms.wit <- ss.wit / df.wit | ||
+ | > ms.bet | ||
+ | [1] 42012.87 | ||
+ | > ms.wit | ||
+ | [1] 104.6246 | ||
+ | > | ||
+ | > f.cal <- ms.bet / ms.wit | ||
+ | > f.cal | ||
+ | [1] 401.5582 | ||
+ | > pf(f.cal, df.bet, df.wit, lower.tail = F) | ||
+ | [1] 1.522061e-81 | ||
+ | > | ||
+ | > | ||
+ | > f.test <- aov(dat$values~ dat$ind, data = dat) | ||
+ | > summary(f.test) | ||
+ | Df Sum Sq Mean Sq F value Pr(> | ||
+ | dat$ind | ||
+ | Residuals | ||
+ | --- | ||
+ | Signif. codes: | ||
+ | > | ||
+ | > sqrt(f.cal) | ||
+ | [1] 20.03892 | ||
+ | > t.cal.ts | ||
+ | [1] -20.03892 | ||
+ | > | ||
+ | > # this is anova after all. | ||
+ | > | ||
+ | </ | ||
+ |
note.w02.1757554535.txt.gz · Last modified: 2025/09/11 10:35 by hkimscil