r:general_statistics
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revisionLast revisionBoth sides next revision | ||
r:general_statistics [2017/10/30 08:59] – [9.1. Summarizing Your Data] hkimscil | r:general_statistics [2019/10/11 07:53] – [Forming a Confidence Interval for a Mean] hkimscil | ||
---|---|---|---|
Line 77: | Line 77: | ||
====== Calculating Relative Frequencies ====== | ====== Calculating Relative Frequencies ====== | ||
< | < | ||
- | > mean(Cars93$MPG.city > 14) # see the summary(Cars93$MPG.city) the above | + | > mean(Cars93$MPG.city > 14) # see the summary(Cars93$MPG.city) the above = 100%, min = 15이므로 |
[1] 1 | [1] 1 | ||
Line 181: | Line 181: | ||
</ | </ | ||
+ | < | ||
+ | > cardata <- data.frame(Cars93$Origin, | ||
+ | > cardata | ||
+ | | ||
+ | 1 non-USA | ||
+ | 2 non-USA | ||
+ | 3 non-USA | ||
+ | 4 non-USA | ||
+ | 5 non-USA | ||
+ | 6 USA | ||
+ | 7 USA Large | ||
+ | 8 USA Large | ||
+ | 9 USA | ||
+ | 10 | ||
+ | 11 | ||
+ | 12 | ||
+ | 13 | ||
+ | 14 | ||
+ | 15 | ||
+ | 16 | ||
+ | 17 | ||
+ | 18 | ||
+ | 19 | ||
+ | 20 | ||
+ | 21 | ||
+ | 22 | ||
+ | 23 | ||
+ | 24 | ||
+ | 25 | ||
+ | 26 | ||
+ | 27 | ||
+ | 28 | ||
+ | 29 | ||
+ | 30 | ||
+ | 31 | ||
+ | 32 | ||
+ | 33 | ||
+ | 34 | ||
+ | 35 | ||
+ | 36 | ||
+ | 37 | ||
+ | 38 | ||
+ | 39 | ||
+ | 40 | ||
+ | 41 | ||
+ | 42 | ||
+ | 43 | ||
+ | 44 | ||
+ | 45 | ||
+ | 46 | ||
+ | 47 | ||
+ | 48 | ||
+ | 49 | ||
+ | 50 | ||
+ | 51 | ||
+ | 52 | ||
+ | 53 | ||
+ | 54 | ||
+ | 55 | ||
+ | 56 | ||
+ | 57 | ||
+ | 58 | ||
+ | 59 | ||
+ | 60 | ||
+ | 61 | ||
+ | 62 | ||
+ | 63 | ||
+ | 64 | ||
+ | 65 | ||
+ | 66 | ||
+ | 67 | ||
+ | 68 | ||
+ | 69 | ||
+ | 70 | ||
+ | 71 | ||
+ | 72 | ||
+ | 73 | ||
+ | 74 | ||
+ | 75 | ||
+ | 76 | ||
+ | 77 | ||
+ | 78 | ||
+ | 79 | ||
+ | 80 | ||
+ | 81 | ||
+ | 82 | ||
+ | 83 | ||
+ | 84 | ||
+ | 85 | ||
+ | 86 | ||
+ | 87 | ||
+ | 88 | ||
+ | 89 | ||
+ | 90 | ||
+ | 91 | ||
+ | 92 | ||
+ | 93 | ||
+ | > cartbl <- table(cardata) | ||
+ | > cartbl | ||
+ | | ||
+ | Cars93.Origin Compact Large Midsize Small Sporty Van | ||
+ | USA | ||
+ | non-USA | ||
+ | > summary(cartbl) | ||
+ | Number of cases in table: 93 | ||
+ | Number of factors: 2 | ||
+ | Test for independence of all factors: | ||
+ | Chisq = 14.08, df = 5, p-value = 0.01511 | ||
+ | Chi-squared approximation may be incorrect | ||
+ | > chisq.test(cartbl) | ||
+ | |||
+ | Pearson's Chi-squared test | ||
+ | |||
+ | data: cartbl | ||
+ | X-squared = 14.08, df = 5, p-value = 0.01511 | ||
+ | |||
+ | Warning message: | ||
+ | In chisq.test(cartbl) : 카이제곱 approximation은 정확하지 않을수도 있습니다 | ||
+ | > | ||
+ | </ | ||
====== Calculating Quantiles (and Quartiles) of a Dataset ====== | ====== Calculating Quantiles (and Quartiles) of a Dataset ====== | ||
Line 200: | Line 320: | ||
< | < | ||
+ | > dur > mean(dur) | ||
+ | [1] TRUE FALSE FALSE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | | ||
+ | [99] FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE | ||
+ | [113] TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE | ||
+ | [127] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE | ||
+ | [141] TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE | ||
+ | [155] TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE | ||
+ | [169] FALSE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE | ||
+ | [183] TRUE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE | ||
+ | [197] TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE | ||
+ | [211] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE | ||
+ | [225] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE FALSE TRUE | ||
+ | [239] TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE | ||
+ | [253] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE | ||
+ | [267] TRUE TRUE FALSE TRUE FALSE TRUE | ||
> mean(dur > mean(dur)) | > mean(dur > mean(dur)) | ||
[1] 0.6176471 | [1] 0.6176471 | ||
Line 284: | Line 425: | ||
> round(mean(zdur)) | > round(mean(zdur)) | ||
[1] 0 | [1] 0 | ||
+ | > round(sd(zdur)) | ||
+ | [1] 1 | ||
+ | |||
</ | </ | ||
Line 353: | Line 497: | ||
====== Forming a Confidence Interval for a Mean ====== | ====== Forming a Confidence Interval for a Mean ====== | ||
- | < | + | < |
+ | > set.seed(1024) | ||
+ | > x <- rnorm(50, mean=100, sd=15) | ||
+ | > s <- sd(x) | ||
> m <- mean(x) | > m <- mean(x) | ||
> n <- length(x) | > n <- length(x) | ||
Line 365: | Line 512: | ||
> SE | > SE | ||
[1] 2.458358 | [1] 2.458358 | ||
- | > E <- qt(.975, df=n-1)*SE | + | ## qt fun: qt(prob, df) zscore 2점에 해당하는 점수는? |
+ | > qtv <- qt(.975, df=n-1) | ||
+ | > qtv | ||
+ | [1] | ||
+ | ## qtv는 2에 해당하는 95퍼센트 CL | ||
+ | ## 이 때의 CI는 | ||
+ | > E <- qtv*SE | ||
> E | > E | ||
[1] 4.940254 | [1] 4.940254 | ||
Line 454: | Line 607: | ||
mpg.auto = mtcars[L, | mpg.auto = mtcars[L, | ||
mpg.auto | mpg.auto | ||
- | [1] 21.4 18.7 18.1 14.3 24.4 ... | + | [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 10.4 14.7 21.5 15.5 15.2 |
+ | [18] 13.3 19.2 | ||
mpg.manual = mtcars[!L, | mpg.manual = mtcars[!L, | ||
mpg.manual | mpg.manual | ||
- | [1] 21.0 21.0 22.8 32.4 30.4 ... | + | [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 21.4 |
t.test(mpg.auto, | t.test(mpg.auto, | ||
Line 493: | Line 647: | ||
Another example: | Another example: | ||
- | < | + | <code>> a = c(175, 168, 168, 190, 156, 181, 182, 175, 174, 179) |
- | b = c(185, 169, 173, 173, 188, 186, 175, 174, 179, 180) | + | > b = c(185, 169, 173, 173, 188, 186, 175, 174, 179, 180) |
</ | </ | ||
- | < | + | <code>> t.test(a,b, var.equal=TRUE, |
Two Sample t-test | Two Sample t-test | ||
Line 511: | Line 665: | ||
- | qt(0.975, 18) | + | > qt(0.975, 18) |
[1] 2.100922 | [1] 2.100922 | ||
</ | </ | ||
- | < | + | <code>> var.test(a, |
F test to compare two variances | F test to compare two variances | ||
Line 529: | Line 683: | ||
- | qf(0.95, 9, 9) | + | > qf(0.95, 9, 9) |
[1] 3.178893 | [1] 3.178893 | ||
Line 540: | Line 694: | ||
===== e.g., ===== | ===== e.g., ===== | ||
< | < | ||
- | #> extra group ID | + | > extra group ID |
- | #> 1 0.7 | + | > 1 0.7 |
- | #> 2 | + | > 2 |
- | #> 3 | + | > 3 |
- | #> 4 | + | > 4 |
- | #> 5 | + | > 5 |
- | #> 6 3.4 | + | > 6 3.4 |
- | #> 7 3.7 | + | > 7 3.7 |
- | #> 8 0.8 | + | > 8 0.8 |
- | #> 9 0.0 | + | > 9 0.0 |
- | #> 10 | + | > 10 |
- | #> 11 | + | > 11 |
- | #> 12 | + | > 12 |
- | #> 13 | + | > 13 |
- | #> 14 | + | > 14 |
- | #> 15 -0.1 | + | > 15 -0.1 |
- | #> 16 | + | > 16 |
- | #> 17 | + | > 17 |
- | #> 18 | + | > 18 |
- | #> 19 | + | > 19 |
- | #> 20 | + | > 20 |
</ | </ | ||
- | < | + | <code>> sleep_wide <- data.frame( |
ID=1:10, | ID=1:10, | ||
group1=sleep$extra[1: | group1=sleep$extra[1: | ||
Line 568: | Line 722: | ||
) | ) | ||
sleep_wide | sleep_wide | ||
- | #> ID group1 group2 | + | > ID group1 group2 |
- | #> 1 | + | > 1 |
- | #> 2 | + | > 2 |
- | #> 3 | + | > 3 |
- | #> 4 | + | > 4 |
- | #> 5 | + | > 5 |
- | #> 6 | + | > 6 |
- | #> 7 | + | > 7 |
- | #> 8 | + | > 8 |
- | #> 9 | + | > 9 |
- | #> 10 10 2.0 3.4 | + | > 10 10 2.0 3.4 |
</ | </ | ||
Ignore the ID variable for convenience. | Ignore the ID variable for convenience. | ||
Line 585: | Line 739: | ||
# Welch t-test | # Welch t-test | ||
t.test(extra ~ group, sleep) | t.test(extra ~ group, sleep) | ||
- | + | > | |
- | #> | + | > Welch Two Sample t-test |
- | #> Welch Two Sample t-test | + | > |
- | #> | + | > data: extra by group |
- | #> data: extra by group | + | > t = -1.8608, df = 17.776, p-value = 0.07939 |
- | #> t = -1.8608, df = 17.776, p-value = 0.07939 | + | > alternative hypothesis: true difference in means is not equal to 0 |
- | #> alternative hypothesis: true difference in means is not equal to 0 | + | > 95 percent confidence interval: |
- | #> 95 percent confidence interval: | + | > -3.3654832 |
- | #> -3.3654832 | + | > sample estimates: |
- | #> sample estimates: | + | > mean in group 1 mean in group 2 |
- | #> mean in group 1 mean in group 2 | + | > 0.75 2.33 |
- | #> 0.75 2.33 | + | |
# Same for wide data (two separate vectors) | # Same for wide data (two separate vectors) | ||
- | # t.test(sleep_wide$group1, | + | > t.test(sleep_wide$group1, |
</ | </ | ||
Line 606: | Line 759: | ||
< | < | ||
# Student t-test | # Student t-test | ||
- | t.test(extra ~ group, sleep, var.equal=TRUE) | + | > t.test(extra ~ group, sleep, var.equal=TRUE) |
- | #> | + | > |
- | #> Two Sample t-test | + | > Two Sample t-test |
- | #> | + | > |
- | #> data: extra by group | + | > data: extra by group |
- | #> t = -1.8608, df = 18, p-value = 0.07919 | + | > t = -1.8608, df = 18, p-value = 0.07919 |
- | #> alternative hypothesis: true difference in means is not equal to 0 | + | > alternative hypothesis: true difference in means is not equal to 0 |
- | #> 95 percent confidence interval: | + | > 95 percent confidence interval: |
- | #> -3.363874 | + | > -3.363874 |
- | #> sample estimates: | + | > sample estimates: |
- | #> mean in group 1 mean in group 2 | + | > mean in group 1 mean in group 2 |
- | #> 0.75 2.33 | + | > |
</ | </ | ||
- | < | + | < |
- | # t.test(sleep_wide$group1, | + | > t.test(sleep_wide$group1, |
</ | </ | ||
Line 630: | Line 783: | ||
< | < | ||
# Sort by group then ID | # Sort by group then ID | ||
- | sleep <- sleep[order(sleep$group, | + | > sleep <- sleep[order(sleep$group, |
# Paired t-test | # Paired t-test | ||
- | t.test(extra ~ group, sleep, paired=TRUE) | + | > t.test(extra ~ group, sleep, paired=TRUE) |
- | #> | + | |
- | #> Paired t-test | + | |
- | #> | + | |
- | #> data: extra by group | + | |
- | #> t = -4.0621, df = 9, p-value = 0.002833 | + | t = -4.0621, df = 9, p-value = 0.002833 |
- | #> alternative hypothesis: true difference in means is not equal to 0 | + | |
- | #> 95 percent confidence interval: | + | 95 percent confidence interval: |
- | #> | + | -2.4598858 -0.7001142 |
- | #> sample estimates: | + | |
- | #> mean of the differences | + | mean of the differences |
- | #> -1.58 | + | -1.58 |
</ | </ | ||
< | < | ||
- | # t.test(sleep.wide$group1, | + | > t.test(sleep_wide$group1, | ||
+ | |||
+ | Paired t-test | ||
+ | |||
+ | data: sleep_wide$group1 and sleep_wide$group2 | ||
+ | t = -4.0621, df = 9, p-value = 0.002833 | ||
+ | alternative hypothesis: true difference in means is not equal to 0 | ||
+ | 95 percent confidence interval: | ||
+ | | ||
+ | sample estimates: | ||
+ | mean of the differences | ||
+ | -1.58 | ||
</ | </ | ||
The paired t-test is equivalent to testing whether the difference between each pair of observations has a population mean of 0. (See below for comparing a single group to a population mean.) | The paired t-test is equivalent to testing whether the difference between each pair of observations has a population mean of 0. (See below for comparing a single group to a population mean.) | ||
- | < | + | <code>> t.test(sleep_wide$group1 - sleep_wide$group2, mu=0, var.equal=TRUE) |
- | #> Error in t.test(sleep.wide$group1 - sleep.wide$group2, | + | |
+ | One Sample t-test | ||
+ | |||
+ | data: sleep_wide$group1 - sleep_wide$group2 | ||
+ | t = -4.0621, df = 9, p-value = 0.002833 | ||
+ | alternative hypothesis: true mean is not equal to 0 | ||
+ | 95 percent confidence interval: | ||
+ | -2.4598858 -0.7001142 | ||
+ | sample estimates: | ||
+ | mean of x | ||
+ | -1.58 | ||
</ | </ | ||
Line 663: | Line 839: | ||
< | < | ||
t.test(sleep$extra, | t.test(sleep$extra, | ||
- | #> | + | > |
- | #> One Sample t-test | + | > One Sample t-test |
- | #> | + | > |
- | #> data: sleep$extra | + | > data: sleep$extra |
- | #> t = 3.413, df = 19, p-value = 0.002918 | + | > t = 3.413, df = 19, p-value = 0.002918 |
- | #> alternative hypothesis: true mean is not equal to 0 | + | > alternative hypothesis: true mean is not equal to 0 |
- | #> 95 percent confidence interval: | + | > 95 percent confidence interval: |
- | #> 0.5955845 2.4844155 | + | > 0.5955845 2.4844155 |
- | #> sample estimates: | + | > sample estimates: |
- | #> mean of x | + | > mean of x |
- | #> 1.54 | + | > 1.54 |
</ | </ | ||
r/general_statistics.txt · Last modified: 2019/10/11 07:56 by hkimscil