r:general_statistics
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| r:general_statistics [2016/11/02 01:31] – [e.g.,] hkimscil | r:general_statistics [2019/10/10 22:56] (current) – [Forming a Confidence Interval for a Mean] hkimscil | ||
|---|---|---|---|
| Line 53: | Line 53: | ||
| < | < | ||
| + | suburbs <- read.csv(" | ||
| </ | </ | ||
| Line 76: | Line 77: | ||
| ====== Calculating Relative Frequencies ====== | ====== Calculating Relative Frequencies ====== | ||
| < | < | ||
| - | > mean(Cars93$MPG.city > 14) # see the summary(Cars93$MPG.city) the above | + | > mean(Cars93$MPG.city > 14) # = 100% because min = 15; see summary(Cars93$MPG.city) above | ||
| [1] 1 | [1] 1 | ||
| Line 180: | Line 181: | ||
| </ | </ | ||
| + | < | ||
| + | > cardata <- data.frame(Cars93$Origin, | ||
| + | > cardata | ||
| + | | ||
| + | 1 non-USA | ||
| + | 2 non-USA | ||
| + | 3 non-USA | ||
| + | 4 non-USA | ||
| + | 5 non-USA | ||
| + | 6 USA | ||
| + | 7 USA Large | ||
| + | 8 USA Large | ||
| + | 9 USA | ||
| + | 10 | ||
| + | 11 | ||
| + | 12 | ||
| + | 13 | ||
| + | 14 | ||
| + | 15 | ||
| + | 16 | ||
| + | 17 | ||
| + | 18 | ||
| + | 19 | ||
| + | 20 | ||
| + | 21 | ||
| + | 22 | ||
| + | 23 | ||
| + | 24 | ||
| + | 25 | ||
| + | 26 | ||
| + | 27 | ||
| + | 28 | ||
| + | 29 | ||
| + | 30 | ||
| + | 31 | ||
| + | 32 | ||
| + | 33 | ||
| + | 34 | ||
| + | 35 | ||
| + | 36 | ||
| + | 37 | ||
| + | 38 | ||
| + | 39 | ||
| + | 40 | ||
| + | 41 | ||
| + | 42 | ||
| + | 43 | ||
| + | 44 | ||
| + | 45 | ||
| + | 46 | ||
| + | 47 | ||
| + | 48 | ||
| + | 49 | ||
| + | 50 | ||
| + | 51 | ||
| + | 52 | ||
| + | 53 | ||
| + | 54 | ||
| + | 55 | ||
| + | 56 | ||
| + | 57 | ||
| + | 58 | ||
| + | 59 | ||
| + | 60 | ||
| + | 61 | ||
| + | 62 | ||
| + | 63 | ||
| + | 64 | ||
| + | 65 | ||
| + | 66 | ||
| + | 67 | ||
| + | 68 | ||
| + | 69 | ||
| + | 70 | ||
| + | 71 | ||
| + | 72 | ||
| + | 73 | ||
| + | 74 | ||
| + | 75 | ||
| + | 76 | ||
| + | 77 | ||
| + | 78 | ||
| + | 79 | ||
| + | 80 | ||
| + | 81 | ||
| + | 82 | ||
| + | 83 | ||
| + | 84 | ||
| + | 85 | ||
| + | 86 | ||
| + | 87 | ||
| + | 88 | ||
| + | 89 | ||
| + | 90 | ||
| + | 91 | ||
| + | 92 | ||
| + | 93 | ||
| + | > cartbl <- table(cardata) | ||
| + | > cartbl | ||
| + | | ||
| + | Cars93.Origin Compact Large Midsize Small Sporty Van | ||
| + | USA | ||
| + | non-USA | ||
| + | > summary(cartbl) | ||
| + | Number of cases in table: 93 | ||
| + | Number of factors: 2 | ||
| + | Test for independence of all factors: | ||
| + | Chisq = 14.08, df = 5, p-value = 0.01511 | ||
| + | Chi-squared approximation may be incorrect | ||
| + | > chisq.test(cartbl) | ||
| + | |||
| + | Pearson' | ||
| + | |||
| + | data: cartbl | ||
| + | X-squared = 14.08, df = 5, p-value = 0.01511 | ||
| + | |||
| + | Warning message: | ||
| + | In chisq.test(cartbl) : 카이제곱 approximation은 정확하지 않을수도 있습니다 | ||
| + | > | ||
| + | </ | ||
| ====== Calculating Quantiles (and Quartiles) of a Dataset ====== | ====== Calculating Quantiles (and Quartiles) of a Dataset ====== | ||
| Line 199: | Line 320: | ||
| < | < | ||
| + | > dur > mean(dur) | ||
| + | [1] TRUE FALSE FALSE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | | ||
| + | [99] FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE | ||
| + | [113] TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE | ||
| + | [127] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE | ||
| + | [141] TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE | ||
| + | [155] TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE | ||
| + | [169] FALSE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE | ||
| + | [183] TRUE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE | ||
| + | [197] TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE | ||
| + | [211] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE | ||
| + | [225] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE FALSE TRUE | ||
| + | [239] TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE TRUE FALSE TRUE | ||
| + | [253] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE | ||
| + | [267] TRUE TRUE FALSE TRUE FALSE TRUE | ||
| > mean(dur > mean(dur)) | > mean(dur > mean(dur)) | ||
| [1] 0.6176471 | [1] 0.6176471 | ||
| Line 283: | Line 425: | ||
| > round(mean(zdur)) | > round(mean(zdur)) | ||
| [1] 0 | [1] 0 | ||
| + | > round(sd(zdur)) | ||
| + | [1] 1 | ||
| + | |||
| </ | </ | ||
| Line 352: | Line 497: | ||
| ====== Forming a Confidence Interval for a Mean ====== | ====== Forming a Confidence Interval for a Mean ====== | ||
| - | < | + | < |
| + | > set.seed(1024) | ||
| + | > x <- rnorm(50, mean=100, sd=15) | ||
| + | > s <- sd(x) | ||
| > m <- mean(x) | > m <- mean(x) | ||
| > n <- length(x) | > n <- length(x) | ||
| Line 364: | Line 512: | ||
| > SE | > SE | ||
| [1] 2.458358 | [1] 2.458358 | ||
| - | > E <- qt(.975, df=n-1)*SE | + | ## qt fun: qt(prob, df) zscore 2점에 해당하는 점수는? |
| + | > qtv <- qt(.975, df=n-1) | ||
| + | > qtv | ||
| + | [1] | ||
| + | ## qtv는 2에 해당하는 95퍼센트 CL | ||
| + | ## 이 때의 CI는 | ||
| + | > E <- qtv*SE | ||
| > E | > E | ||
| [1] 4.940254 | [1] 4.940254 | ||
| Line 372: | Line 526: | ||
| </ | </ | ||
| - | < | + | < |
| + | > t.test(x, mu=98) | ||
| One Sample t-test | One Sample t-test | ||
| data: x | data: x | ||
| - | t = 39.052, df = 49, p-value | + | t = 0.37089, df = 49, p-value |
| - | alternative hypothesis: true mean is not equal to 0 | + | alternative hypothesis: true mean is not equal to 98 |
| 95 percent confidence interval: | 95 percent confidence interval: | ||
| - | | + | |
| sample estimates: | sample estimates: | ||
| mean of x | mean of x | ||
| - | 96.00386 | + | 98.83223 |
| + | |||
| + | > t.test(x, mu=100) | ||
| + | |||
| + | One Sample t-test | ||
| + | |||
| + | data: x | ||
| + | t = -0.52043, df = 49, p-value = 0.6051 | ||
| + | alternative hypothesis: true mean is not equal to 100 | ||
| + | 95 percent confidence interval: | ||
| + | 94.32303 103.34143 | ||
| + | sample estimates: | ||
| + | mean of x | ||
| + | | ||
| + | |||
| + | > t.test(x, mu=95) | ||
| + | |||
| + | One Sample t-test | ||
| + | |||
| + | data: x | ||
| + | t = 1.7079, df = 49, p-value = 0.09399 | ||
| + | alternative hypothesis: true mean is not equal to 95 | ||
| + | 95 percent confidence interval: | ||
| + | 94.32303 103.34143 | ||
| + | sample estimates: | ||
| + | mean of x | ||
| + | | ||
| + | |||
| + | > | ||
| </ | </ | ||
| Line 396: | Line 579: | ||
| W = 0.97415, p-value = 0.3386 | W = 0.97415, p-value = 0.3386 | ||
| </ | </ | ||
| + | The large p-value suggests the underlying population could be normally distributed. The next example reports a small p-value for y, so it is unlikely that this sample came from a normal population: | ||
| + | |||
| + | equal variances assumed -> var.equal=TRUE (Student's t-test) | ||
| + | equal variances not assumed -> var.equal=FALSE (Welch's t-test, the default) | ||
| + | |||
| ====== Comparing the Means of Two Samples ====== | ====== Comparing the Means of Two Samples ====== | ||
| Line 448: | Line 636: | ||
| mpg.auto = mtcars[L, | mpg.auto = mtcars[L, | ||
| mpg.auto | mpg.auto | ||
| - | [1] 21.4 18.7 18.1 14.3 24.4 ... | + | [1] 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 10.4 10.4 14.7 21.5 15.5 15.2 |
| + | [18] 13.3 19.2 | ||
| mpg.manual = mtcars[!L, | mpg.manual = mtcars[!L, | ||
| mpg.manual | mpg.manual | ||
| - | [1] 21.0 21.0 22.8 32.4 30.4 ... | + | [1] 21.0 21.0 22.8 32.4 30.4 33.9 27.3 26.0 30.4 15.8 19.7 15.0 21.4 |
| t.test(mpg.auto, | t.test(mpg.auto, | ||
| Line 466: | Line 655: | ||
| mean of x mean of y | mean of x mean of y | ||
| | | ||
| + | </ | ||
| + | OR | ||
| + | |||
| + | < | ||
| + | |||
| + | Welch Two Sample t-test | ||
| + | |||
| + | data: mtcars$mpg by mtcars$am | ||
| + | t = -3.7671, df = 18.332, p-value = 0.001374 | ||
| + | alternative hypothesis: true difference in means is not equal to 0 | ||
| + | 95 percent confidence interval: | ||
| + | | ||
| + | sample estimates: | ||
| + | mean in group 0 mean in group 1 | ||
| + | | ||
| </ | </ | ||
| + | |||
| Another example. | Another example. | ||
| - | < | + | <code>> a = c(175, 168, 168, 190, 156, 181, 182, 175, 174, 179) |
| - | b = c(185, 169, 173, 173, 188, 186, 175, 174, 179, 180) | + | > b = c(185, 169, 173, 173, 188, 186, 175, 174, 179, 180) |
| </ | </ | ||
| - | < | + | <code>> t.test(a,b, var.equal=TRUE, |
| Two Sample t-test | Two Sample t-test | ||
| Line 489: | Line 694: | ||
| - | qt(0.975, 18) | + | > qt(0.975, 18) |
| [1] 2.100922 | [1] 2.100922 | ||
| </ | </ | ||
| - | < | + | <code>> var.test(a, |
| F test to compare two variances | F test to compare two variances | ||
| Line 507: | Line 712: | ||
| - | qf(0.95, 9, 9) | + | > qf(0.95, 9, 9) |
| [1] 3.178893 | [1] 3.178893 | ||
| Line 518: | Line 723: | ||
| ===== e.g., ===== | ===== e.g., ===== | ||
| < | < | ||
| - | #> extra group ID | + | > extra group ID |
| - | #> 1 0.7 | + | > 1 0.7 |
| - | #> 2 | + | > 2 |
| - | #> 3 | + | > 3 |
| - | #> 4 | + | > 4 |
| - | #> 5 | + | > 5 |
| - | #> 6 3.4 | + | > 6 3.4 |
| - | #> 7 3.7 | + | > 7 3.7 |
| - | #> 8 0.8 | + | > 8 0.8 |
| - | #> 9 0.0 | + | > 9 0.0 |
| - | #> 10 | + | > 10 |
| - | #> 11 | + | > 11 |
| - | #> 12 | + | > 12 |
| - | #> 13 | + | > 13 |
| - | #> 14 | + | > 14 |
| - | #> 15 -0.1 | + | > 15 -0.1 |
| - | #> 16 | + | > 16 |
| - | #> 17 | + | > 17 |
| - | #> 18 | + | > 18 |
| - | #> 19 | + | > 19 |
| - | #> 20 | + | > 20 |
| </ | </ | ||
| - | < | + | <code>> sleep_wide <- data.frame( |
| ID=1:10, | ID=1:10, | ||
| group1=sleep$extra[1: | group1=sleep$extra[1: | ||
| Line 546: | Line 751: | ||
| ) | ) | ||
| sleep_wide | sleep_wide | ||
| - | #> ID group1 group2 | + | > ID group1 group2 |
| - | #> 1 | + | > 1 |
| - | #> 2 | + | > 2 |
| - | #> 3 | + | > 3 |
| - | #> 4 | + | > 4 |
| - | #> 5 | + | > 5 |
| - | #> 6 | + | > 6 |
| - | #> 7 | + | > 7 |
| - | #> 8 | + | > 8 |
| - | #> 9 | + | > 9 |
| - | #> 10 10 2.0 3.4 | + | > 10 10 2.0 3.4 |
| </ | </ | ||
| Ignore the ID variable for convenience. | Ignore the ID variable for convenience. | ||
| Line 563: | Line 768: | ||
| # Welch t-test | # Welch t-test | ||
| t.test(extra ~ group, sleep) | t.test(extra ~ group, sleep) | ||
| - | + | > | |
| - | #> | + | > Welch Two Sample t-test |
| - | #> Welch Two Sample t-test | + | > |
| - | #> | + | > data: extra by group |
| - | #> data: extra by group | + | > t = -1.8608, df = 17.776, p-value = 0.07939 |
| - | #> t = -1.8608, df = 17.776, p-value = 0.07939 | + | > alternative hypothesis: true difference in means is not equal to 0 |
| - | #> alternative hypothesis: true difference in means is not equal to 0 | + | > 95 percent confidence interval: |
| - | #> 95 percent confidence interval: | + | > -3.3654832 |
| - | #> -3.3654832 | + | > sample estimates: |
| - | #> sample estimates: | + | > mean in group 1 mean in group 2 |
| - | #> mean in group 1 mean in group 2 | + | > 0.75 2.33 |
| - | #> 0.75 2.33 | + | |
| # Same for wide data (two separate vectors) | # Same for wide data (two separate vectors) | ||
| - | # t.test(sleep_wide$group1, | + | > t.test(sleep_wide$group1, |
| </ | </ | ||
| Line 584: | Line 788: | ||
| < | < | ||
| # Student t-test | # Student t-test | ||
| - | t.test(extra ~ group, sleep, var.equal=TRUE) | + | > t.test(extra ~ group, sleep, var.equal=TRUE) |
| - | #> | + | > |
| - | #> Two Sample t-test | + | > Two Sample t-test |
| - | #> | + | > |
| - | #> data: extra by group | + | > data: extra by group |
| - | #> t = -1.8608, df = 18, p-value = 0.07919 | + | > t = -1.8608, df = 18, p-value = 0.07919 |
| - | #> alternative hypothesis: true difference in means is not equal to 0 | + | > alternative hypothesis: true difference in means is not equal to 0 |
| - | #> 95 percent confidence interval: | + | > 95 percent confidence interval: |
| - | #> -3.363874 | + | > -3.363874 |
| - | #> sample estimates: | + | > sample estimates: |
| - | #> mean in group 1 mean in group 2 | + | > mean in group 1 mean in group 2 |
| - | #> 0.75 2.33 | + | > |
| </ | </ | ||
| - | < | + | < |
| - | # t.test(sleep_wide$group1, | + | > t.test(sleep_wide$group1, |
| </ | </ | ||
| Line 608: | Line 812: | ||
| < | < | ||
| # Sort by group then ID | # Sort by group then ID | ||
| - | sleep <- sleep[order(sleep$group, | + | > sleep <- sleep[order(sleep$group, |
| # Paired t-test | # Paired t-test | ||
| - | t.test(extra ~ group, sleep, paired=TRUE) | + | > t.test(extra ~ group, sleep, paired=TRUE) |
| - | #> | + | |
| - | #> Paired t-test | + | |
| - | #> | + | |
| - | #> data: extra by group | + | |
| - | #> t = -4.0621, df = 9, p-value = 0.002833 | + | t = -4.0621, df = 9, p-value = 0.002833 |
| - | #> alternative hypothesis: true difference in means is not equal to 0 | + | |
| - | #> 95 percent confidence interval: | + | 95 percent confidence interval: |
| - | #> | + | -2.4598858 -0.7001142 |
| - | #> sample estimates: | + | |
| - | #> mean of the differences | + | mean of the differences |
| - | #> -1.58 | + | -1.58 |
| </ | </ | ||
| < | < | ||
| - | # t.test(sleep.wide$group1, | + | > t.test(sleep_wide$group1, | ||
| + | |||
| + | Paired t-test | ||
| + | |||
| + | data: sleep_wide$group1 and sleep_wide$group2 | ||
| + | t = -4.0621, df = 9, p-value = 0.002833 | ||
| + | alternative hypothesis: true difference in means is not equal to 0 | ||
| + | 95 percent confidence interval: | ||
| + | | ||
| + | sample estimates: | ||
| + | mean of the differences | ||
| + | -1.58 | ||
| </ | </ | ||
| The paired t-test is equivalent to testing whether the difference between each pair of observations has a population mean of 0. (See below for comparing a single group to a population mean.) | The paired t-test is equivalent to testing whether the difference between each pair of observations has a population mean of 0. (See below for comparing a single group to a population mean.) | ||
| - | < | + | <code>> t.test(sleep_wide$group1 - sleep_wide$group2, mu=0, var.equal=TRUE) |
| - | #> Error in t.test(sleep.wide$group1 - sleep.wide$group2, | + | |
| + | One Sample | ||
| + | |||
| + | data: sleep_wide$group1 - sleep_wide$group2 | ||
| + | t = -4.0621, df = 9, p-value = 0.002833 | ||
| + | alternative hypothesis: true mean is not equal to 0 | ||
| + | 95 percent confidence interval: | ||
| + | -2.4598858 -0.7001142 | ||
| + | sample estimates: | ||
| + | mean of x | ||
| + | -1.58 | ||
| </ | </ | ||
| Line 641: | Line 868: | ||
| < | < | ||
| t.test(sleep$extra, | t.test(sleep$extra, | ||
| - | #> | + | > |
| - | #> One Sample t-test | + | > One Sample t-test |
| - | #> | + | > |
| - | #> data: sleep$extra | + | > data: sleep$extra |
| - | #> t = 3.413, df = 19, p-value = 0.002918 | + | > t = 3.413, df = 19, p-value = 0.002918 |
| - | #> alternative hypothesis: true mean is not equal to 0 | + | > alternative hypothesis: true mean is not equal to 0 |
| - | #> 95 percent confidence interval: | + | > 95 percent confidence interval: |
| - | #> 0.5955845 2.4844155 | + | > 0.5955845 2.4844155 |
| - | #> sample estimates: | + | > sample estimates: |
| - | #> mean of x | + | > mean of x |
| - | #> 1.54 | + | > 1.54 |
| </ | </ | ||
r/general_statistics.1478050316.txt.gz · Last modified: by hkimscil
