b:head_first_statistics:visualization
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| b:head_first_statistics:visualization [2025/09/03 07:48] – [Histogram skewedness] hkimscil | b:head_first_statistics:visualization [2025/09/08 08:22] (current) – [Histogram Modality] hkimscil | ||
|---|---|---|---|
| Line 91: | Line 91: | ||
| </ | </ | ||
| {{: | {{: | ||
| + | |||
| + | < | ||
| + | dat.iq <- rnorm(1000, 100, 15) | ||
| + | head(dat.iq) | ||
| + | tail(dat.iq) | ||
| + | head(dat.iq, | ||
| + | tail(dat.iq, | ||
| + | |||
| + | mean(dat.iq) | ||
| + | sd(dat.iq) | ||
| + | |||
| + | hist(dat.iq) | ||
| + | hist(dat.iq, | ||
| + | |||
| + | set.seed(101) | ||
| + | dat.iq <- rnorm(1000, 100, 15) | ||
| + | head(dat.iq) | ||
| + | tail(dat.iq) | ||
| + | head(dat.iq, | ||
| + | tail(dat.iq, | ||
| + | |||
| + | mean(dat.iq) | ||
| + | sd(dat.iq) | ||
| + | |||
| + | hist(dat.iq) | ||
| + | hist(dat.iq, | ||
| + | </ | ||
| ====== Scatter plot ====== | ====== Scatter plot ====== | ||
| < | < | ||
| Line 164: | Line 191: | ||
| <WRAP clear/> | <WRAP clear/> | ||
| ====== Histogram skewedness ====== | ====== Histogram skewedness ====== | ||
| - | + | < | |
| - | + | ||
| - | [{{: | + | |
| - | < | + | |
| - | [{{: | + | |
| - | <WRAP clear/> | + | |
| < | < | ||
| + | #### | ||
| + | # left-skewed distribution | ||
| + | # 1. | ||
| set.seed(1) | set.seed(1) | ||
| data <- rbeta(500, shape1 = 10, shape2 = 2) | data <- rbeta(500, shape1 = 10, shape2 = 2) | ||
| - | hist(data, probability = TRUE, main = " | + | hist(data, probability = TRUE, |
| - | xlab = " | + | main = " |
| + | xlab = " | ||
| + | col = " | ||
| + | # 2. | ||
| # install.packages(" | # install.packages(" | ||
| library(fitdistrplus) | library(fitdistrplus) | ||
| Line 184: | Line 211: | ||
| beta_est <- fit$estimate[" | beta_est <- fit$estimate[" | ||
| - | # | + | # 3. |
| curve(dbeta(x, | curve(dbeta(x, | ||
| add = TRUE, col = " | add = TRUE, col = " | ||
| </ | </ | ||
| + | </ | ||
| + | <WRAP column half> | ||
| + | {{: | ||
| + | </ | ||
| + | <WRAP clear/> | ||
| + | <WRAP column half> | ||
| < | < | ||
| - | </ | ||
| set.seed(1) | set.seed(1) | ||
| data <- rbeta(500, shape1 = 10, shape2 = 10) | data <- rbeta(500, shape1 = 10, shape2 = 10) | ||
| - | hist(data, probability = TRUE, main = " | + | hist(data, probability = TRUE, |
| - | xlab = " | + | main = " |
| + | xlab = " | ||
| + | col = " | ||
| + | # 2. | ||
| # install.packages(" | # install.packages(" | ||
| library(fitdistrplus) | library(fitdistrplus) | ||
| Line 203: | Line 238: | ||
| beta_est <- fit$estimate[" | beta_est <- fit$estimate[" | ||
| - | # | + | # 3. |
| curve(dbeta(x, | curve(dbeta(x, | ||
| add = TRUE, col = " | add = TRUE, col = " | ||
| + | </ | ||
| + | </ | ||
| + | <WRAP column half> | ||
| + | {{: | ||
| + | </ | ||
| + | <WRAP clear/> | ||
| + | <WRAP column half> | ||
| < | < | ||
| + | ## | ||
| + | # right-skewed distribution | ||
| + | # 1. | ||
| set.seed(1) | set.seed(1) | ||
| data <- rbeta(500, shape1 = 2, shape2 = 10) | data <- rbeta(500, shape1 = 2, shape2 = 10) | ||
| - | hist(data, probability = TRUE, main = " | + | hist(data, probability = TRUE, |
| - | xlab = " | + | main = " |
| + | xlab = " | ||
| + | col = " | ||
| # install.packages(" | # install.packages(" | ||
| Line 221: | Line 268: | ||
| beta_est <- fit$estimate[" | beta_est <- fit$estimate[" | ||
| - | # 3. Add the Beta density curve | + | # |
| curve(dbeta(x, | curve(dbeta(x, | ||
| add = TRUE, col = " | add = TRUE, col = " | ||
| </ | </ | ||
| - | {{: | + | </ |
| - | {{: | + | <WRAP column half> |
| - | {{: | + | {{: |
| + | </ | ||
| + | <WRAP clear/> | ||
| - | ====== | + | ====== |
| + | <WRAP column half> | ||
| + | Unimodal distribution | ||
| + | < | ||
| + | ### unimodal data | ||
| + | set.seed(1) | ||
| + | d.1 <- rnorm(500, 10, 2) | ||
| + | hist(d.1, breaks = 30, probability = T, | ||
| + | main = "Hist with Unimodal distrib", | ||
| + | xlab = " | ||
| + | col = " | ||
| + | lines(density(d.1), | ||
| + | col = " | ||
| + | </ | ||
| + | </ | ||
| + | |||
| + | <WRAP column half> | ||
| + | {{: | ||
| + | </ | ||
| + | |||
| + | <WRAP clear/> | ||
| + | Bimodal distribution | ||
| + | <WRAP column half> | ||
| + | < | ||
| + | ### bimodal data | ||
| + | set.seed(1) | ||
| + | d.1 <- rnorm(500, 10, 2) | ||
| + | d.2 <- rnorm(500, 20, 2) | ||
| + | d.all <- c(d.1, d.2) | ||
| + | hist(d.all, breaks = 30, probability = T, | ||
| + | main = "Hist with bimodal distrib", | ||
| + | xlab = " | ||
| + | col = " | ||
| + | lines(density(d.all), | ||
| + | col = " | ||
| + | </ | ||
| + | </ | ||
| + | |||
| + | <WRAP column half> | ||
| + | {{: | ||
| + | </ | ||
| + | <WRAP clear/> | ||
| + | |||
| + | <WRAP column half> | ||
| + | < | ||
| + | ### multi-modal data | ||
| + | # Parameters for the first normal distribution (Mode 1) | ||
| + | m.1 <- 50 | ||
| + | sd.1 <- 5 | ||
| + | |||
| + | # Parameters for the second normal distribution (Mode 2) | ||
| + | m.2 <- 100 | ||
| + | sd.2 <- 15 | ||
| + | |||
| + | # Parameters for the third normal distribution (Mode 3) | ||
| + | m.3 <- 160 | ||
| + | sd.3 <- 6 | ||
| + | |||
| + | # Mixing proportion for Mode 1 | ||
| + | prop.1 <- 0.3 | ||
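| + | # Cumulative cutoff for Mode 2: drawn when prop.1 <= tmp < prop.2 (probability 0.3) | ||
| + | prop.2 <- 0.6 # cumulative cutoff (prop.1 + 0.3), not 1 - prop.1 | ||
| + | # Cumulative cutoff for Mode 3: drawn when tmp >= prop.2 (probability 0.4) | ||
| + | prop.3 <- 1.0 # cumulative total; not referenced by the sampling loop below | ||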
| + | |||
| + | # Number of samples to generate | ||
| + | n.sam <- 1000 | ||
| + | |||
| + | # Preallocate a zero-filled numeric vector to store the combined samples | ||
| + | |||
| + | mm.dist <- numeric(n.sam) | ||
| + | set.seed(1) | ||
| + | for (i in 1:n.sam) { | ||
| + | # Randomly choose which distribution to sample from | ||
| + | tmp <- runif(1) | ||
| + | if (tmp < prop.1) { | ||
| + | mm.dist[i] <- rnorm(1, mean = m.1, sd = sd.1) | ||
| + | } else if (tmp < prop.2) { | ||
| + | mm.dist[i] <- rnorm(1, mean = m.2, sd = sd.2) | ||
| + | } else { | ||
| + | mm.dist[i] <- rnorm(1, mean = m.3, sd = sd.3) | ||
| + | } | ||
| + | |||
| + | } | ||
| + | |||
| + | hist(mm.dist, | ||
| + | main = " | ||
| + | xlab = " | ||
| + | freq = FALSE, probability = T, | ||
| + | col = " | ||
| + | lines(density(mm.dist), | ||
| + | col = " | ||
| + | |||
| + | </ | ||
| + | </ | ||
| + | <WRAP column half> | ||
| + | {{: | ||
| + | </ | ||
| + | <WRAP clear/> | ||
| + | |||
| + | |||
| + | ====== box plot ====== | ||
| + | <WRAP column half> | ||
| < | < | ||
| # Boxplot of MPG by Car Cylinders | # Boxplot of MPG by Car Cylinders | ||
| Line 238: | Line 388: | ||
| ylab=" | ylab=" | ||
| </ | </ | ||
| - | {{: | + | </ |
| + | <WRAP column half> | ||
| + | {{: | ||
| + | </ | ||
| + | <WRAP clear/> | ||
| + | ====== see also ====== | ||
| + | https:// | ||
b/head_first_statistics/visualization.1756853319.txt.gz · Last modified: by hkimscil
