b:head_first_statistics:visualization
Differences
This shows you the differences between two versions of the page.
Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
b:head_first_statistics:visualization [2024/09/04 08:36] – [Scatter plot] hkimscil | b:head_first_statistics:visualization [2025/09/08 08:22] (current) – [Histogram Modality] hkimscil | ||
---|---|---|---|
Line 91: | Line 91: | ||
</ | </ | ||
{{: | {{: | ||
+ | |||
+ | < | ||
+ | dat.iq <- rnorm(1000, 100, 15) | ||
+ | head(dat.iq) | ||
+ | tail(dat.iq) | ||
+ | head(dat.iq, | ||
+ | tail(dat.iq, | ||
+ | |||
+ | mean(dat.iq) | ||
+ | sd(dat.iq) | ||
+ | |||
+ | hist(dat.iq) | ||
+ | hist(dat.iq, | ||
+ | |||
+ | set.seed(101) | ||
+ | dat.iq <- rnorm(1000, 100, 15) | ||
+ | head(dat.iq) | ||
+ | tail(dat.iq) | ||
+ | head(dat.iq, | ||
+ | tail(dat.iq, | ||
+ | |||
+ | mean(dat.iq) | ||
+ | sd(dat.iq) | ||
+ | |||
+ | hist(dat.iq) | ||
+ | hist(dat.iq, | ||
+ | </ | ||
====== Scatter plot ====== | ====== Scatter plot ====== | ||
< | < | ||
Line 152: | Line 179: | ||
{{: | {{: | ||
- | A bit more fancy line | + | Outlier에 대한 주의 |
- | < | + | [{{:pearson-6.png? |}}] |
- | # by Number of Car Cylinders | + | |
- | library(car) | + | |
- | scatterplot(mpg ~ wt | cyl, data=mtcars, | + | |
- | | + | |
- | | + | |
- | | + | |
- | {{:c:ps1-1: | + | |
<WRAP clear /> | <WRAP clear /> | ||
- | see | + | |
+ | ====== Presentation ====== | ||
+ | For a very good example, | ||
https:// | https:// | ||
* Life expectancy data: {{: | * Life expectancy data: {{: | ||
+ | <WRAP clear/> | ||
+ | ====== Histogram skewness ====== | ||
+ | <WRAP column half> | ||
< | < | ||
- | le <- as.data.frame(read.csv("http:// | + | #### |
- | colnames(le)[1] <- "c.code" | + | # left-skewed distribution |
- | lea <- le$X2017 | + | # 1. |
- | leb <- lea[complete.cases(lea)] | + | set.seed(1) |
- | hist(leb, color="grey") | + | data <- rbeta(500, shape1 = 10, shape2 = 2) |
+ | hist(data, probability = TRUE, | ||
+ | main = "Histogram with Left-skewed data", | ||
+ | | ||
+ | col = " | ||
+ | |||
+ | # 2. | ||
+ | # install.packages(" | ||
+ | library(fitdistrplus) | ||
+ | |||
+ | fit <- fitdist(data, | ||
+ | alpha_est | ||
+ | beta_est | ||
+ | |||
+ | # 3. | ||
+ | curve(dbeta(x, shape1 = alpha_est, shape2 = beta_est), | ||
+ | add = TRUE, col = "red", lwd = 2) | ||
</ | </ | ||
+ | </ | ||
- | [{{:c:ps1-1:2019:pasted:20190909-110252.png|Life expectancy in 2017}}] | + | <WRAP column half> |
- | <WRAP clear/> | + | {{:b:head_first_statistics:pasted:20250903-074821.png}} |
- | [{{:c:ps1-1:2019:pasted:20190909-104759.png|Distribution of temperature}}] | + | </ |
- | <WRAP clear/> | + | <WRAP clear/> |
- | [{{:c:ps1-1:2019:pasted:20190909-111117.png|skewness}}] | + | <WRAP column half> |
- | <WRAP clear/> | + | < |
- | [{{:c:ps1-1:2019:pasted:20190909-111001.png|modality}}] | + | set.seed(1) |
- | <WRAP clear/>. | + | data <- rbeta(500, shape1 = 10, shape2 = 10) |
- | box plot | + | hist(data, probability = TRUE, |
+ | main = " | ||
+ | xlab = " | ||
+ | col = " | ||
+ | |||
+ | # 2. | ||
+ | # install.packages(" | ||
+ | library(fitdistrplus) | ||
+ | |||
+ | fit <- fitdist(data, | ||
+ | alpha_est <- fit$estimate[" | ||
+ | beta_est <- fit$estimate[" | ||
+ | |||
+ | # 3. | ||
+ | curve(dbeta(x, | ||
+ | add = TRUE, col = " | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | <WRAP column half> | ||
+ | {{:b:head_first_statistics: | ||
+ | </ | ||
+ | |||
+ | <WRAP clear/> | ||
+ | <WRAP column half> | ||
+ | < | ||
+ | ## | ||
+ | # right-skewed distribution | ||
+ | # 1. | ||
+ | set.seed(1) | ||
+ | data <- rbeta(500, shape1 = 2, shape2 = 10) | ||
+ | hist(data, probability = TRUE, | ||
+ | main = " | ||
+ | xlab = " | ||
+ | col = " | ||
+ | |||
+ | # install.packages(" | ||
+ | library(fitdistrplus) | ||
+ | |||
+ | fit <- fitdist(data, | ||
+ | alpha_est <- fit$estimate[" | ||
+ | beta_est <- fit$estimate[" | ||
+ | |||
+ | # | ||
+ | curve(dbeta(x, | ||
+ | add = TRUE, col = " | ||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | {{:b:head_first_statistics:pasted:20250903-082513.png}} | ||
+ | </ | ||
+ | <WRAP clear/> | ||
+ | |||
+ | ====== Histogram Modality ====== | ||
+ | <WRAP column half> | ||
+ | Unimodal | ||
+ | < | ||
+ | ### unimodal data | ||
+ | set.seed(1) | ||
+ | d.1 <- rnorm(500, 10, 2) | ||
+ | hist(d.1, breaks = 30, probability = T, | ||
+ | main = "Hist with Unimodal distrib", | ||
+ | xlab = " | ||
+ | col = " | ||
+ | lines(density(d.1), | ||
+ | col = " | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | <WRAP column half> | ||
+ | {{:b:head_first_statistics: | ||
+ | </ | ||
+ | |||
+ | <WRAP clear/> | ||
+ | |||
+ | Bimodal distribution | ||
+ | <WRAP column half> | ||
+ | < | ||
+ | ### bimodal data | ||
+ | set.seed(1) | ||
+ | d.1 <- rnorm(500, 10, 2) | ||
+ | d.2 <- rnorm(500, 20, 2) | ||
+ | d.all <- c(d.1, d.2) | ||
+ | hist(d.all, breaks = 30, probability = T, | ||
+ | main = "Hist with bimodal distrib", | ||
+ | xlab = " | ||
+ | col = " | ||
+ | lines(density(d.all), | ||
+ | col = " | ||
+ | </ | ||
+ | </ | ||
+ | |||
+ | <WRAP column half> | ||
+ | {{:b:head_first_statistics:pasted:20250903-083524.png}} | ||
+ | </ | ||
+ | <WRAP clear/> | ||
+ | |||
+ | <WRAP column half> | ||
+ | < | ||
+ | ### multi-modal data | ||
+ | # Parameters for the first normal distribution (Mode 1) | ||
+ | m.1 <- 50 | ||
+ | sd.1 <- 5 | ||
+ | |||
+ | # Parameters for the second normal distribution (Mode 2) | ||
+ | m.2 <- 100 | ||
+ | sd.2 <- 15 | ||
+ | |||
+ | m.3 <- 160 | ||
+ | sd.3 <- 6 | ||
+ | |||
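+ | # Cumulative cutoff for Mode 1 (drawn when tmp < 0.3, i.e. 30% of samples) | ||
+ | prop.1 <- 0.3 | ||
+ | # Cumulative cutoff for Mode 2 (drawn when 0.3 <= tmp < 0.6, i.e. 30% of samples) | ||
+ | prop.2 <- 0.6 # This is prop.1 + 0.3, not 1 - prop.1 | ||
+ | # Cumulative cutoff for Mode 3 (the remaining 40% of samples) | ||
+ | prop.3 <- 1.0 # Upper bound only; the loop's final else branch covers this case | ||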
+ | |||
+ | # Number of samples to generate | ||
+ | n.sam <- 1000 | ||
+ | |||
+ | # Create an empty vector to store the combined samples | ||
+ | |||
+ | mm.dist <- numeric(n.sam) | ||
+ | set.seed(1) | ||
+ | for (i in 1:n.sam) { | ||
+ | # Randomly choose which distribution to sample from | ||
+ | tmp <- runif(1) | ||
+ | if (tmp < prop.1) { | ||
+ | mm.dist[i] <- rnorm(1, mean = m.1, sd = sd.1) | ||
+ | } else if (tmp < prop.2) { | ||
+ | mm.dist[i] <- rnorm(1, mean = m.2, sd = sd.2) | ||
+ | } else { | ||
+ | mm.dist[i] <- rnorm(1, mean = m.3, sd = sd.3) | ||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | hist(mm.dist, | ||
+ | main = " | ||
+ | xlab = " | ||
+ | freq = FALSE, probability = T, | ||
+ | col = " | ||
+ | lines(density(mm.dist), | ||
+ | col = " | ||
+ | |||
+ | </ | ||
+ | </ | ||
+ | <WRAP column half> | ||
+ | {{:b:head_first_statistics:pasted:20250908-082219.png}} | ||
+ | </ | ||
+ | <WRAP clear/> | ||
+ | |||
+ | |||
+ | ====== | ||
+ | <WRAP column half> | ||
< | < | ||
# Boxplot of MPG by Car Cylinders | # Boxplot of MPG by Car Cylinders | ||
Line 192: | Line 388: | ||
ylab=" | ylab=" | ||
</ | </ | ||
- | {{: | + | </ |
+ | <WRAP column half> | ||
+ | {{: | ||
+ | </ | ||
+ | <WRAP clear/> | ||
+ | ====== see also ====== | ||
+ | https:// | ||
b/head_first_statistics/visualization.1725406582.txt.gz · Last modified: 2024/09/04 08:36 by hkimscil