r:chi-square_test:output03
>
> #Author DataFlair
> df <- read.csv("https://goo.gl/j6lRXD") #Reading CSV
> head(df)
id treatment improvement
1 1 treated improved
2 2 treated improved
3 3 not-treated improved
4 4 treated improved
5 5 treated not-improved
6 6 treated not-improved
> tail(df)
id treatment improvement
100 100 not-treated improved
101 101 treated improved
102 102 treated improved
103 103 not-treated not-improved
104 104 treated improved
105 105 not-treated not-improved
> str(df)
'data.frame': 105 obs. of 3 variables:
$ id : int 1 2 3 4 5 6 7 8 9 10 ...
$ treatment : chr "treated" "treated" "not-treated" "treated" ...
$ improvement: chr "improved" "improved" "improved" "improved" ...
> df$treatment <- factor(df$treatment)
> df$improvement <- factor(df$improvement)
> str(df)
'data.frame': 105 obs. of 3 variables:
$ id : int 1 2 3 4 5 6 7 8 9 10 ...
$ treatment : Factor w/ 2 levels "not-treated",..: 2 2 1 2 2 2 1 2 1 2 ...
$ improvement: Factor w/ 2 levels "improved","not-improved": 1 1 1 1 2 2 2 2 1 1 ...
> df.tbl<-table(df$treatment, df$improvement)
> df.tbl
improved not-improved
not-treated 26 29
treated 35 15
>
> chisq <- chisq.test(df.tbl, correct=F)
> chisq
Pearson's Chi-squared test
data: df.tbl
X-squared = 5.5569, df = 1, p-value = 0.01841
>
> # Alternatively, pass the two factors directly; this gives the same result
> chisq <- chisq.test(df$treatment, df$improvement, correct=F)
> chisq
Pearson's Chi-squared test
data: df$treatment and df$improvement
X-squared = 5.5569, df = 1, p-value = 0.01841
>
> o <- chisq$observed
> e <- chisq$expected
> o
df$improvement
df$treatment improved not-improved
not-treated 26 29
treated 35 15
> round(e,2)
df$improvement
df$treatment improved not-improved
not-treated 31.95 23.05
treated 29.05 20.95
> r <- (e-o)/sqrt(e) # (expected - observed)/sqrt(expected): the Pearson residuals with the sign reversed; chisq$residuals below is (o-e)/sqrt(e)
> round(r, 3)
df$improvement
df$treatment improved not-improved
not-treated 1.053 -1.240
treated -1.104 1.300
> round(chisq$residuals, 3)
df$improvement
df$treatment improved not-improved
not-treated -1.053 1.240
treated 1.104 -1.300
>
> cs.tmp <- sum(chisq$residuals^2)
> cs.tmp
[1] 5.55692
> cs.cal <- sum((e-o)^2/e)
> pchisq(cs.cal, 1,lower.tail = F)
[1] 0.01840777
> cs.cal
[1] 5.55692
> chisq
Pearson's Chi-squared test
data: df$treatment and df$improvement
X-squared = 5.5569, df = 1, p-value = 0.01841
>
> library(corrplot)
> corrplot(chisq$residuals, is.cor = FALSE)
>
> contrib <- 100*chisq$residuals^2/chisq$statistic
> round(contrib, 3)
df$improvement
df$treatment improved not-improved
not-treated 19.955 27.664
treated 21.950 30.431
>
> # Visualize each cell's percentage contribution to the chi-square statistic
> corrplot(contrib, is.cor = FALSE)
r/chi-square_test/output03.txt · Last modified: by hkimscil


