> > #Author DataFlair > df <- read.csv("https://goo.gl/j6lRXD") #Reading CSV > head(df) id treatment improvement 1 1 treated improved 2 2 treated improved 3 3 not-treated improved 4 4 treated improved 5 5 treated not-improved 6 6 treated not-improved > tail(df) id treatment improvement 100 100 not-treated improved 101 101 treated improved 102 102 treated improved 103 103 not-treated not-improved 104 104 treated improved 105 105 not-treated not-improved > str(df) 'data.frame': 105 obs. of 3 variables: $ id : int 1 2 3 4 5 6 7 8 9 10 ... $ treatment : chr "treated" "treated" "not-treated" "treated" ... $ improvement: chr "improved" "improved" "improved" "improved" ... > df$treatment <- factor(df$treatment) > df$improvement <- factor(df$improvement) > str(df) 'data.frame': 105 obs. of 3 variables: $ id : int 1 2 3 4 5 6 7 8 9 10 ... $ treatment : Factor w/ 2 levels "not-treated",..: 2 2 1 2 2 2 1 2 1 2 ... $ improvement: Factor w/ 2 levels "improved","not-improved": 1 1 1 1 2 2 2 2 1 1 ... 
> df.tbl<-table(df$treatment, df$improvement) > df.tbl improved not-improved not-treated 26 29 treated 35 15 > > chisq <- chisq.test(df.tbl, correct=F) > chisq Pearson's Chi-squared test data: df.tbl X-squared = 5.5569, df = 1, p-value = 0.01841 > > # or, equivalently, pass the two factors directly > chisq <- chisq.test(df$treatment, df$improvement, correct=F) > chisq Pearson's Chi-squared test data: df$treatment and df$improvement X-squared = 5.5569, df = 1, p-value = 0.01841 > > o <- chisq$observed > e <- chisq$expected > o df$improvement df$treatment improved not-improved not-treated 26 29 treated 35 15 > round(e,2) df$improvement df$treatment improved not-improved not-treated 31.95 23.05 treated 29.05 20.95 > r <- (e-o)/sqrt(e) # sign-flipped Pearson residuals; chisq$residuals uses (o-e)/sqrt(e) > round(r, 3) df$improvement df$treatment improved not-improved not-treated 1.053 -1.240 treated -1.104 1.300 > round(chisq$residuals, 3) df$improvement df$treatment improved not-improved not-treated -1.053 1.240 treated 1.104 -1.300 > > cs.tmp <- sum(chisq$residuals^2) > cs.tmp [1] 5.55692 > cs.cal <- sum((e-o)^2/e) > pchisq(cs.cal, 1,lower.tail = F) [1] 0.01840777 > cs.cal [1] 5.55692 > chisq Pearson's Chi-squared test data: df$treatment and df$improvement X-squared = 5.5569, df = 1, p-value = 0.01841 > > library(corrplot) > corrplot(chisq$residuals, is.cor = FALSE) > > contrib <- 100*chisq$residuals^2/chisq$statistic > round(contrib, 3) df$improvement df$treatment improved not-improved not-treated 19.955 27.664 treated 21.950 30.431 > > # Visualize the contribution > corrplot(contrib, is.cor = FALSE) Pearson residual $r = (o-e)/\sqrt{e}$ of each cell {{.:pasted:20251202-225427.png}} percentage contribution $100 \times r^2/\chi^2$ of each cell {{.:pasted:20251202-225435.png}}