Table of Contents

chi square test eg. 1

  • code01
  • output01
Loading...
> # install.packages("MASS")
> library(MASS)     
> 
> print(str(survey))
'data.frame':	237 obs. of  12 variables:
 $ Sex   : Factor w/ 2 levels "Female","Male": 1 2 2 2 2 1 2 1 2 2 ...
 $ Wr.Hnd: num  18.5 19.5 18 18.8 20 18 17.7 17 20 18.5 ...
 $ NW.Hnd: num  18 20.5 13.3 18.9 20 17.7 17.7 17.3 19.5 18.5 ...
 $ W.Hnd : Factor w/ 2 levels "Left","Right": 2 1 2 2 2 2 2 2 2 2 ...
 $ Fold  : Factor w/ 3 levels "L on R","Neither",..: 3 3 1 3 2 1 1 3 3 3 ...
 $ Pulse : int  92 104 87 NA 35 64 83 74 72 90 ...
 $ Clap  : Factor w/ 3 levels "Left","Neither",..: 1 1 2 2 3 3 3 3 3 3 ...
 $ Exer  : Factor w/ 3 levels "Freq","None",..: 3 2 2 2 3 3 1 1 3 3 ...
 $ Smoke : Factor w/ 4 levels "Heavy","Never",..: 2 4 3 2 2 2 2 2 2 2 ...
 $ Height: num  173 178 NA 160 165 ...
 $ M.I   : Factor w/ 2 levels "Imperial","Metric": 2 1 NA 2 2 1 1 2 2 2 ...
 $ Age   : num  18.2 17.6 16.9 20.3 23.7 ...
NULL
> ??MASS::survey
> 
> wh.clap = data.frame(survey$Clap,survey$W.Hnd)
> wh.clap.tbl = table(survey$Clap,survey$W.Hnd) 
> wh.clap.tbl
         
          Left Right
  Left       9    29
  Neither    5    45
  Right      4   143
> chisq <- chisq.test(wh.clap.tbl)
경고메시지(들): 
chisq.test(wh.clap.tbl)에서:
  카이제곱 approximation은 정확하지 않을수도 있습니다
> chisq

	Pearson's Chi-squared test

data:  wh.clap.tbl
X-squared = 19.252, df = 2, p-value = 6.598e-05

> o <- chisq$observed
> e <- chisq$expected
> o
         
          Left Right
  Left       9    29
  Neither    5    45
  Right      4   143
> round(e, 2)
         
           Left  Right
  Left     2.91  35.09
  Neither  3.83  46.17
  Right   11.26 135.74
> res <- (e-o)/sqrt(e)
> round(res,2)
         
           Left Right
  Left    -3.57  1.03
  Neither -0.60  0.17
  Right    2.16 -0.62
> chisq.cal <- sum(res^2)
> p.val <- pchisq(chisq.cal, df=2, lower.tail = F)
> chisq.cal
[1] 19.25239
> p.val
[1] 6.597765e-05
> chisq

	Pearson's Chi-squared test

data:  wh.clap.tbl
X-squared = 19.252, df = 2, p-value = 6.598e-05

> 
> barplot(wh.clap.tbl, beside = TRUE, 
+         col = c("red", "lightgreen", "blue"),
+         main = "Clap vs W.Hnd",
+         xlab = "Writing Hand", ylab = "Number of Students")
> 
> legend("center", legend = rownames(wh.clap.tbl), 
+        fill = c("red", "lightgreen" ,"blue"))
>
>

chi square test eg. 2

  • code02
  • output02
Loading...
> ####################
> file_path <- "https://www.sthda.com/sthda/RDoc/data/housetasks.txt"
> housetasks <- read.delim(file_path, row.names = 1)
> housetasks
           Wife Alternating Husband Jointly
Laundry     156          14       2       4
Main_meal   124          20       5       4
Dinner       77          11       7      13
Breakfeast   82          36      15       7
Tidying      53          11       1      57
Dishes       32          24       4      53
Shopping     33          23       9      55
Official     12          46      23      15
Driving      10          51      75       3
Finances     13          13      21      66
Insurance     8           1      53      77
Repairs       0           3     160       2
Holidays      0           1       6     153
> 
> chisq <- chisq.test(housetasks)
> chisq

	Pearson's Chi-squared test

data:  housetasks
X-squared = 1944.5, df = 36, p-value < 2.2e-16

> 
> o <- chisq$observed
> e <- chisq$expected
> o
           Wife Alternating Husband Jointly
Laundry     156          14       2       4
Main_meal   124          20       5       4
Dinner       77          11       7      13
Breakfeast   82          36      15       7
Tidying      53          11       1      57
Dishes       32          24       4      53
Shopping     33          23       9      55
Official     12          46      23      15
Driving      10          51      75       3
Finances     13          13      21      66
Insurance     8           1      53      77
Repairs       0           3     160       2
Holidays      0           1       6     153
> round(e,2)
            Wife Alternating Husband Jointly
Laundry    60.55       25.63   38.45   51.37
Main_meal  52.64       22.28   33.42   44.65
Dinner     37.16       15.73   23.59   31.52
Breakfeast 48.17       20.39   30.58   40.86
Tidying    41.97       17.77   26.65   35.61
Dishes     38.88       16.46   24.69   32.98
Shopping   41.28       17.48   26.22   35.02
Official   33.03       13.98   20.97   28.02
Driving    47.82       20.24   30.37   40.57
Finances   38.88       16.46   24.69   32.98
Insurance  47.82       20.24   30.37   40.57
Repairs    56.77       24.03   36.05   48.16
Holidays   55.05       23.30   34.95   46.70
> cs.cal <- sum((e-o)^2/e)
> cs.cal
[1] 1944.456
> dim(housetasks)
[1] 13 4
> dim(housetasks)-1
[1] 12 3
> pchisq(cs.cal, 12*3, lower.tail = F)
[1] 0
> chisq

	Pearson's Chi-squared test

data:  housetasks
X-squared = 1944.5, df = 36, p-value < 2.2e-16

chi square test eg. 3

  • code03
  • output03
Loading...
> 
> #Author DataFlair
> df <- read.csv("https://goo.gl/j6lRXD")  #Reading CSV
> head(df)
  id   treatment  improvement
1  1     treated     improved
2  2     treated     improved
3  3 not-treated     improved
4  4     treated     improved
5  5     treated not-improved
6  6     treated not-improved
> tail(df)
     id   treatment  improvement
100 100 not-treated     improved
101 101     treated     improved
102 102     treated     improved
103 103 not-treated not-improved
104 104     treated     improved
105 105 not-treated not-improved
> str(df)
'data.frame':	105 obs. of  3 variables:
 $ id         : int  1 2 3 4 5 6 7 8 9 10 ...
 $ treatment  : chr  "treated" "treated" "not-treated" "treated" ...
 $ improvement: chr  "improved" "improved" "improved" "improved" ...
> df$treatment <- factor(df$treatment)
> df$improvement <- factor(df$improvement)
> str(df)
'data.frame':	105 obs. of  3 variables:
 $ id         : int  1 2 3 4 5 6 7 8 9 10 ...
 $ treatment  : Factor w/ 2 levels "not-treated",..: 2 2 1 2 2 2 1 2 1 2 ...
 $ improvement: Factor w/ 2 levels "improved","not-improved": 1 1 1 1 2 2 2 2 1 1 ...
> df.tbl<-table(df$treatment, df$improvement)
> df.tbl
             
              improved not-improved
  not-treated       26           29
  treated           35           15
> 
> chisq <- chisq.test(df.tbl, correct=F)
> chisq

	Pearson's Chi-squared test

data:  df.tbl
X-squared = 5.5569, df = 1, p-value = 0.01841

> 
> # or the below works also
> chisq <- chisq.test(df$treatment, df$improvement, correct=F)
> chisq

	Pearson's Chi-squared test

data:  df$treatment and df$improvement
X-squared = 5.5569, df = 1, p-value = 0.01841

> 
> o <- chisq$observed
> e <- chisq$expected
> o
             df$improvement
df$treatment  improved not-improved
  not-treated       26           29
  treated           35           15
> round(e,2)
             df$improvement
df$treatment  improved not-improved
  not-treated    31.95        23.05
  treated        29.05        20.95
> r <- (e-o)/sqrt(e) # Pearson residuals with the sign flipped; chisq$residuals uses (o-e)/sqrt(e)
> round(r, 3)
             df$improvement
df$treatment  improved not-improved
  not-treated    1.053       -1.240
  treated       -1.104        1.300
> round(chisq$residuals, 3)
             df$improvement
df$treatment  improved not-improved
  not-treated   -1.053        1.240
  treated        1.104       -1.300
> 
> cs.tmp <- sum(chisq$residuals^2)
> cs.tmp
[1] 5.55692
> cs.cal <- sum((e-o)^2/e)
> pchisq(cs.cal, 1,lower.tail = F)
[1] 0.01840777
> cs.cal
[1] 5.55692
> chisq

	Pearson's Chi-squared test

data:  df$treatment and df$improvement
X-squared = 5.5569, df = 1, p-value = 0.01841

> 
> library(corrplot)
> corrplot(chisq$residuals, is.cor = FALSE)
> 
> contrib <- 100*chisq$residuals^2/chisq$statistic
> round(contrib, 3)
             df$improvement
df$treatment  improved not-improved
  not-treated   19.955       27.664
  treated       21.950       30.431
> 
> # Visualize the contribution
> corrplot(contrib, is.cor = FALSE)

Residual value $(o-e)/\sqrt{e}$ of each cell

Contribution of each cell to the chi-square statistic, in percent: $100 \times \frac{(o-e)^2/e}{\chi^2}$