Table of Contents

ANCOVA (Analysis of Covariance)

Multiple Regression에 대한 이해가 필요
그 중에서

Y = a + b1X1 + b2X2 의 예에서

위의 recap에 더해서

통계를 공부하는 방법이 세가지가 있다.

anorexia.csv

anorexia=read.csv("anorexia.csv")
anorexia
anorexia$Diff = anorexia$Postwt-anorexia$Prewt
anorexia

$ Diff = Treat + Error $

$ Diff = Prewt + Treat + Error $

$ Postwt = Prewt + Treat + Error $

levels(anorexia$Treat)
[1] "Cont" "CBT"  "FT"  
anorexia$Treat=factor(anorexia$Treat, levels = c("Cont", "CBT", "FT"))

model = lm (Postwt ~ Prewt + Treat, data = anorexia)
Anova(model)
Anova Table (Type II tests)

Response: Postwt
          Sum Sq Df F value    Pr(>F)    
Prewt      353.8  1  7.2655 0.0088500 ** 
Treat      766.3  2  7.8681 0.0008438 ***
Residuals 3311.3 68                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(model)
Call:
lm(formula = Postwt ~ Prewt + Treat, data = anorexia)

Residuals:
     Min       1Q   Median       3Q      Max 
-14.1083  -4.2773  -0.5484   5.4838  15.2922 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  45.6740    13.2167   3.456 0.000950 ***
Prewt         0.4345     0.1612   2.695 0.008850 ** 
TreatCBT      4.0971     1.8935   2.164 0.033999 *  
TreatFT       8.6601     2.1931   3.949 0.000189 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6.978 on 68 degrees of freedom
Multiple R-squared:  0.2777,	Adjusted R-squared:  0.2458 
F-statistic: 8.713 on 3 and 68 DF,  p-value: 5.719e-05
library(multcomp)
dunnett = glht(model, linfct=mcp(Treat="Dunnett"))
summary(dunnett)

	 Simultaneous Tests for General Linear Hypotheses

Multiple Comparisons of Means: Dunnett Contrasts


Fit: lm(formula = Postwt ~ Prewt + Treat, data = anorexia)

Linear Hypotheses:
                Estimate Std. Error t value Pr(>|t|)    
CBT - Cont == 0    4.097      1.893   2.164 0.062939 .  
FT - Cont == 0     8.660      2.193   3.949 0.000373 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Adjusted p values reported -- single-step method)

ANCOVA (Analysis of Covariance) 2

Data

### --------------------------------------------------------------
### Analysis of covariance, cricket example
### pp. 228–229
### --------------------------------------------------------------

Input = (

"Species  Temp   Pulse
 ex       20.8   67.9 
 ex       20.8   65.1 
 ex       24     77.3 
 ex       24     78.7 
 ex       24     79.4 
 ex       24     80.4 
 ex       26.2   85.8 
 ex       26.2   86.6 
 ex       26.2   87.5 
 ex       26.2   89.1 
 ex       28.4   98.6 
 ex       29    100.8 
 ex       30.4   99.3 
 ex       30.4  101.7 
 niv      17.2   44.3 
 niv      18.3   47.2 
 niv      18.3   47.6 
 niv      18.3   49.6 
 niv      18.9   50.3 
 niv      18.9   51.8 
 niv      20.4   60 
 niv      21     58.5 
 niv      21     58.9 
 niv      22.1   60.7 
 niv      23.5   69.8 
 niv      24.2   70.9 
 niv      25.9   76.2 
 niv      26.5   76.1 
 niv      26.5   77 
 niv      26.5   77.7 
 niv      28.6   84.7
")

Data = read.table(textConnection(Input),header=TRUE)

Simple Plot

plot(x   = Data$Temp, 
     y   = Data$Pulse, 
     col = Data$Species, 
     pch = 16,
     xlab = "Temperature",
     ylab = "Pulse")

legend('bottomright', 
       legend = levels(Data$Species), 
       col = 1:2, 
       cex = 1,    
       pch = 16)

Analysis of covariance

options(contrasts = c("contr.treatment", "contr.poly"))
   
   ### These are the default contrasts in R


model.1 = lm (Pulse ~ Temp + Species + Temp:Species,
              data = Data)

library(car)
Anova(model.1, type="II")
 
Anova Table (Type II tests)
 
             Sum Sq Df  F value    Pr(>F)   
Temp         4376.1  1 1388.839 < 2.2e-16 ***
Species       598.0  1  189.789 9.907e-14 ***
Temp:Species    4.3  1    1.357    0.2542    
 
### Interaction is not significant, so the slope across groups
### is not different. 

Re-analysis

model.2 = lm (Pulse ~ Temp + Species,
              data = Data)

library(car)
Anova(model.2, type="II")
 
Anova Table (Type II tests)
 
          Sum Sq Df F value    Pr(>F)   
Temp      4376.1  1  1371.4 < 2.2e-16 ***
Species    598.0  1   187.4 6.272e-14 ***
 
### The category variable (Species) is significant,
### so the intercepts among groups are different
 
 
summary(model.2)
 
Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept)  -7.21091    2.55094  -2.827  0.00858 **
Temp          3.60275    0.09729  37.032  < 2e-16 ***
Speciesniv  -10.06529    0.73526 -13.689 6.27e-14 ***
 
### Note that these estimates are different than in the Handbook,
###   but the calculated results will be identical.
### The slope estimate is the same.
### The intercept for species 1 (ex) is (intercept).
### The intercept for species 2 (niv) is (intercept) + Speciesniv.
### This is determined from the contrast coding of the Species
### variable shown below, and the fact that Speciesniv is shown in
### coefficient table above.
 
 
contrasts(Data$Species)
 
    niv
ex    0
niv   1

Simple plot with fitted lines

I.nought = -7.21091
I1 = I.nought + 0
I2 = I.nought + -10.06529
B  = 3.60275 

plot(x   = Data$Temp, 
     y   = Data$Pulse, 
     col = Data$Species, 
     pch = 16,
     xlab = "Temperature",
     ylab = "Pulse")

legend('bottomright', 
       legend = levels(Data$Species), 
       col = 1:2, 
       cex = 1,    
       pch = 16)

abline(I1, B,
       lty=1, lwd=2, col = 1)

abline(I2, B,
       lty=1, lwd=2, col = 2)

p-value and R-squared of combined model

summary(model.2)

Checking assumptions of the model

hist(residuals(model.2), 
     col="darkgray")
plot(fitted(model.2), 
     residuals(model.2)
     )

Eg.

tri.csv
trt: treatment
pat: patient id
hgba1c: Hemoglobin A1c
trichg: Triglyceride changes

$ trichg = hgba1c + trt + e $