Multiple Regression에 대한 이해가 필요
그 중에서
Y = a + b1X1 + b2X2 의 예에서
위의 recap에 더해서
통계를 공부하는 방법이 세가지가 있다.
anorexia=read.csv("anorexia.csv") anorexia anorexia$Diff = anorexia$Postwt-anorexia$Prewt anorexia
$ Diff = Treat + Error $
$ Diff = Prewt + Treat + Error $
$ Postwt = Prewt + Treat + Error $
levels(anorexia$Treat) [1] "Cont" "CBT" "FT" anorexia$Treat=factor(anorexia$Treat, levels = c("Cont", "CBT", "FT")) model = lm (Postwt ~ Prewt + Treat, data = anorexia)
Anova(model) Anova Table (Type II tests) Response: Postwt Sum Sq Df F value Pr(>F) Prewt 353.8 1 7.2655 0.0088500 ** Treat 766.3 2 7.8681 0.0008438 *** Residuals 3311.3 68 --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(model) Call: lm(formula = Postwt ~ Prewt + Treat, data = anorexia) Residuals: Min 1Q Median 3Q Max -14.1083 -4.2773 -0.5484 5.4838 15.2922 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 45.6740 13.2167 3.456 0.000950 *** Prewt 0.4345 0.1612 2.695 0.008850 ** TreatCBT 4.0971 1.8935 2.164 0.033999 * TreatFT 8.6601 2.1931 3.949 0.000189 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 6.978 on 68 degrees of freedom Multiple R-squared: 0.2777, Adjusted R-squared: 0.2458 F-statistic: 8.713 on 3 and 68 DF, p-value: 5.719e-05
library(multcomp) dunnett = glht(model, linfct=mcp(Treat="Dunnett"))
summary(dunnett) Simultaneous Tests for General Linear Hypotheses Multiple Comparisons of Means: Dunnett Contrasts Fit: lm(formula = Postwt ~ Prewt + Treat, data = anorexia) Linear Hypotheses: Estimate Std. Error t value Pr(>|t|) CBT - Cont == 0 4.097 1.893 2.164 0.062939 . FT - Cont == 0 8.660 2.193 3.949 0.000373 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Adjusted p values reported -- single-step method)
Data
### -------------------------------------------------------------- ### Analysis of covariance, cricket example ### pp. 228–229 ### -------------------------------------------------------------- Input = ( "Species Temp Pulse ex 20.8 67.9 ex 20.8 65.1 ex 24 77.3 ex 24 78.7 ex 24 79.4 ex 24 80.4 ex 26.2 85.8 ex 26.2 86.6 ex 26.2 87.5 ex 26.2 89.1 ex 28.4 98.6 ex 29 100.8 ex 30.4 99.3 ex 30.4 101.7 niv 17.2 44.3 niv 18.3 47.2 niv 18.3 47.6 niv 18.3 49.6 niv 18.9 50.3 niv 18.9 51.8 niv 20.4 60 niv 21 58.5 niv 21 58.9 niv 22.1 60.7 niv 23.5 69.8 niv 24.2 70.9 niv 25.9 76.2 niv 26.5 76.1 niv 26.5 77 niv 26.5 77.7 niv 28.6 84.7 ") Data = read.table(textConnection(Input),header=TRUE)
Simple Plot
plot(x = Data$Temp, y = Data$Pulse, col = Data$Species, pch = 16, xlab = "Temperature", ylab = "Pulse") legend('bottomright', legend = levels(Data$Species), col = 1:2, cex = 1, pch = 16)
Analysis of covariance
options(contrasts = c("contr.treatment", "contr.poly")) ### These are the default contrasts in R model.1 = lm (Pulse ~ Temp + Species + Temp:Species, data = Data) library(car) Anova(model.1, type="II") Anova Table (Type II tests) Sum Sq Df F value Pr(>F) Temp 4376.1 1 1388.839 < 2.2e-16 *** Species 598.0 1 189.789 9.907e-14 *** Temp:Species 4.3 1 1.357 0.2542 ### Interaction is not significant, so the slope across groups ### is not different.
Re-analysis
model.2 = lm (Pulse ~ Temp + Species, data = Data) library(car) Anova(model.2, type="II") Anova Table (Type II tests) Sum Sq Df F value Pr(>F) Temp 4376.1 1 1371.4 < 2.2e-16 *** Species 598.0 1 187.4 6.272e-14 *** ### The category variable (Species) is significant, ### so the intercepts among groups are different summary(model.2) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -7.21091 2.55094 -2.827 0.00858 ** Temp 3.60275 0.09729 37.032 < 2e-16 *** Speciesniv -10.06529 0.73526 -13.689 6.27e-14 *** ### Note that these estimates are different than in the Handbook, ### but the calculated results will be identical. ### The slope estimate is the same. ### The intercept for species 1 (ex) is (intercept). ### The intercept for species 2 (niv) is (intercept) + Speciesniv. ### This is determined from the contrast coding of the Species ### variable shown below, and the fact that Speciesniv is shown in ### coefficient table above. contrasts(Data$Species) niv ex 0 niv 1
Simple plot with fitted lines
I.nought = -7.21091 I1 = I.nought + 0 I2 = I.nought + -10.06529 B = 3.60275 plot(x = Data$Temp, y = Data$Pulse, col = Data$Species, pch = 16, xlab = "Temperature", ylab = "Pulse") legend('bottomright', legend = levels(Data$Species), col = 1:2, cex = 1, pch = 16) abline(I1, B, lty=1, lwd=2, col = 1) abline(I2, B, lty=1, lwd=2, col = 2)
p-value and R-squared of combined model
summary(model.2)
Checking assumptions of the model
hist(residuals(model.2), col="darkgray")
plot(fitted(model.2), residuals(model.2) )
tri.csv
trt: treatment
pat: patient id
hgba1c: Hemoglobin A1c
trichg: Triglyceride changes
$ trichg = hgba1c + trt + e $