# Regression Assumptions Example
# created by John M. Quick
# http://www.johnmquick.com
# December 7, 2009
#
# Graphical checks on a fitted regression model (`linearModelVar`):
#   1. standardized residuals vs. standardized predicted values
#   2. histogram of the standardized residuals with a normal curve overlaid
#   3. normal P-P plot of the standardized residuals

# ---- Data ----

# read data into variable
datavar <- read.csv("dataset_enrollmentForecast.csv")

# attach data variable
# NOTE(review): attach() is generally discouraged because it puts the columns
# on the search path. It is kept here so that code run after this script can
# still refer to columns by bare name; prefer `data = datavar` in new code.
attach(datavar)

# display all data
print(datavar)

# Console output recorded in the original transcript:
#    YEAR  ROLL UNEM HGRAD  INC
# 1     1  5501  8.1  9552 1923
# 2     2  5945  7.0  9680 1961
# 3     3  6629  7.3  9731 1979
# 4     4  7556  7.5 11666 2030
# 5     5  8716  7.0 14675 2112
# 6     6  9369  6.4 15265 2192
# 7     7  9920  6.5 15484 2235
# 8     8 10167  6.4 15723 2351
# 9     9 11084  6.3 16501 2411
# 10   10 12504  7.7 16890 2475
# 11   11 13746  8.2 17203 2524
# 12   12 13656  7.5 17707 2674
# 13   13 13850  7.4 18108 2833
# 14   14 14145  8.2 18266 2863
# 15   15 14888 10.1 19308 2839
# 16   16 14991  9.2 18224 2898
# 17   17 14836  7.7 18997 3123
# 18   18 14478  5.7 19505 3195
# 19   19 14539  6.5 19800 3239
# 20   20 14395  7.5 19546 3129
# 21   21 14599  7.3 19117 3100
# 22   22 14969  9.2 18774 3008
# 23   23 15107 10.1 17813 2983
# 24   24 14831  7.5 17304 3069
# 25   25 15081  8.8 16756 3151
# 26   26 15127  9.1 16749 3127
# 27   27 15856  8.8 16925 3179
# 28   28 15938  7.8 17231 3207
# 29   29 16081  7.0 16816 3345

# ---- Model ----

# Everything below reads `linearModelVar`, but the transcript never shows it
# being created.
# NOTE(review): presumably an lm() fit on the data above, e.g.
#   linearModelVar <- lm(ROLL ~ UNEM + HGRAD, data = datavar)
# The formula is an assumption -- confirm it and fit the model here.
# Until then, fail with a clear message instead of "object not found".
if (!exists("linearModelVar")) {
  stop(
    "`linearModelVar` is not defined; fit the regression model before ",
    "running the diagnostic plots.",
    call. = FALSE
  )
}

# ---- Residuals plot ----

# get unstandardized predicted and residual values
unstandardizedPredicted <- predict(linearModelVar)
unstandardizedResiduals <- resid(linearModelVar)

# get standardized values (subtract the mean, divide by the standard deviation)
standardizedPredicted <-
  (unstandardizedPredicted - mean(unstandardizedPredicted)) /
  sd(unstandardizedPredicted)
standardizedResiduals <-
  (unstandardizedResiduals - mean(unstandardizedResiduals)) /
  sd(unstandardizedResiduals)

# create standardized residuals plot
# (the transcript first drew this plot without a title and then redrew it with
# one; only the titled version is kept)
plot(
  standardizedPredicted,
  standardizedResiduals,
  main = "Standardized Residuals Plot",
  xlab = "Standardized Predicted Values",
  ylab = "Standardized Residuals"
)

# add horizontal line at zero
abline(h = 0)

# ---- Residuals histogram ----

# create residuals histogram (density scale so the normal curve is comparable)
hist(standardizedResiduals, freq = FALSE)

# add normal curve
curve(dnorm, add = TRUE)

# ---- PP plot ----

# get probability distribution for residuals
probDist <- pnorm(standardizedResiduals)

# create PP plot
plot(
  ppoints(length(standardizedResiduals)),
  sort(probDist),
  main = "PP Plot",
  xlab = "Observed Probability",
  ylab = "Expected Probability"
)

# add diagonal line (intercept 0, slope 1)
abline(0, 1)