multicollinearity
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| multicollinearity [2016/04/27 06:47] – hkimscil | multicollinearity [2023/05/22 07:57] (current) – [Testing with correlation matrix] hkimscil | ||
|---|---|---|---|
| Line 6: | Line 6: | ||
| see [[: | see [[: | ||
| - | [[: | + | |
| + | ====== Testing multicollinearity with correlation matrix ====== | ||
| + | < | ||
| + | options(digits = 4) | ||
| + | HSGPA <- c(3.0, 3.2, 2.8, 2.5, 3.2, 3.8, 3.9, 3.8, 3.5, 3.1) | ||
| + | FGPA <- c(2.8, 3.0, 2.8, 2.2, 3.3, 3.3, 3.5, 3.7, 3.4, 2.9) | ||
| + | SATV <- c(500, 550, 450, 400, 600, 650, 700, 550, 650, 550) | ||
| + | |||
| + | scholar <- data.frame(FGPA, | ||
| + | |||
| + | # install.packages(" | ||
| + | # library(psych) | ||
| + | describe(scholar) # provides descriptive information about each variable | ||
| + | |||
| + | corrs <- cor(scholar) # find the correlations and set them into an object called 'corrs' | ||
| + | corrs # print corrs | ||
| + | |||
| + | pairs(scholar) | ||
| + | attach(scholar) | ||
| + | |||
| + | # install.packages(" | ||
| + | library(corrplot) | ||
| + | corrplot(cor(scholar), | ||
| + | </ | ||
| + | |||
| + | {{: | ||
| + | |||
| + | 독립변인인 SATV와 HSGPA 간의 상관관계가 상당히 높다는 것을 알 수 있다 (r = 0.87). | ||
| + | |||
| + | |||
| + | ====== using Tolerance ====== | ||
| + | < | ||
| + | m.tolerance <- lm(SATV~HSGPA, | ||
| + | summary(m.tolerance) | ||
| + | 1 - summary(m.tolerance)$r.squared | ||
| + | # tolerance 값이 0.1보다 작으면 multicollinearity의 가능성이 아주 많음 | ||
| + | # 즉, 자신을 제외한 다른 독립변인들이 해당 변인을 설명하는 정도(R-squared)가 0.9 이상이면 문제라는 이야기 | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > m.tolerance <- lm(SATV~HSGPA, | ||
| + | > summary(m.tolerance) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = SATV ~ HSGPA, data = scholar) | ||
| + | |||
| + | Residuals: | ||
| + | Min 1Q Median | ||
| + | -101.86 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) | ||
| + | HSGPA 176.7 | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
| + | |||
| + | Residual standard error: 48.2 on 8 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > tol <- 1 - summary(m.tolerance)$r.squared | ||
| + | > tol | ||
| + | [1] 0.2352 | ||
| + | </ | ||
| + | |||
| + | ====== using VIF (Variance Inflation Factors) ====== | ||
| + | Variance Inflation Factor(VIF)는 독립변인 계수의 분산(표준오차)이 다른 독립변인들과의 상관관계 때문에 얼마나 부풀려졌는지(inflation) 확인해 보는 방법이다. VIF = 1 인 경우, 해당 독립변인이 다른 독립변인들과 상관관계가 없다는 것을 의미한다. VIF는 tolerance의 역수(VIF = 1 / tolerance)로, 아래처럼 구한다. 일반적으로 VIF 값이 5 이상이면 주목하여 살펴보고, 10 이상이면 multicollinearity가 있는 것으로 본다. | ||
| + | < | ||
| + | tol <- 1- summary(m.tolerance)$r.squared | ||
| + | tol | ||
| + | m.vif <- 1/tol | ||
| + | m.vif | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > tol <- 1- summary(m.tolerance)$r.squared | ||
| + | > tol | ||
| + | [1] 0.2352 | ||
| + | > m.vif <- 1/tol | ||
| + | > m.vif | ||
| + | [1] 4.251 | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | R 에서는 olsrr 패키지의 ols_vif_tol() 함수로 tolerance와 VIF 값을 한 번에 구할 수 있다. | ||
| + | < | ||
| + | m.a <- lm(FGPA ~ SATV+HSGPA, data = scholar) | ||
| + | summary(m.a) | ||
| + | # install.packages(" | ||
| + | # library(olsrr) | ||
| + | ols_vif_tol(m.a) | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > m.a <- lm(FGPA ~ SATV+HSGPA, data = scholar) | ||
| + | > summary(m.a) | ||
| + | |||
| + | Call: | ||
| + | lm(formula = FGPA ~ SATV + HSGPA, data = scholar) | ||
| + | |||
| + | Residuals: | ||
| + | Min 1Q Median | ||
| + | -0.2431 -0.1125 -0.0286 | ||
| + | |||
| + | Coefficients: | ||
| + | Estimate Std. Error t value Pr(> | ||
| + | (Intercept) 0.233102 | ||
| + | SATV 0.000151 | ||
| + | HSGPA | ||
| + | --- | ||
| + | Signif. codes: | ||
| + | 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 | ||
| + | |||
| + | Residual standard error: 0.192 on 7 degrees of freedom | ||
| + | Multiple R-squared: | ||
| + | F-statistic: | ||
| + | |||
| + | > # install.packages(" | ||
| + | > # library(olsrr) | ||
| + | > ols_vif_tol(m.a) | ||
| + | Variables Tolerance | ||
| + | 1 SATV 0.2352 4.251 | ||
| + | 2 | ||
| + | > | ||
| + | </ | ||
| + | |||
| + | ====== using condition index ====== | ||
| + | < | ||
| + | ols_eigen_cindex(m.a) | ||
| + | </ | ||
| + | |||
| + | < | ||
| + | > ols_eigen_cindex(m.a) | ||
| + | Eigenvalue Condition Index intercept | ||
| + | 1 | ||
| + | 2 | ||
| + | 3 | ||
| + | > | ||
| + | </ | ||
| + | |||
| {{tag> | {{tag> | ||
multicollinearity.1461709037.txt.gz · Last modified: by hkimscil
