User Tools

Site Tools


multicollinearity

This is an old revision of the document!


Multicollinearity check in R

Required libraries:

  • corrplot
  • mctest
    • omcdiag
    • imcdiag
  • ppcor
    • pcor
> cps <- read.csv("http://commres.net/wiki/_media/cps_85_wages.csv", header = T, sep = "\t")
> str(cps)
'data.frame':	534 obs. of  11 variables:
 $ education : int  8 9 12 12 12 13 10 12 16 12 ...
 $ south     : int  0 0 0 0 0 0 1 0 0 0 ...
 $ sex       : int  1 1 0 0 0 0 0 0 0 0 ...
 $ experience: int  21 42 1 4 17 9 27 9 11 9 ...
 $ union     : int  0 0 0 0 0 1 0 0 0 0 ...
 $ wage      : num  5.1 4.95 6.67 4 7.5 ...
 $ age       : int  35 57 19 22 35 28 43 27 33 27 ...
 $ race      : int  2 3 3 3 3 3 3 3 3 3 ...
 $ occupation: int  6 6 6 6 6 6 6 6 6 6 ...
 $ sector    : int  1 1 1 0 0 0 0 0 1 0 ...
 $ marr      : int  1 1 0 0 1 0 0 0 1 0 ...
> head(cps)
  education south sex experience union  wage age race occupation sector marr
1         8     0   1         21     0  5.10  35    2          6      1    1
2         9     0   1         42     0  4.95  57    3          6      1    1
3        12     0   0          1     0  6.67  19    3          6      1    0
4        12     0   0          4     0  4.00  22    3          6      0    0
5        12     0   0         17     0  7.50  35    3          6      0    1
6        13     0   0          9     1 13.07  28    3          6      0    0
F-statistic: 74.91 on 1 and 58 DF,  p-value: 4.939e-12

> set.seed(1)
> x1 <- rnorm(25)
> x2 <- rnorm(25, x1)
> y <- x1-x2 + rnorm(25)
> pairs( cbind(y,x1,x2) )
> cor( cbind(y,x1,x2) )
             y         x1         x2
y   1.00000000 0.08089276 -0.2575073
x1  0.08089276 1.00000000  0.7872474
x2 -0.25750727 0.78724740  1.0000000
> summary(lm(y~x1))

Call:
lm(formula = y ~ x1)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.3178 -0.9417  0.1974  0.7032  2.6812 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   0.1145     0.2687   0.426    0.674
x1            0.1106     0.2841   0.389    0.701

Residual standard error: 1.322 on 23 degrees of freedom
Multiple R-squared:  0.006544,	Adjusted R-squared:  -0.03665 
F-statistic: 0.1515 on 1 and 23 DF,  p-value: 0.7007

> summary(lm(y~x2))

Call:
lm(formula = y ~ x2)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.88739 -0.93086  0.06246  0.58728  2.94566 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   0.1920     0.2604   0.737    0.469
x2           -0.2927     0.2290  -1.278    0.214

Residual standard error: 1.282 on 23 degrees of freedom
Multiple R-squared:  0.06631,	Adjusted R-squared:  0.02571 
F-statistic: 1.633 on 1 and 23 DF,  p-value: 0.214

> summary(lm(y~x1+x2))

Call:
lm(formula = y ~ x1 + x2)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.94803 -0.92496 -0.03868  0.42155  2.17441 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)   0.1541     0.2347   0.657   0.5181   
x1            1.0194     0.4016   2.539   0.0187 * 
x2           -0.9602     0.3340  -2.875   0.0088 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.153 on 22 degrees of freedom
Multiple R-squared:  0.2779,	Adjusted R-squared:  0.2122 
F-statistic: 4.232 on 2 and 22 DF,  p-value: 0.02785

> cor(x1,x2)
[1] 0.7872474
> cor.test(x1,x2)

	Pearson's product-moment correlation

data:  x1 and x2
t = 6.1227, df = 23, p-value = 3.026e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5691639 0.9018451
sample estimates:
      cor 
0.7872474 

> cps <- read.csv("http://commres.net/wiki/_export/code/r/data?codeblock=7", header = T, sep = "\t")
> cps
    癤풽ducation south sex experience union  wage age race
1              8     0   1         21     0  5.10  35    2
2              9     0   1         42     0  4.95  57    3
3             12     0   0          1     0  6.67  19    3
4             12     0   0          4     0  4.00  22    3
5             12     0   0         17     0  7.50  35    3
6             13     0   0          9     1 13.07  28    3
7             10     1   0         27     0  4.45  43    3
8             12     0   0          9     0 19.47  27    3
9             16     0   0         11     0 13.28  33    3
10            12     0   0          9     0  8.75  27    3
11            12     0   0         17     1 11.35  35    3
12            12     0   0         19     1 11.50  37    3
13             8     1   0         27     0  6.50  41    3
14             9     1   0         30     1  6.25  45    3
15             9     1   0         29     0 19.98  44    3
16            12     0   0         37     0  7.30  55    3
17             7     1   0         44     0  8.00  57    3
18            12     0   0         26     1 22.20  44    3
19            11     0   0         16     0  3.65  33    3
20            12     0   0         33     0 20.55  51    3
21            12     0   1         16     1  5.71  34    3
22             7     0   0         42     1  7.00  55    1
23            12     0   0          9     0  3.75  27    3
24            11     1   0         14     0  4.50  31    1
25            12     0   0         23     0  9.56  41    3
26             6     1   0         45     0  5.75  57    3
27            12     0   0          8     0  9.36  26    3
28            10     0   0         30     0  6.50  46    3
29            12     0   1          8     0  3.35  26    3
30            12     0   0          8     0  4.75  26    3
31            14     0   0         13     0  8.90  33    3
32            12     1   1         46     0  4.00  64    3
33             8     0   0         19     0  4.70  33    3
34            17     1   1          1     0  5.00  24    3
35            12     0   0         19     0  9.25  37    3
36            12     0   0         36     0 10.67  54    1
37            12     1   0         20     0  7.61  38    1
38            12     0   0         35     1 10.00  53    1
39            12     0   0          3     0  7.50  21    3
40            14     1   0         10     0 12.20  30    3
41            12     0   0          0     0  3.35  18    3
42            14     1   0         14     1 11.00  34    3
43            12     0   0         14     0 12.00  32    3
44             9     0   1         16     0  4.85  31    3
45            13     1   0          8     0  4.30  27    3
46             7     1   1         15     0  6.00  28    3
47            16     0   0         12     0 15.00  34    3
48            10     1   0         13     0  4.85  29    3
49             8     0   0         33     1  9.00  47    3
50            12     0   0          9     0  6.36  27    3
51            12     0   0          7     0  9.15  25    3
52            16     0   0         13     1 11.00  35    3
53            12     0   1          7     0  4.50  25    3
54            12     0   1         16     0  4.80  34    3
55            13     0   0          0     0  4.00  19    3
56            12     0   1         11     0  5.50  29    3
57            13     0   0         17     0  8.40  36    3
58            10     0   0         13     0  6.75  29    3
59            12     0   0         22     1 10.00  40    1
60            12     0   1         28     0  5.00  46    3
61            11     0   0         17     0  6.50  34    3
62            12     0   0         24     1 10.75  42    3
63             3     1   0         55     0  7.00  64    2
64            12     1   0          3     0 11.43  21    3
65            12     0   0          6     1  4.00  24    1
66            10     0   0         27     0  9.00  43    3
67            12     1   0         19     1 13.00  37    1
68            12     0   0         19     1 12.22  37    3
69            12     0   1         38     0  6.28  56    3
70            10     1   0         41     1  6.75  57    1
71            11     1   0          3     0  3.35  20    1
72            14     0   0         20     1 16.00  40    3
73            10     0   0         15     0  5.25  31    3
74             8     1   0          8     0  3.50  22    2
75             8     1   1         39     0  4.22  53    3
76             6     0   1         43     1  3.00  55    2
77            11     1   1         25     1  4.00  42    3
78            12     0   0         11     1 10.00  29    3
79            12     0   0         12     0  5.00  30    1
80            12     1   0         35     1 16.00  53    3
81            14     0   0         14     0 13.98  34    3
82            12     0   0         16     1 13.26  34    3
83            10     0   1         44     1  6.10  60    3
84            16     1   1         13     0  3.75  35    3
85            13     0   0          8     1  9.00  27    1
86            12     0   0         13     0  9.45  31    3
87            11     0   0         18     1  5.50  35    3
88            12     0   1         18     0  8.93  36    3
89            12     1   1          6     0  6.25  24    3
90            11     1   0         37     1  9.75  54    3
    occupation sector marr
1            6      1    1
2            6      1    1
3            6      1    0
4            6      0    0
5            6      0    1
6            6      0    0
7            6      0    0
8            6      0    0
9            6      1    1
10           6      0    0
11           6      0    1
12           6      1    0
13           6      0    1
14           6      0    0
15           6      0    1
16           6      2    1
17           6      0    1
18           6      1    1
19           6      0    0
20           6      0    1
21           6      1    1
22           6      1    1
23           6      0    0
24           6      0    1
25           6      0    1
26           6      1    1
27           6      1    1
28           6      0    1
29           6      1    1
30           6      0    1
31           6      0    0
32           6      0    0
33           6      0    1
34           6      0    0
35           6      1    0
36           6      0    0
37           6      2    1
38           6      2    1
39           6      0    0
40           6      1    1
41           6      0    0
42           6      1    1
43           6      1    1
44           6      1    1
45           6      2    0
46           6      1    1
47           6      1    1
48           6      0    0
49           6      0    1
50           6      1    1
51           6      0    1
52           6      1    1
53           6      1    1
54           6      1    1
55           6      0    0
56           6      1    0
57           6      1    0
58           6      1    1
59           6      1    0
60           6      1    1
61           6      0    0
62           6      2    1
63           6      1    1
64           6      2    0
65           6      1    0
66           6      2    1
67           6      1    1
68           6      2    1
69           6      1    1
70           6      1    1
71           6      1    0
72           6      0    1
73           6      0    1
74           6      1    1
75           6      1    1
76           6      1    1
77           6      1    1
78           6      0    1
79           6      0    1
80           6      1    1
81           6      0    0
82           6      0    1
83           6      1    0
84           6      0    0
85           6      1    0
86           6      1    0
87           6      0    1
88           6      0    1
89           6      0    0
90           6      1    1
 [ reached getOption("max.print") -- omitted 444 rows ]
> head(cps)
  癤풽ducation south sex experience union  wage age race
1            8     0   1         21     0  5.10  35    2
2            9     0   1         42     0  4.95  57    3
3           12     0   0          1     0  6.67  19    3
4           12     0   0          4     0  4.00  22    3
5           12     0   0         17     0  7.50  35    3
6           13     0   0          9     1 13.07  28    3
  occupation sector marr
1          6      1    1
2          6      1    1
3          6      1    0
4          6      0    0
5          6      0    1
6          6      0    0
> colnames(cps) <- c("education")
> head(cps)
  education NA NA NA NA    NA NA NA NA NA NA
1         8  0  1 21  0  5.10 35  2  6  1  1
2         9  0  1 42  0  4.95 57  3  6  1  1
3        12  0  0  1  0  6.67 19  3  6  1  0
4        12  0  0  4  0  4.00 22  3  6  0  0
5        12  0  0 17  0  7.50 35  3  6  0  1
6        13  0  0  9  1 13.07 28  3  6  0  0
> cps <- read.csv("http://commres.net/wiki/_export/code/r/data?codeblock=7", header = T, sep = "\t")
> head(cps)
  education south sex experience union  wage age race
1         8     0   1         21     0  5.10  35    2
2         9     0   1         42     0  4.95  57    3
3        12     0   0          1     0  6.67  19    3
4        12     0   0          4     0  4.00  22    3
5        12     0   0         17     0  7.50  35    3
6        13     0   0          9     1 13.07  28    3
  occupation sector marr
1          6      1    1
2          6      1    1
3          6      1    0
4          6      0    0
5          6      0    1
6          6      0    0
> cps2 <- read.csv("http://commres.net/wiki/_export/code/r/data?codeblock=7", header = T, sep = "\t")
> head(cps2)
  education south sex experience union  wage age race
1         8     0   1         21     0  5.10  35    2
2         9     0   1         42     0  4.95  57    3
3        12     0   0          1     0  6.67  19    3
4        12     0   0          4     0  4.00  22    3
5        12     0   0         17     0  7.50  35    3
6        13     0   0          9     1 13.07  28    3
  occupation sector marr
1          6      1    1
2          6      1    1
3          6      1    0
4          6      0    0
5          6      0    1
6          6      0    0
> cps2 <- read.csv("http://commres.net/wiki/_media/cps_85_wages.csv", header = T, sep = "\t")
> head(cps2)
  education south sex experience union  wage age race
1         8     0   1         21     0  5.10  35    2
2         9     0   1         42     0  4.95  57    3
3        12     0   0          1     0  6.67  19    3
4        12     0   0          4     0  4.00  22    3
5        12     0   0         17     0  7.50  35    3
6        13     0   0          9     1 13.07  28    3
  occupation sector marr
1          6      1    1
2          6      1    1
3          6      1    0
4          6      0    0
5          6      0    1
6          6      0    0
> fit_model1 = lm(log(data1$Wage) ~., data = cps)
Error in eval(predvars, data, env) : object 'data1' not found
> fit_model1 = lm(log(cps$Wage) ~., data = cps)
Error in log(cps$Wage) : non-numeric argument to mathematical function
> str(cps)
'data.frame':	534 obs. of  11 variables:
 $ education : int  8 9 12 12 12 13 10 12 16 12 ...
 $ south     : int  0 0 0 0 0 0 1 0 0 0 ...
 $ sex       : int  1 1 0 0 0 0 0 0 0 0 ...
 $ experience: int  21 42 1 4 17 9 27 9 11 9 ...
 $ union     : int  0 0 0 0 0 1 0 0 0 0 ...
 $ wage      : num  5.1 4.95 6.67 4 7.5 ...
 $ age       : int  35 57 19 22 35 28 43 27 33 27 ...
 $ race      : int  2 3 3 3 3 3 3 3 3 3 ...
 $ occupation: int  6 6 6 6 6 6 6 6 6 6 ...
 $ sector    : int  1 1 1 0 0 0 0 0 1 0 ...
 $ marr      : int  1 1 0 0 1 0 0 0 1 0 ...
> head(cps)
  education south sex experience union  wage age race
1         8     0   1         21     0  5.10  35    2
2         9     0   1         42     0  4.95  57    3
3        12     0   0          1     0  6.67  19    3
4        12     0   0          4     0  4.00  22    3
5        12     0   0         17     0  7.50  35    3
6        13     0   0          9     1 13.07  28    3
  occupation sector marr
1          6      1    1
2          6      1    1
3          6      1    0
4          6      0    0
5          6      0    1
6          6      0    0
> log(cps$wage)
  [1] 1.6292405 1.5993876 1.8976199 1.3862944 2.0149030
  [6] 2.5703195 1.4929041 2.9688748 2.5862591 2.1690537
 [11] 2.4292177 2.4423470 1.8718022 1.8325815 2.9947318
 [16] 1.9878743 2.0794415 3.1000923 1.2947272 3.0228609
 [21] 1.7422190 1.9459101 1.3217558 1.5040774 2.2575877
 [26] 1.7491999 2.2364453 1.8718022 1.2089603 1.5581446
 [31] 2.1860513 1.3862944 1.5475625 1.6094379 2.2246236
 [36] 2.3674361 2.0294632 2.3025851 2.0149030 2.5014360
 [41] 1.2089603 2.3978953 2.4849066 1.5789787 1.4586150
 [46] 1.7917595 2.7080502 1.5789787 2.1972246 1.8500284
 [51] 2.2137539 2.3978953 1.5040774 1.5686159 1.3862944
 [56] 1.7047481 2.1282317 1.9095425 2.3025851 1.6094379
 [61] 1.8718022 2.3749058 1.9459101 2.4362415 1.3862944
 [66] 2.1972246 2.5649494 2.5030740 1.8373700 1.9095425
 [71] 1.2089603 2.7725887 1.6582281 1.2527630 1.4398351
 [76] 1.0986123 1.3862944 2.3025851 1.6094379 2.7725887
 [81] 2.6376277 2.5847520 1.8082888 1.3217558 2.1972246
 [86] 2.2460147 1.7047481 2.1894164 1.8325815 2.2772673
 [91] 1.9065751 2.0515563 1.0473190 1.2089603 2.9947318
 [96] 2.1400662 2.2772673 2.7080502 2.0794415 2.4203681
[101] 2.6390573 2.3025851 1.8718022 2.2854389 2.9177707
[106] 2.5257286 3.2580965 2.6390573 2.3513753 2.3978953
[111] 2.5233258 2.5257286 2.7080502 1.7917595 2.2512918
[116] 1.6094379 1.3217558 2.5313130 1.9286187 1.7047481
[121] 1.9459101 1.5040774 1.8718022 2.4849066 1.6094379
[126] 1.8718022 1.9169226 2.1690537 1.3217558 1.5040774
[131] 1.7917595 1.7047481 2.5649494 1.7316555 1.5686159
[136] 1.9459101 1.6582281 1.2089603 2.1400662 1.7917595
[141] 1.9095425 2.1849270 2.6539459 2.3776926 2.1860513
[146] 2.0149030 1.5040774 2.4203681 2.5989791 1.7917595
[151] 1.5303947 2.3589654 1.6094379 2.1041342 1.8325815
[156] 2.1400662 3.2180755 2.8124102 1.8325815 1.5151272
[161] 2.4203681 3.0563569 2.5376572 2.0149030 2.3272777
[166] 1.2089603 2.5989791 1.5769147 3.2691886 1.8840347
[171] 3.7954892 2.7080502 2.4203681 1.9459101 2.3025851
[176] 2.6762155 2.9957323 3.1135153 1.2919837 2.3627390
[181] 3.2180755 1.7917595 2.9444390 2.5802168 3.1135153
[186] 2.7080502 1.9286187 2.4714836 2.7813007 2.6354795
[191] 2.5771819 1.6677068 1.5040774 2.3025851 2.3025851
[196] 2.3025851 2.2375131 1.7578579 2.8825636 0.0000000
[201] 2.1747517 2.1972246 2.8992214 2.0554050 2.3627390
[206] 1.5040774 2.8478121 2.3513753 2.2213750 2.7080502
[211] 3.1135153 1.5151272 2.1972246 2.5900171 2.7080502
[216] 2.0149030 1.4469190 2.5257286 1.6351057 1.2089603
[221] 2.4078456 1.3454724 1.8562980 1.7155981 2.3025851
[226] 1.7316555 2.4423470 1.2527630 1.2089603 1.5581446
[231] 2.9947318 1.2527630 1.3862944 1.9459101 1.8325815
[236] 1.5040774 2.6595600 1.6094379 2.6210388 2.6181255
[241] 2.0149030 1.3350011 1.6094379 2.2428351 1.7047481
[246] 1.3217558 1.2527630 1.7578579 2.4849066 1.6094379
[251] 2.1690537 2.3025851 2.1400662 2.1552445 2.1972246
[256] 1.7047481 2.4078456 2.3025851 1.6486586 2.0794415
[261] 1.2697605 1.6486586 2.4570214 2.4265711 2.0149030
[266] 1.7047481 1.6094379 2.0476928 1.6582281 2.1972246
[271] 2.2669579 1.6505799 1.9459101 2.4981519 1.6582281
[276] 2.3340838 1.2089603 2.0412203 2.2159373 2.1317968
[281] 1.3862944 1.4182774 1.0986123 1.4469190 2.0188950
[286] 2.3542283 1.6094379 2.7100482 2.4203681 1.8325815
[291] 1.2527630 1.9242487 2.5257286 2.4849066 1.7917595
[296] 2.2512918 1.4109870 2.3446863 1.6094379 2.0399208
[301] 1.7047481 1.8562980 2.5257286 1.8325815 2.0794415
[306] 2.2617631 2.2082744 2.0149030 1.6094379 1.9459101
[311] 1.2669476 2.1400662 1.5040774 2.0643279 1.6582281
[316] 1.6094379 2.2332350 2.3513753 2.0149030 2.2512918
[321] 2.2617631 1.7698546 2.3997118 1.6094379 1.7263317
[326] 2.5257286 2.3804716 1.6863990 1.9459101 1.5238800
[331] 1.7917595 2.4604432 1.7263317 1.7047481 1.5789787
[336] 1.9095425 1.4469190 1.7491999 1.2527630 1.2089603
[341] 2.3627390 2.0794415 1.5581446 2.1400662 2.1804175
[346] 2.0794415 1.7917595 1.9657128 1.2237754 1.7917595
[351] 1.3217558 2.1849270 1.4701758 2.5726122 1.4701758
[356] 1.2527630 1.3350011 1.6601310 1.2089603 2.7887081
[361] 1.4469190 1.5040774 2.0794415 1.3862944 2.0744290
[366] 1.3862944 1.4231083 1.7833912 1.2809338 2.1690537
[371] 1.2237754 1.4539530 1.6770966 1.6094379 2.0347056
[376] 1.9373018 2.0149030 1.2809338 0.5596158 1.2383742
[381] 2.2648832 2.1388890 2.1961128 1.2947272 1.2527630
[386] 1.2325603 1.7047481 1.9358598 1.2556160 1.3217558
[391] 1.4279160 2.2586332 2.6858046 2.5257286 1.7047481
[396] 1.6389967 2.0794415 1.7630170 1.2089603 1.9459101
[401] 2.3025851 2.0794415 1.9286187 1.7137979 2.0149030
[406] 2.1894164 2.1972246 1.2527630 1.7526721 3.2188758
[411] 1.9242487 1.8718022 1.3217558 1.2527630 1.5040774
[416] 0.6981347 1.4279160 2.5649494 1.3812818 2.0149030
[421] 2.5741378 1.3862944 1.3737156 2.5649494 2.1972246
[426] 1.5151272 2.2512918 1.5040774 2.1690537 2.3025851
[431] 2.8903718 3.2180755 2.4890647 3.0910425 2.1690537
[436] 3.1000923 2.8478121 1.7917595 2.0869136 2.2235419
[441] 2.4849066 2.3617970 1.7422190 2.3025851 2.8622009
[446] 2.7080502 2.0515563 2.0541237 2.3025851 3.2180755
[451] 2.3302003 2.7080502 2.4849066 2.3589654 1.7664417
[456] 2.4176979 2.1471002 2.6311692 1.7422190 2.7593768
[461] 2.0149030 2.4203681 1.8164521 2.5989791 1.8325815
[466] 1.8718022 2.4849066 2.1400662 2.0794415 1.7491999
[471] 2.7555697 2.2884862 2.6034302 1.6863990 1.8325815
[476] 1.7047481 1.6094379 1.8325815 1.7491999 3.0204249
[481] 1.6094379 1.9459101 2.8903718 2.4849066 3.0155349
[486] 3.1000923 2.7985001 2.1552445 2.9642416 2.6390573
[491] 2.3025851 2.7694588 2.9957323 2.3025851 3.2180755
[496] 2.4203681 3.1280755 2.3223877 2.3025851 2.6390573
[501] 2.5257286 1.7561323 3.2180755 1.4701758 2.4203681
[506] 1.8976199 2.0794415 2.8992214 2.4849066 2.1849270
[511] 2.2512918 2.6137395 2.4849066 2.7080502 2.5392370
[516] 1.9987736 2.7447035 2.0082140 1.8325815 1.8325815
[521] 2.2375131 3.1135153 2.0149030 1.9459101 1.7491999
[526] 2.0373166 2.5257286 2.7725887 2.4672517 2.4300984
[531] 1.8082888 3.1463051 2.9897142 2.7330680
> lm1 = lm(log(cps$wage) ~., data = cps)
> summary(lm1)

Call:
lm(formula = log(cps$wage) ~ ., data = cps)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.16246 -0.29163 -0.00469  0.29981  1.98248 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.078596   0.687514   1.569 0.117291    
education    0.179366   0.110756   1.619 0.105949    
south       -0.102360   0.042823  -2.390 0.017187 *  
sex         -0.221997   0.039907  -5.563 4.24e-08 ***
experience   0.095822   0.110799   0.865 0.387531    
union        0.200483   0.052475   3.821 0.000149 ***
age         -0.085444   0.110730  -0.772 0.440671    
race         0.050406   0.028531   1.767 0.077865 .  
occupation  -0.007417   0.013109  -0.566 0.571761    
sector       0.091458   0.038736   2.361 0.018589 *  
marr         0.076611   0.041931   1.827 0.068259 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4398 on 523 degrees of freedom
Multiple R-squared:  0.3185,	Adjusted R-squared:  0.3054 
F-statistic: 24.44 on 10 and 523 DF,  p-value: < 2.2e-16

> plot(lm1)
Hit <Return> to see next plot: 
Hit <Return> to see next plot: 
Hit <Return> to see next plot: 
Hit <Return> to see next plot: 
Warning messages:
1: not plotting observations with leverage one:
  444 
2: not plotting observations with leverage one:
  444 
> 
> 
> 
> library(corrplot)
corrplot 0.84 loaded
> 
> cps.cor = cor(cps)
> corrplot.mixed(cps.cor, lower.col = “black”, number.cex = .7)
Error: unexpected input in "corrplot.mixed(cps.cor, lower.col = ?
> corrplot.mixed(cps.cor, lower.col = "black", number.cex = .7)
> install.packages("mctest")
Installing package into ‘C:/Users/Hyo/Documents/R/win-library/3.5’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/3.5/mctest_1.2.zip'
Content type 'application/zip' length 68474 bytes (66 KB)
downloaded 66 KB

package ‘mctest’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\Hyo\AppData\Local\Temp\RtmpofABMJ\downloaded_packages
> library(mctest)
> omcdiag(cps[,c(1:5,7:11)], cps$wage) 

Call:
omcdiag(x = cps[, c(1:5, 7:11)], y = cps$wage)


Overall Multicollinearity Diagnostics

                       MC Results detection
Determinant |X'X|:         0.0001         1
Farrar Chi-Square:      4833.5751         1
Red Indicator:             0.1983         0
Sum of Lambda Inverse: 10068.8439         1
Theil's Method:            1.2263         1
Condition Number:        739.7337         1

1 --> COLLINEARITY is detected by the test 
0 --> COLLINEARITY is not detected by the test

> head(str)
                         
1 function (object, ...) 
2 UseMethod("str")       
> head(cps)
  education south sex experience union  wage age race occupation sector marr
1         8     0   1         21     0  5.10  35    2          6      1    1
2         9     0   1         42     0  4.95  57    3          6      1    1
3        12     0   0          1     0  6.67  19    3          6      1    0
4        12     0   0          4     0  4.00  22    3          6      0    0
5        12     0   0         17     0  7.50  35    3          6      0    1
6        13     0   0          9     1 13.07  28    3          6      0    0
> omcdiag(cps[,c(-6)], cps$wage) 

Call:
omcdiag(x = cps[, c(-6)], y = cps$wage)


Overall Multicollinearity Diagnostics

                       MC Results detection
Determinant |X'X|:         0.0001         1
Farrar Chi-Square:      4833.5751         1
Red Indicator:             0.1983         0
Sum of Lambda Inverse: 10068.8439         1
Theil's Method:            1.2263         1
Condition Number:        739.7337         1

1 --> COLLINEARITY is detected by the test 
0 --> COLLINEARITY is not detected by the test

> imcdiag(cps[,c(-6)],cps$wage) 

Call:
imcdiag(x = cps[, c(-6)], y = cps$wage)


All Individual Multicollinearity Diagnostics Result

                 VIF    TOL          Wi          Fi Leamer      CVIF Klein
education   231.1956 0.0043  13402.4982  15106.5849 0.0658  236.4725     1
south         1.0468 0.9553      2.7264      3.0731 0.9774    1.0707     0
sex           1.0916 0.9161      5.3351      6.0135 0.9571    1.1165     0
experience 5184.0939 0.0002 301771.2445 340140.5368 0.0139 5302.4188     1
union         1.1209 0.8922      7.0368      7.9315 0.9445    1.1464     0
age        4645.6650 0.0002 270422.7164 304806.1391 0.0147 4751.7005     1
race          1.0371 0.9642      2.1622      2.4372 0.9819    1.0608     0
occupation    1.2982 0.7703     17.3637     19.5715 0.8777    1.3279     0
sector        1.1987 0.8343     11.5670     13.0378 0.9134    1.2260     0
marr          1.0961 0.9123      5.5969      6.3085 0.9551    1.1211     0

1 --> COLLINEARITY is detected by the test 
0 --> COLLINEARITY is not detected by the test

education , south , experience , age , race , occupation , sector , marr , coefficient(s) are non-significant may be due to multicollinearity

R-square of y on all x: 0.2805 

* use method argument to check which regressors may be the reason of collinearity
===================================
> pcor(cps[,c(-6)],method = “pearson”)$estimate 
Error: unexpected input in "pcor(cps[,c(-6)],method = ?
> pcor(cps[,c(-6)], method = "pearson")$estimate 
              education        south          sex  experience        union         age         race   occupation
education   1.000000000 -0.031750193  0.051510483 -0.99756187 -0.007479144  0.99726160  0.017230877  0.029436911
south      -0.031750193  1.000000000 -0.030152499 -0.02231360 -0.097548621  0.02152507 -0.111197596  0.008430595
sex         0.051510483 -0.030152499  1.000000000  0.05497703 -0.120087577 -0.05369785  0.020017315 -0.142750864
experience -0.997561873 -0.022313605  0.054977034  1.00000000 -0.010244447  0.99987574  0.010888486  0.042058560
union      -0.007479144 -0.097548621 -0.120087577 -0.01024445  1.000000000  0.01223890 -0.107706183  0.212996388
age         0.997261601  0.021525073 -0.053697851  0.99987574  0.012238897  1.00000000 -0.010803310 -0.044140293
race        0.017230877 -0.111197596  0.020017315  0.01088849 -0.107706183 -0.01080331  1.000000000  0.057539374
occupation  0.029436911  0.008430595 -0.142750864  0.04205856  0.212996388 -0.04414029  0.057539374  1.000000000
sector     -0.021253493 -0.021518760 -0.112146760 -0.01326166 -0.013531482  0.01456575  0.006412099  0.314746868
marr       -0.040302967  0.030418218  0.004163264 -0.04097664  0.068918496  0.04509033  0.055645964 -0.018580965
                 sector         marr
education  -0.021253493 -0.040302967
south      -0.021518760  0.030418218
sex        -0.112146760  0.004163264
experience -0.013261665 -0.040976643
union      -0.013531482  0.068918496
age         0.014565751  0.045090327
race        0.006412099  0.055645964
occupation  0.314746868 -0.018580965
sector      1.000000000  0.036495494
marr        0.036495494  1.000000000
> pcor(cps[,c(-6)],method = “pearson”)$estimate 
Error: unexpected input in "pcor(cps[,c(-6)],method = ?
> pcor(cps[,c(-6)], method = "pearson")$estimate 
              education        south          sex  experience        union         age         race
education   1.000000000 -0.031750193  0.051510483 -0.99756187 -0.007479144  0.99726160  0.017230877
south      -0.031750193  1.000000000 -0.030152499 -0.02231360 -0.097548621  0.02152507 -0.111197596
sex         0.051510483 -0.030152499  1.000000000  0.05497703 -0.120087577 -0.05369785  0.020017315
experience -0.997561873 -0.022313605  0.054977034  1.00000000 -0.010244447  0.99987574  0.010888486
union      -0.007479144 -0.097548621 -0.120087577 -0.01024445  1.000000000  0.01223890 -0.107706183
age         0.997261601  0.021525073 -0.053697851  0.99987574  0.012238897  1.00000000 -0.010803310
race        0.017230877 -0.111197596  0.020017315  0.01088849 -0.107706183 -0.01080331  1.000000000
occupation  0.029436911  0.008430595 -0.142750864  0.04205856  0.212996388 -0.04414029  0.057539374
sector     -0.021253493 -0.021518760 -0.112146760 -0.01326166 -0.013531482  0.01456575  0.006412099
marr       -0.040302967  0.030418218  0.004163264 -0.04097664  0.068918496  0.04509033  0.055645964
             occupation       sector         marr
education   0.029436911 -0.021253493 -0.040302967
south       0.008430595 -0.021518760  0.030418218
sex        -0.142750864 -0.112146760  0.004163264
experience  0.042058560 -0.013261665 -0.040976643
union       0.212996388 -0.013531482  0.068918496
age        -0.044140293  0.014565751  0.045090327
race        0.057539374  0.006412099  0.055645964
occupation  1.000000000  0.314746868 -0.018580965
sector      0.314746868  1.000000000  0.036495494
marr       -0.018580965  0.036495494  1.000000000
> pcor(cps[,c(-6)], method = "pearson")$estimate 
              education        south          sex  experience        union         age         race
education   1.000000000 -0.031750193  0.051510483 -0.99756187 -0.007479144  0.99726160  0.017230877
south      -0.031750193  1.000000000 -0.030152499 -0.02231360 -0.097548621  0.02152507 -0.111197596
sex         0.051510483 -0.030152499  1.000000000  0.05497703 -0.120087577 -0.05369785  0.020017315
experience -0.997561873 -0.022313605  0.054977034  1.00000000 -0.010244447  0.99987574  0.010888486
union      -0.007479144 -0.097548621 -0.120087577 -0.01024445  1.000000000  0.01223890 -0.107706183
age         0.997261601  0.021525073 -0.053697851  0.99987574  0.012238897  1.00000000 -0.010803310
race        0.017230877 -0.111197596  0.020017315  0.01088849 -0.107706183 -0.01080331  1.000000000
occupation  0.029436911  0.008430595 -0.142750864  0.04205856  0.212996388 -0.04414029  0.057539374
sector     -0.021253493 -0.021518760 -0.112146760 -0.01326166 -0.013531482  0.01456575  0.006412099
marr       -0.040302967  0.030418218  0.004163264 -0.04097664  0.068918496  0.04509033  0.055645964
             occupation       sector         marr
education   0.029436911 -0.021253493 -0.040302967
south       0.008430595 -0.021518760  0.030418218
sex        -0.142750864 -0.112146760  0.004163264
experience  0.042058560 -0.013261665 -0.040976643
union       0.212996388 -0.013531482  0.068918496
age        -0.044140293  0.014565751  0.045090327
race        0.057539374  0.006412099  0.055645964
occupation  1.000000000  0.314746868 -0.018580965
sector      0.314746868  1.000000000  0.036495494
marr       -0.018580965  0.036495494  1.000000000
> round(pcor(cps[,c(-6)], method = "pearson")$estimate,4) 
           education   south     sex experience   union     age    race occupation  sector    marr
education     1.0000 -0.0318  0.0515    -0.9976 -0.0075  0.9973  0.0172     0.0294 -0.0213 -0.0403
south        -0.0318  1.0000 -0.0302    -0.0223 -0.0975  0.0215 -0.1112     0.0084 -0.0215  0.0304
sex           0.0515 -0.0302  1.0000     0.0550 -0.1201 -0.0537  0.0200    -0.1428 -0.1121  0.0042
experience   -0.9976 -0.0223  0.0550     1.0000 -0.0102  0.9999  0.0109     0.0421 -0.0133 -0.0410
union        -0.0075 -0.0975 -0.1201    -0.0102  1.0000  0.0122 -0.1077     0.2130 -0.0135  0.0689
age           0.9973  0.0215 -0.0537     0.9999  0.0122  1.0000 -0.0108    -0.0441  0.0146  0.0451
race          0.0172 -0.1112  0.0200     0.0109 -0.1077 -0.0108  1.0000     0.0575  0.0064  0.0556
occupation    0.0294  0.0084 -0.1428     0.0421  0.2130 -0.0441  0.0575     1.0000  0.3147 -0.0186
sector       -0.0213 -0.0215 -0.1121    -0.0133 -0.0135  0.0146  0.0064     0.3147  1.0000  0.0365
marr         -0.0403  0.0304  0.0042    -0.0410  0.0689  0.0451  0.0556    -0.0186  0.0365  1.0000
> lm2 = lm(log(cps$wage) ~ . -age , data = cps)
> summary(lm2)

Call:
lm(formula = log(cps$wage) ~ . - age, data = cps)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.16044 -0.29073 -0.00505  0.29994  1.97997 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.562676   0.160116   3.514 0.000479 ***
education    0.094135   0.008188  11.497  < 2e-16 ***
south       -0.103071   0.042796  -2.408 0.016367 *  
sex         -0.220344   0.039834  -5.532 5.02e-08 ***
experience   0.010335   0.001746   5.919 5.86e-09 ***
union        0.199987   0.052450   3.813 0.000154 ***
race         0.050643   0.028519   1.776 0.076345 .  
occupation  -0.006971   0.013091  -0.532 0.594619    
sector       0.091022   0.038717   2.351 0.019094 *  
marr         0.075152   0.041872   1.795 0.073263 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4397 on 524 degrees of freedom
Multiple R-squared:  0.3177,	Adjusted R-squared:  0.306 
F-statistic: 27.11 on 9 and 524 DF,  p-value: < 2.2e-16

> anova(lm1, lm2)
Analysis of Variance Table

Model 1: log(cps$wage) ~ education + south + sex + experience + union + 
    age + race + occupation + sector + marr
Model 2: log(cps$wage) ~ (education + south + sex + experience + union + 
    age + race + occupation + sector + marr) - age
  Res.Df    RSS Df Sum of Sq      F Pr(>F)
1    523 101.17                           
2    524 101.28 -1  -0.11518 0.5954 0.4407
> anova(lm2, lm1)
Analysis of Variance Table

Model 1: log(cps$wage) ~ (education + south + sex + experience + union + 
    age + race + occupation + sector + marr) - age
Model 2: log(cps$wage) ~ education + south + sex + experience + union + 
    age + race + occupation + sector + marr
  Res.Df    RSS Df Sum of Sq      F Pr(>F)
1    524 101.28                           
2    523 101.17  1   0.11518 0.5954 0.4407
> summary(lm1)

Call:
lm(formula = log(cps$wage) ~ ., data = cps)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.16246 -0.29163 -0.00469  0.29981  1.98248 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.078596   0.687514   1.569 0.117291    
education    0.179366   0.110756   1.619 0.105949    
south       -0.102360   0.042823  -2.390 0.017187 *  
sex         -0.221997   0.039907  -5.563 4.24e-08 ***
experience   0.095822   0.110799   0.865 0.387531    
union        0.200483   0.052475   3.821 0.000149 ***
age         -0.085444   0.110730  -0.772 0.440671    
race         0.050406   0.028531   1.767 0.077865 .  
occupation  -0.007417   0.013109  -0.566 0.571761    
sector       0.091458   0.038736   2.361 0.018589 *  
marr         0.076611   0.041931   1.827 0.068259 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4398 on 523 degrees of freedom
Multiple R-squared:  0.3185,	Adjusted R-squared:  0.3054 
F-statistic: 24.44 on 10 and 523 DF,  p-value: < 2.2e-16

> corrplot.mixed(cps.cor, lower.col = "black", number.cex = 1)
> lm3 = lm(log(cps$wage) ~ . -age -education , data = cps)
> summary(lm3)

Call:
lm(formula = log(cps$wage) ~ . - age - education, data = cps)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.35385 -0.34226 -0.02236  0.30725  1.84988 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.974436   0.114881  17.187  < 2e-16 ***
south       -0.171508   0.047380  -3.620 0.000323 ***
sex         -0.232997   0.044517  -5.234 2.40e-07 ***
experience   0.003031   0.001818   1.667 0.096145 .  
union        0.238528   0.058518   4.076 5.29e-05 ***
race         0.079273   0.031761   2.496 0.012870 *  
occupation  -0.036678   0.014348  -2.556 0.010858 *  
sector       0.050525   0.043105   1.172 0.241675    
marr         0.105542   0.046719   2.259 0.024286 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4915 on 525 degrees of freedom
Multiple R-squared:  0.1456,	Adjusted R-squared:  0.1326 
F-statistic: 11.18 on 8 and 525 DF,  p-value: 1.177e-14

> lm3 = lm(log(cps$wage) ~ . -age -experience , data = cps)
> summary(lm3)

Call:
lm(formula = log(cps$wage) ~ . - age - experience, data = cps)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.1519 -0.3309  0.0034  0.3028  1.8315 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.95654    0.15028   6.365 4.26e-10 ***
education    0.07650    0.00787   9.721  < 2e-16 ***
south       -0.11579    0.04411  -2.625 0.008912 ** 
sex         -0.20108    0.04097  -4.908 1.23e-06 ***
union        0.23924    0.05369   4.456 1.02e-05 ***
race         0.05157    0.02943   1.752 0.080287 .  
occupation  -0.01719    0.01339  -1.283 0.199910    
sector       0.10996    0.03982   2.762 0.005953 ** 
marr         0.13980    0.04171   3.352 0.000861 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4537 on 525 degrees of freedom
Multiple R-squared:  0.2721,	Adjusted R-squared:  0.261 
F-statistic: 24.53 on 8 and 525 DF,  p-value: < 2.2e-16

> factor()
factor(0)
Levels: 
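> factor(cps$sex, levels= c("male", "female")
+ ) # every element becomes NA: sex is coded 0/1, so no value matches the levels "male"/"female" (use labels = to rename the existing codes instead)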
  [1] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
 [21] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
 [41] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
 [61] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
 [81] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[101] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[121] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[141] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[161] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[181] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[201] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[221] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[241] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[261] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[281] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[301] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[321] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[341] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[361] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[381] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[401] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[421] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[441] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[461] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[481] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[501] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
[521] <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
Levels: male female
> cps <- read.csv("http://commres.net/wiki/_media/cps_85_wages.csv", header = T, sep = "\t")
> str(cps)
'data.frame':	534 obs. of  11 variables:
 $ education : int  8 9 12 12 12 13 10 12 16 12 ...
 $ south     : int  0 0 0 0 0 0 1 0 0 0 ...
 $ sex       : int  1 1 0 0 0 0 0 0 0 0 ...
 $ experience: int  21 42 1 4 17 9 27 9 11 9 ...
 $ union     : int  0 0 0 0 0 1 0 0 0 0 ...
 $ wage      : num  5.1 4.95 6.67 4 7.5 ...
 $ age       : int  35 57 19 22 35 28 43 27 33 27 ...
 $ race      : int  2 3 3 3 3 3 3 3 3 3 ...
 $ occupation: int  6 6 6 6 6 6 6 6 6 6 ...
 $ sector    : int  1 1 1 0 0 0 0 0 1 0 ...
 $ marr      : int  1 1 0 0 1 0 0 0 1 0 ...
> head(cps)
  education south sex experience union  wage age race occupation sector marr
1         8     0   1         21     0  5.10  35    2          6      1    1
2         9     0   1         42     0  4.95  57    3          6      1    1
3        12     0   0          1     0  6.67  19    3          6      1    0
4        12     0   0          4     0  4.00  22    3          6      0    0
5        12     0   0         17     0  7.50  35    3          6      0    1
6        13     0   0          9     1 13.07  28    3          6      0    0
> factor(cps$sex)
  [1] 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0
 [53] 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0
[105] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0
[157] 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 0 1 1 1 0 1
[209] 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 0 1 0 1 1 1 0 1 1 0 1 0 1 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1
[261] 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 1 1 0 1 0 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 0
[313] 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 0 0 1 0 1 1
[365] 1 0 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 0 1 1 0 0 0 0 1 0 0 0
[417] 1 0 1 1 1 0 1 0 0 1 1 0 1 0 0 0 1 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 1 1 1 1 0 0 0 1 1 0 0 1 1 1 0 1 1 1 1
[469] 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0 1 0 1 1 1
[521] 0 0 0 0 1 1 0 0 1 0 1 1 0 0
Levels: 0 1
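> cps <- factor(cps$sex) # mistake: this replaces the whole data frame with a single factor (see str() below); the intended form is cps$sex <- factor(cps$sex)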
> str(cps)
 Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1 1 ...
> cps <- read.csv("http://commres.net/wiki/_media/cps_85_wages.csv", header = T, sep = "\t")
> factor(cps$sex)
  [1] 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0
 [53] 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0
[105] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0
[157] 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 0 1 1 1 0 1
[209] 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 0 1 0 1 1 1 0 1 1 0 1 0 1 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1
[261] 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 1 1 0 1 0 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 0
[313] 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 0 0 1 0 1 1
[365] 1 0 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 0 1 1 0 0 0 0 1 0 0 0
[417] 1 0 1 1 1 0 1 0 0 1 1 0 1 0 0 0 1 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 1 1 1 1 0 0 0 1 1 0 0 1 1 1 0 1 1 1 1
[469] 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0 1 0 1 1 1
[521] 0 0 0 0 1 1 0 0 1 0 1 1 0 0
Levels: 0 1
> str(cps)
'data.frame':	534 obs. of  11 variables:
 $ education : int  8 9 12 12 12 13 10 12 16 12 ...
 $ south     : int  0 0 0 0 0 0 1 0 0 0 ...
 $ sex       : int  1 1 0 0 0 0 0 0 0 0 ...
 $ experience: int  21 42 1 4 17 9 27 9 11 9 ...
 $ union     : int  0 0 0 0 0 1 0 0 0 0 ...
 $ wage      : num  5.1 4.95 6.67 4 7.5 ...
 $ age       : int  35 57 19 22 35 28 43 27 33 27 ...
 $ race      : int  2 3 3 3 3 3 3 3 3 3 ...
 $ occupation: int  6 6 6 6 6 6 6 6 6 6 ...
 $ sector    : int  1 1 1 0 0 0 0 0 1 0 ...
 $ marr      : int  1 1 0 0 1 0 0 0 1 0 ...
> factor(cps$sex)
  [1] 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0
 [53] 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 0
[105] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0
[157] 0 0 0 1 1 0 1 0 1 0 0 0 0 1 1 0 1 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 0 1 1 1 0 1
[209] 1 0 0 1 0 0 0 0 1 0 1 1 0 1 1 0 0 1 0 1 1 1 0 1 1 0 1 0 1 1 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 1 1 1
[261] 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 1 1 0 0 1 1 1 0 1 1 1 1 0 1 0 1 0 0 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 0
[313] 1 0 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 0 0 1 0 1 1
[365] 1 0 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 0 1 0 0 1 0 1 1 1 1 1 1 0 0 1 1 0 0 0 0 1 0 0 0
[417] 1 0 1 1 1 0 1 0 0 1 1 0 1 0 0 0 1 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 0 1 1 1 1 0 0 0 1 1 0 0 1 1 1 0 1 1 1 1
[469] 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 0 0 0 1 0 1 1 1
[521] 0 0 0 0 1 1 0 0 1 0 1 1 0 0
Levels: 0 1
> cps$sex <- factor(cps$sex)
> cps$union <- factor(cps$union)
> cps$race <- factor(cps$race)
> cps$sector <- factor(cps$sector)
> cps$occupation <- factor(cps$occupation)
> cps$marr <- factor(cps$marr)
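> str(cps) # output omitted. NOTE: the summaries that follow list sex, race, occupation, etc. as single numeric terms, so they appear to come from the integer-coded data rather than from these factors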
> lm1 = lm(log(cps$wage) ~., data = cps)
> summary(lm1)

Call:
lm(formula = log(cps$wage) ~ ., data = cps)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.16246 -0.29163 -0.00469  0.29981  1.98248 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.078596   0.687514   1.569 0.117291    
education    0.179366   0.110756   1.619 0.105949    
south       -0.102360   0.042823  -2.390 0.017187 *  
sex         -0.221997   0.039907  -5.563 4.24e-08 ***
experience   0.095822   0.110799   0.865 0.387531    
union        0.200483   0.052475   3.821 0.000149 ***
age         -0.085444   0.110730  -0.772 0.440671    
race         0.050406   0.028531   1.767 0.077865 .  
occupation  -0.007417   0.013109  -0.566 0.571761    
sector       0.091458   0.038736   2.361 0.018589 *  
marr         0.076611   0.041931   1.827 0.068259 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4398 on 523 degrees of freedom
Multiple R-squared:  0.3185,	Adjusted R-squared:  0.3054 
F-statistic: 24.44 on 10 and 523 DF,  p-value: < 2.2e-16
> plot(lm1)


> library(corrplot)
> cps.cor = cor(cps)
> corrplot.mixed(cps.cor, lower.col = "black")

> install.packages("mctest")
> library(mctest)
> omcdiag(cps[,c(-6)], cps$wage) # column 6 (wage) is the response, so it is dropped from x; omcdiag(cps[,c(1:5,7:11)], cps$wage) is equivalent.

Call:
omcdiag(x = cps[, c(-6)], y = cps$wage)


Overall Multicollinearity Diagnostics

                       MC Results detection
Determinant |X'X|:         0.0001         1
Farrar Chi-Square:      4833.5751         1
Red Indicator:             0.1983         0
Sum of Lambda Inverse: 10068.8439         1
Theil's Method:            1.2263         1
Condition Number:        739.7337         1

1 --> COLLINEARITY is detected by the test 
0 --> COLLINEARITY is not detected by the test

> 
> imcdiag(cps[,c(-6)],cps$wage) 

Call:
imcdiag(x = cps[, c(-6)], y = cps$wage)


All Individual Multicollinearity Diagnostics Result

                 VIF    TOL          Wi          Fi Leamer      CVIF Klein
education   231.1956 0.0043  13402.4982  15106.5849 0.0658  236.4725     1
south         1.0468 0.9553      2.7264      3.0731 0.9774    1.0707     0
sex           1.0916 0.9161      5.3351      6.0135 0.9571    1.1165     0
experience 5184.0939 0.0002 301771.2445 340140.5368 0.0139 5302.4188     1
union         1.1209 0.8922      7.0368      7.9315 0.9445    1.1464     0
age        4645.6650 0.0002 270422.7164 304806.1391 0.0147 4751.7005     1
race          1.0371 0.9642      2.1622      2.4372 0.9819    1.0608     0
occupation    1.2982 0.7703     17.3637     19.5715 0.8777    1.3279     0
sector        1.1987 0.8343     11.5670     13.0378 0.9134    1.2260     0
marr          1.0961 0.9123      5.5969      6.3085 0.9551    1.1211     0

1 --> COLLINEARITY is detected by the test 
0 --> COLLINEARITY is not detected by the test

education , south , experience , age , race , occupation , sector , marr , coefficient(s) are non-significant may be due to multicollinearity

R-square of y on all x: 0.2805 

* use method argument to check which regressors may be the reason of collinearity
===================================
> 
> round(pcor(cps[,c(-6)], method = "pearson")$estimate,4) 
           education   south     sex experience   union     age    race occupation  sector    marr
education     1.0000 -0.0318  0.0515    -0.9976 -0.0075  0.9973  0.0172     0.0294 -0.0213 -0.0403
south        -0.0318  1.0000 -0.0302    -0.0223 -0.0975  0.0215 -0.1112     0.0084 -0.0215  0.0304
sex           0.0515 -0.0302  1.0000     0.0550 -0.1201 -0.0537  0.0200    -0.1428 -0.1121  0.0042
experience   -0.9976 -0.0223  0.0550     1.0000 -0.0102  0.9999  0.0109     0.0421 -0.0133 -0.0410
union        -0.0075 -0.0975 -0.1201    -0.0102  1.0000  0.0122 -0.1077     0.2130 -0.0135  0.0689
age           0.9973  0.0215 -0.0537     0.9999  0.0122  1.0000 -0.0108    -0.0441  0.0146  0.0451
race          0.0172 -0.1112  0.0200     0.0109 -0.1077 -0.0108  1.0000     0.0575  0.0064  0.0556
occupation    0.0294  0.0084 -0.1428     0.0421  0.2130 -0.0441  0.0575     1.0000  0.3147 -0.0186
sector       -0.0213 -0.0215 -0.1121    -0.0133 -0.0135  0.0146  0.0064     0.3147  1.0000  0.0365
marr         -0.0403  0.0304  0.0042    -0.0410  0.0689  0.0451  0.0556    -0.0186  0.0365  1.0000
> lm2 = lm(log(cps$wage) ~ . -age , data = cps)
> summary(lm2)

Call:
lm(formula = log(cps$wage) ~ . - age, data = cps)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.16044 -0.29073 -0.00505  0.29994  1.97997 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.562676   0.160116   3.514 0.000479 ***
education    0.094135   0.008188  11.497  < 2e-16 ***
south       -0.103071   0.042796  -2.408 0.016367 *  
sex         -0.220344   0.039834  -5.532 5.02e-08 ***
experience   0.010335   0.001746   5.919 5.86e-09 ***
union        0.199987   0.052450   3.813 0.000154 ***
race         0.050643   0.028519   1.776 0.076345 .  
occupation  -0.006971   0.013091  -0.532 0.594619    
sector       0.091022   0.038717   2.351 0.019094 *  
marr         0.075152   0.041872   1.795 0.073263 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4397 on 524 degrees of freedom
Multiple R-squared:  0.3177,	Adjusted R-squared:  0.306 
F-statistic: 27.11 on 9 and 524 DF,  p-value: < 2.2e-16

> summary(lm1)

Call:
lm(formula = log(cps$wage) ~ ., data = cps)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.16246 -0.29163 -0.00469  0.29981  1.98248 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.078596   0.687514   1.569 0.117291    
education    0.179366   0.110756   1.619 0.105949    
south       -0.102360   0.042823  -2.390 0.017187 *  
sex         -0.221997   0.039907  -5.563 4.24e-08 ***
experience   0.095822   0.110799   0.865 0.387531    
union        0.200483   0.052475   3.821 0.000149 ***
age         -0.085444   0.110730  -0.772 0.440671    
race         0.050406   0.028531   1.767 0.077865 .  
occupation  -0.007417   0.013109  -0.566 0.571761    
sector       0.091458   0.038736   2.361 0.018589 *  
marr         0.076611   0.041931   1.827 0.068259 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4398 on 523 degrees of freedom
Multiple R-squared:  0.3185,	Adjusted R-squared:  0.3054 
F-statistic: 24.44 on 10 and 523 DF,  p-value: < 2.2e-16

> 
> 




multicolinearity.1545759460.txt.gz · Last modified: 2018/12/26 02:37 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki