r:neural_network
This is an old revision of the document!
Table of Contents
Neural Network
> install.packages("nnet")
> library(nnet)
> m <- nnet(Species ~ ., data=iris, size=3)
# weights: 27
initial value 191.494035
iter 10 value 65.618496
iter 20 value 40.493306
iter 30 value 8.542349
iter 40 value 6.034377
iter 50 value 6.000246
iter 60 value 5.998411
iter 70 value 5.983894
iter 80 value 5.972932
iter 90 value 5.968740
iter 100 value 5.965371
final value 5.965371
stopped after 100 iterations
> round(predict(m, newdata=iris),2)
setosa versicolor virginica
1 1 0.00 0.00
2 1 0.00 0.00
3 1 0.00 0.00
4 1 0.00 0.00
5 1 0.00 0.00
6 1 0.00 0.00
7 1 0.00 0.00
8 1 0.00 0.00
9 1 0.00 0.00
10 1 0.00 0.00
11 1 0.00 0.00
12 1 0.00 0.00
13 1 0.00 0.00
14 1 0.00 0.00
15 1 0.00 0.00
16 1 0.00 0.00
17 1 0.00 0.00
18 1 0.00 0.00
19 1 0.00 0.00
20 1 0.00 0.00
21 1 0.00 0.00
22 1 0.00 0.00
23 1 0.00 0.00
24 1 0.00 0.00
25 1 0.00 0.00
26 1 0.00 0.00
27 1 0.00 0.00
28 1 0.00 0.00
29 1 0.00 0.00
30 1 0.00 0.00
31 1 0.00 0.00
32 1 0.00 0.00
33 1 0.00 0.00
34 1 0.00 0.00
35 1 0.00 0.00
36 1 0.00 0.00
37 1 0.00 0.00
38 1 0.00 0.00
39 1 0.00 0.00
40 1 0.00 0.00
41 1 0.00 0.00
42 1 0.00 0.00
43 1 0.00 0.00
44 1 0.00 0.00
45 1 0.00 0.00
46 1 0.00 0.00
47 1 0.00 0.00
48 1 0.00 0.00
49 1 0.00 0.00
50 1 0.00 0.00
51 0 1.00 0.00
52 0 1.00 0.00
53 0 1.00 0.00
54 0 1.00 0.00
55 0 1.00 0.00
56 0 1.00 0.00
57 0 1.00 0.00
58 0 1.00 0.00
59 0 1.00 0.00
60 0 1.00 0.00
61 0 1.00 0.00
62 0 1.00 0.00
63 0 1.00 0.00
64 0 1.00 0.00
65 0 1.00 0.00
66 0 1.00 0.00
67 0 1.00 0.00
68 0 1.00 0.00
69 0 0.95 0.05
70 0 1.00 0.00
71 0 0.59 0.41
72 0 1.00 0.00
73 0 0.77 0.23
74 0 1.00 0.00
75 0 1.00 0.00
76 0 1.00 0.00
77 0 1.00 0.00
78 0 0.73 0.27
79 0 1.00 0.00
80 0 1.00 0.00
81 0 1.00 0.00
82 0 1.00 0.00
83 0 1.00 0.00
84 0 0.12 0.88
85 0 1.00 0.00
86 0 1.00 0.00
87 0 1.00 0.00
88 0 1.00 0.00
89 0 1.00 0.00
90 0 1.00 0.00
91 0 1.00 0.00
92 0 1.00 0.00
93 0 1.00 0.00
94 0 1.00 0.00
95 0 1.00 0.00
96 0 1.00 0.00
97 0 1.00 0.00
98 0 1.00 0.00
99 0 1.00 0.00
100 0 1.00 0.00
101 0 0.00 1.00
102 0 0.00 1.00
103 0 0.00 1.00
104 0 0.00 1.00
105 0 0.00 1.00
106 0 0.00 1.00
107 0 0.11 0.89
108 0 0.00 1.00
109 0 0.00 1.00
110 0 0.00 1.00
111 0 0.01 0.99
112 0 0.00 1.00
113 0 0.00 1.00
114 0 0.00 1.00
115 0 0.00 1.00
116 0 0.00 1.00
117 0 0.00 1.00
118 0 0.00 1.00
119 0 0.00 1.00
120 0 0.08 0.92
121 0 0.00 1.00
122 0 0.00 1.00
123 0 0.00 1.00
124 0 0.06 0.94
125 0 0.00 1.00
126 0 0.00 1.00
127 0 0.19 0.81
128 0 0.20 0.80
129 0 0.00 1.00
130 0 0.03 0.97
131 0 0.00 1.00
132 0 0.00 1.00
133 0 0.00 1.00
134 0 0.77 0.23
135 0 0.03 0.97
136 0 0.00 1.00
137 0 0.00 1.00
138 0 0.00 1.00
139 0 0.33 0.67
140 0 0.00 1.00
141 0 0.00 1.00
142 0 0.00 1.00
143 0 0.00 1.00
144 0 0.00 1.00
145 0 0.00 1.00
146 0 0.00 1.00
147 0 0.00 1.00
148 0 0.00 1.00
149 0 0.00 1.00
150 0 0.02 0.98
> predict(m, newdata=iris, type="class")
[1] "setosa" "setosa" "setosa" "setosa"
[5] "setosa" "setosa" "setosa" "setosa"
[9] "setosa" "setosa" "setosa" "setosa"
[13] "setosa" "setosa" "setosa" "setosa"
[17] "setosa" "setosa" "setosa" "setosa"
[21] "setosa" "setosa" "setosa" "setosa"
[25] "setosa" "setosa" "setosa" "setosa"
[29] "setosa" "setosa" "setosa" "setosa"
[33] "setosa" "setosa" "setosa" "setosa"
[37] "setosa" "setosa" "setosa" "setosa"
[41] "setosa" "setosa" "setosa" "setosa"
[45] "setosa" "setosa" "setosa" "setosa"
[49] "setosa" "setosa" "versicolor" "versicolor"
[53] "versicolor" "versicolor" "versicolor" "versicolor"
[57] "versicolor" "versicolor" "versicolor" "versicolor"
[61] "versicolor" "versicolor" "versicolor" "versicolor"
[65] "versicolor" "versicolor" "versicolor" "versicolor"
[69] "versicolor" "versicolor" "versicolor" "versicolor"
[73] "versicolor" "versicolor" "versicolor" "versicolor"
[77] "versicolor" "versicolor" "versicolor" "versicolor"
[81] "versicolor" "versicolor" "versicolor" "virginica"
[85] "versicolor" "versicolor" "versicolor" "versicolor"
[89] "versicolor" "versicolor" "versicolor" "versicolor"
[93] "versicolor" "versicolor" "versicolor" "versicolor"
[97] "versicolor" "versicolor" "versicolor" "versicolor"
[101] "virginica" "virginica" "virginica" "virginica"
[105] "virginica" "virginica" "virginica" "virginica"
[109] "virginica" "virginica" "virginica" "virginica"
[113] "virginica" "virginica" "virginica" "virginica"
[117] "virginica" "virginica" "virginica" "virginica"
[121] "virginica" "virginica" "virginica" "virginica"
[125] "virginica" "virginica" "virginica" "virginica"
[129] "virginica" "virginica" "virginica" "virginica"
[133] "virginica" "versicolor" "virginica" "virginica"
[137] "virginica" "virginica" "virginica" "virginica"
[141] "virginica" "virginica" "virginica" "virginica"
[145] "virginica" "virginica" "virginica" "virginica"
[149] "virginica" "virginica"
>
E.G. 2
wine.csv
for the description of the data: https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.names
wine <- read.csv("wine.csv")
head(wine)
summary(wine)
wine.scale <- cbind(wine[1], scale(wine[-1]))
summary(wine.scale)
apply(wine.scale[-1], 2, sd)
# Partitioning the data into training and test data
data.size <- nrow(wine.scale)
set.seed(1111)
samp <- c(sample(1:data.size, data.size * 0.7))
data.tr <- wine.scale[samp, ]
data.test <- wine.scale[-samp, ]
summary(data.tr)
summary(data.test)
# Fitting the neural network for the training data
library(nnet)
model.nnet <- nnet(Type ~ ., data = data.tr, size = 2, decay = 5e-04, maxit = 200)
names(model.nnet)
# Creating the confusion matrix for the model
predicted <- predict(model.nnet, data.test, type = "class")
predicted
actual <- data.test$Type
model.confusion.matrix <- table(actual, predicted)
model.confusion.matrix
confusion.matrix.rate = prop.table(model.confusion.matrix) * 100
round(confusion.matrix.rate, digit = 2)
diag.index <- cbind(1:3, 1:3)
error.overall = sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index])
paste("오차율 =", round(error.overall, digit = 2), "%")
> wine <- read.csv("wine.csv")
> head(wine)
Type Alcohol Malic Ash Alcalinity Magnesium Phenols Flavanoids
1 t1 14.23 1.71 2.43 15.6 127 2.80 3.06
2 t1 13.20 1.78 2.14 11.2 100 2.65 2.76
3 t1 13.16 2.36 2.67 18.6 101 2.80 3.24
4 t1 14.37 1.95 2.50 16.8 113 3.85 3.49
5 t1 13.24 2.59 2.87 21.0 118 2.80 2.69
6 t1 14.20 1.76 2.45 15.2 112 3.27 3.39
Nonflavanoids Proanthocyanins Color Hue Dilution Proline
1 0.28 2.29 5.64 1.04 3.92 1065
2 0.26 1.28 4.38 1.05 3.40 1050
3 0.30 2.81 5.68 1.03 3.17 1185
4 0.24 2.18 7.80 0.86 3.45 1480
5 0.39 1.82 4.32 1.04 2.93 735
6 0.34 1.97 6.75 1.05 2.85 1450
> summary(wine)
Type Alcohol Malic Ash
t1:59 Min. :11.03 Min. :0.740 Min. :1.360
t2:71 1st Qu.:12.36 1st Qu.:1.603 1st Qu.:2.210
t3:48 Median :13.05 Median :1.865 Median :2.360
Mean :13.00 Mean :2.336 Mean :2.367
3rd Qu.:13.68 3rd Qu.:3.083 3rd Qu.:2.558
Max. :14.83 Max. :5.800 Max. :3.230
Alcalinity Magnesium Phenols Flavanoids
Min. :10.60 Min. : 70.00 Min. :0.980 Min. :0.340
1st Qu.:17.20 1st Qu.: 88.00 1st Qu.:1.742 1st Qu.:1.205
Median :19.50 Median : 98.00 Median :2.355 Median :2.135
Mean :19.49 Mean : 99.74 Mean :2.295 Mean :2.029
3rd Qu.:21.50 3rd Qu.:107.00 3rd Qu.:2.800 3rd Qu.:2.875
Max. :30.00 Max. :162.00 Max. :3.880 Max. :5.080
Nonflavanoids Proanthocyanins Color
Min. :0.1300 Min. :0.410 Min. : 1.280
1st Qu.:0.2700 1st Qu.:1.250 1st Qu.: 3.220
Median :0.3400 Median :1.555 Median : 4.690
Mean :0.3619 Mean :1.591 Mean : 5.058
3rd Qu.:0.4375 3rd Qu.:1.950 3rd Qu.: 6.200
Max. :0.6600 Max. :3.580 Max. :13.000
Hue Dilution Proline
Min. :0.4800 Min. :1.270 Min. : 278.0
1st Qu.:0.7825 1st Qu.:1.938 1st Qu.: 500.5
Median :0.9650 Median :2.780 Median : 673.5
Mean :0.9574 Mean :2.612 Mean : 746.9
3rd Qu.:1.1200 3rd Qu.:3.170 3rd Qu.: 985.0
Max. :1.7100 Max. :4.000 Max. :1680.0
>
> wine.scale <- cbind(wine[1], scale(wine[-1]))
> summary(wine.scale)
Type Alcohol Malic Ash
t1:59 Min. :-2.42739 Min. :-1.4290 Min. :-3.66881
t2:71 1st Qu.:-0.78603 1st Qu.:-0.6569 1st Qu.:-0.57051
t3:48 Median : 0.06083 Median :-0.4219 Median :-0.02375
Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
3rd Qu.: 0.83378 3rd Qu.: 0.6679 3rd Qu.: 0.69615
Max. : 2.25341 Max. : 3.1004 Max. : 3.14745
Alcalinity Magnesium Phenols
Min. :-2.663505 Min. :-2.0824 Min. :-2.10132
1st Qu.:-0.687199 1st Qu.:-0.8221 1st Qu.:-0.88298
Median : 0.001514 Median :-0.1219 Median : 0.09569
Mean : 0.000000 Mean : 0.0000 Mean : 0.00000
3rd Qu.: 0.600395 3rd Qu.: 0.5082 3rd Qu.: 0.80672
Max. : 3.145637 Max. : 4.3591 Max. : 2.53237
Flavanoids Nonflavanoids Proanthocyanins
Min. :-1.6912 Min. :-1.8630 Min. :-2.06321
1st Qu.:-0.8252 1st Qu.:-0.7381 1st Qu.:-0.59560
Median : 0.1059 Median :-0.1756 Median :-0.06272
Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
3rd Qu.: 0.8467 3rd Qu.: 0.6078 3rd Qu.: 0.62741
Max. : 3.0542 Max. : 2.3956 Max. : 3.47527
Color Hue Dilution
Min. :-1.6297 Min. :-2.08884 Min. :-1.8897
1st Qu.:-0.7929 1st Qu.:-0.76540 1st Qu.:-0.9496
Median :-0.1588 Median : 0.03303 Median : 0.2371
Mean : 0.0000 Mean : 0.00000 Mean : 0.0000
3rd Qu.: 0.4926 3rd Qu.: 0.71116 3rd Qu.: 0.7864
Max. : 3.4258 Max. : 3.29241 Max. : 1.9554
Proline
Min. :-1.4890
1st Qu.:-0.7824
Median :-0.2331
Mean : 0.0000
3rd Qu.: 0.7561
Max. : 2.9631
> apply(wine.scale[-1], 2, sd)
Alcohol Malic Ash Alcalinity
1 1 1 1
Magnesium Phenols Flavanoids Nonflavanoids
1 1 1 1
Proanthocyanins Color Hue Dilution
1 1 1 1
Proline
1
>
> data.size <- nrow(wine.scale)
> set.seed(1111)
> samp <- c(sample(1:data.size, data.size * 0.7))
> data.tr <- wine.scale[samp, ]
> data.test <- wine.scale[-samp, ]
> summary(data.tr)
Type Alcohol Malic Ash
t1:41 Min. :-1.897718 Min. :-1.2947 Min. :-3.66881
t2:50 1st Qu.:-0.816822 1st Qu.:-0.6815 1st Qu.:-0.57051
t3:33 Median : 0.023875 Median :-0.4712 Median :-0.02375
Mean : 0.004802 Mean :-0.0207 Mean :-0.01053
3rd Qu.: 0.778346 3rd Qu.: 0.6992 3rd Qu.: 0.56857
Max. : 2.253415 Max. : 2.9662 Max. : 3.14745
Alcalinity Magnesium Phenols
Min. :-2.66350 Min. :-2.08238 Min. :-2.10132
1st Qu.:-0.74709 1st Qu.:-0.62955 1st Qu.:-0.79110
Median :-0.14821 Median :-0.08693 Median : 0.16759
Mean :-0.04002 Mean : 0.08867 Mean : 0.04969
3rd Qu.: 0.60039 3rd Qu.: 0.71825 3rd Qu.: 0.80672
Max. : 3.14564 Max. : 4.35908 Max. : 2.53237
Flavanoids Nonflavanoids Proanthocyanins
Min. :-1.69120 Min. :-1.86298 Min. :-2.06321
1st Qu.:-0.79267 1st Qu.:-0.81841 1st Qu.:-0.59560
Median : 0.13589 Median :-0.17560 Median : 0.05958
Mean : 0.02124 Mean :-0.05118 Mean : 0.08015
3rd Qu.: 0.88424 3rd Qu.: 0.54756 3rd Qu.: 0.66672
Max. : 3.05422 Max. : 2.15459 Max. : 3.47527
Color Hue Dilution
Min. :-1.62969 Min. :-2.08884 Min. :-1.88972
1st Qu.:-0.82737 1st Qu.:-0.69978 1st Qu.:-0.96365
Median :-0.17603 Median : 0.05491 Median : 0.28636
Mean : 0.00959 Mean :-0.02603 Mean : 0.01557
3rd Qu.: 0.51953 3rd Qu.: 0.59085 3rd Qu.: 0.80398
Max. : 3.42577 Max. : 2.15491 Max. : 1.95540
Proline
Min. :-1.38102
1st Qu.:-0.77926
Median :-0.21560
Mean : 0.01084
3rd Qu.: 0.72039
Max. : 2.96311
> summary(data.test)
Type Alcohol Malic Ash
t1:18 Min. :-2.42739 Min. :-1.42895 Min. :-2.42949
t2:21 1st Qu.:-0.76139 1st Qu.:-0.62109 1st Qu.:-0.58874
t3:15 Median : 0.06083 Median :-0.21156 Median :-0.18778
Mean :-0.01103 Mean : 0.04753 Mean : 0.02417
3rd Qu.: 0.90768 3rd Qu.: 0.61196 3rd Qu.: 0.92396
Max. : 1.69911 Max. : 3.10045 Max. : 2.01748
Alcalinity Magnesium Phenols
Min. :-2.4838 Min. :-1.5223 Min. :-1.6220
1st Qu.:-0.5974 1st Qu.:-0.8221 1st Qu.:-1.0188
Median : 0.1512 Median :-0.4370 Median :-0.1839
Mean : 0.0919 Mean :-0.2036 Mean :-0.1141
3rd Qu.: 0.6004 3rd Qu.: 0.1581 3rd Qu.: 0.7468
Max. : 2.6965 Max. : 2.3986 Max. : 1.6056
Flavanoids Nonflavanoids Proanthocyanins
Min. :-1.5610513 Min. :-1.7826 Min. :-1.6614
1st Qu.:-0.9828915 1st Qu.:-0.7381 1st Qu.:-0.7834
Median : 0.0007312 Median :-0.2158 Median :-0.2724
Mean :-0.0487698 Mean : 0.1175 Mean :-0.1841
3rd Qu.: 0.7315653 3rd Qu.: 1.0498 3rd Qu.: 0.3610
Max. : 1.6125708 Max. : 2.3956 Max. : 2.3920
Color Hue Dilution
Min. :-1.36225 Min. :-1.82634 Min. :-1.84747
1st Qu.:-0.74973 1st Qu.:-0.88572 1st Qu.:-0.80872
Median :-0.13721 Median : 0.01116 Median : 0.18073
Mean :-0.02202 Mean : 0.05977 Mean :-0.03576
3rd Qu.: 0.42247 3rd Qu.: 0.82053 3rd Qu.: 0.75820
Max. : 2.24386 Max. : 3.29241 Max. : 1.33567
Proline
Min. :-1.48899
1st Qu.:-0.77608
Median :-0.38708
Mean :-0.02489
3rd Qu.: 0.91092
Max. : 2.54077
> library(nnet) > model.nnet <- nnet(Type ~ ., data = data.tr, size = 2, decay = 5e-04, maxit = 200) # weights: 37 initial value 164.152084 iter 10 value 7.066118 iter 20 value 1.286566 iter 30 value 0.542334 iter 40 value 0.444873 iter 50 value 0.365821 iter 60 value 0.335803 iter 70 value 0.311584 iter 80 value 0.298015 iter 90 value 0.288199 iter 100 value 0.281759 iter 110 value 0.276935 iter 120 value 0.270048 iter 130 value 0.261452 iter 140 value 0.258495 iter 150 value 0.257073 iter 160 value 0.256035 iter 170 value 0.255792 iter 180 value 0.255743 iter 190 value 0.255725 iter 200 value 0.255714 final value 0.255714 stopped after 200 iterations > > names(model.nnet) [1] "n" "nunits" "nconn" [4] "conn" "nsunits" "decay" [7] "entropy" "softmax" "censored" [10] "value" "wts" "convergence" [13] "fitted.values" "residuals" "lev" [16] "call" "terms" "coefnames" [19] "xlevels" >
> # Creating the confusion matrix for the model > predicted <- predict(model.nnet, data.test, type = "class") > predicted [1] "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" "t1" [13] "t1" "t1" "t1" "t1" "t1" "t1" "t2" "t2" "t2" "t2" "t2" "t2" [25] "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" "t2" [37] "t2" "t2" "t2" "t3" "t2" "t3" "t3" "t3" "t3" "t3" "t3" "t3" [49] "t3" "t3" "t3" "t3" "t3" "t3" >
> actual <- data.test$Type
> model.confusion.matrix <- table(actual, predicted)
> model.confusion.matrix
predicted
actual t1 t2 t3
t1 18 0 0
t2 0 21 0
t3 0 1 14
> confusion.matrix.rate = prop.table(model.confusion.matrix) * 100
> round(confusion.matrix.rate, digit = 2)
predicted
actual t1 t2 t3
t1 33.33 0.00 0.00
t2 0.00 38.89 0.00
t3 0.00 1.85 25.93
> diag.index <- cbind(1:3, 1:3)
>
> error.overall = sum(confusion.matrix.rate) - sum(confusion.matrix.rate[diag.index])
> paste("오차율 =", round(error.overall, digit = 2), "%")
[1] "오차율 = 1.85 %"
E.G. 3
The dataset contains information on different clients who received a loan at least 10 years ago. The variables income (yearly), age, loan (size in euros) and LTI (the loan to yearly income ratio) are available. Our goal is to devise a model which predicts, based on the input variables LTI and age, whether or not a default will occur within 10 years.
set.seed(1234567890)
library("neuralnet")
dataset <- read.csv("creditset.csv")
head(dataset)
# extract a set to train the NN
trainset <- dataset[1:800, ]
# select the test set
testset <- dataset[801:2000, ]
## build the neural network (NN)
creditnet <- neuralnet(default10yr ~ LTI + age, trainset, hidden = 4, lifesign = "minimal",
linear.output = FALSE, threshold = 0.1)
## plot the NN
plot(creditnet, rep = "best")
test the resulting output
temp_test <- subset(testset, select = c("LTI", "age"))
creditnet.results <- compute(creditnet, temp_test)
head(temp_test)
results <- data.frame(actual = testset$default10yr, prediction = creditnet.results$net.result)
results[100:115, ]
results$prediction <- round(results$prediction)
results[100:115, ]
r/neural_network.1481674139.txt.gz · Last modified: by hkimscil
