r:document_classification
This is an old revision of the document!
Document Classification
> library(tm)
> data("crude")
> c <- crude
> c <- tm_map(c, content_transformer(tolower))
> c <- tm_map(c, content_transformer(removeNumbers))
> c <- tm_map(c, content_transformer(removeNumbers))
> x <- TermDocumentMatrix(c)
> inspect(x[1:10, 1:10])
<<TermDocumentMatrix (terms: 10, documents: 10)>>
Non-/sparse entries: 7/93
Sparsity : 93%
Maximal term length: 10
Weighting : term frequency (tf)
Docs
Terms 127 144 191 194 211 236 237 242 246 248
-/, 0 0 0 0 0 0 0 4 0 0
-foot 0 0 0 0 0 0 0 0 0 0
-hour 0 0 0 0 0 0 0 0 0 0
-member 0 0 0 0 0 0 0 1 0 0
-nation 0 0 0 0 0 1 0 0 0 0
"(it) 0 0 0 0 0 0 1 0 0 0
"demand 0 1 0 0 0 0 0 0 0 0
"expansion 0 0 0 0 0 0 0 0 0 0
"for 0 0 0 0 0 0 1 0 0 0
"growth 0 0 0 0 0 0 1 0 0 0
> c <- tm_map(c, content_transformer(removePunctuation))
> x <- TermDocumentMatrix(c)
> inspect(x[1:10, 1:10])
<<TermDocumentMatrix (terms: 10, documents: 10)>>
Non-/sparse entries: 13/87
Sparsity : 87%
Maximal term length: 9
Weighting : term frequency (tf)
Docs
Terms 127 144 191 194 211 236 237 242 246 248
abdulaziz 0 0 0 0 0 0 0 0 5 0
ability 0 2 0 0 0 3 0 0 0 0
able 0 0 0 0 0 0 0 0 0 0
about 0 1 0 0 1 0 1 0 2 2
above 0 2 0 0 0 3 0 0 0 2
abroad 0 0 0 0 0 1 0 0 0 0
accept 0 0 0 0 0 0 0 0 0 0
accord 0 0 0 0 0 0 0 0 0 5
according 0 0 0 0 0 0 0 0 0 0
across 0 0 0 0 0 0 0 0 0 0
> c <- crude
> c <- tm_map(c, content_transformer(tolower))
> c <- tm_map(c, content_transformer(removePunctuation))
> c <- tm_map(c, content_transformer(removeNumbers))
> x <- TermDocumentMatrix(c)
> inspect(x[1:10, 1:10])
<<TermDocumentMatrix (terms: 10, documents: 10)>>
Non-/sparse entries: 13/87
Sparsity : 87%
Maximal term length: 9
Weighting : term frequency (tf)
Docs
Terms 127 144 191 194 211 236 237 242 246 248
abdulaziz 0 0 0 0 0 0 0 0 5 0
ability 0 2 0 0 0 3 0 0 0 0
able 0 0 0 0 0 0 0 0 0 0
about 0 1 0 0 1 0 1 0 2 2
above 0 2 0 0 0 3 0 0 0 2
abroad 0 0 0 0 0 1 0 0 0 0
accept 0 0 0 0 0 0 0 0 0 0
accord 0 0 0 0 0 0 0 0 0 5
according 0 0 0 0 0 0 0 0 0 0
across 0 0 0 0 0 0 0 0 0 0
>
> y <- TermDocumentMatrix(x, control=list(weighting=weightTfIdf))
Error in UseMethod("TermDocumentMatrix", x) :
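no applicable method for 'TermDocumentMatrix' applied to an object of class "c('TermDocumentMatrix', 'simple_triplet_matrix')"
> # TermDocumentMatrix() builds a matrix from a corpus, so it cannot take the existing matrix x; rebuild from the corpus c instead (or reweight x directly with weightTfIdf(x)).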
> y <- TermDocumentMatrix(c, control=list(weighting=weightTfIdf))
> inspect(y[1:10, 1:10])
<<TermDocumentMatrix (terms: 10, documents: 10)>>
Non-/sparse entries: 13/87
Sparsity : 87%
Maximal term length: 9
Weighting : term frequency - inverse document frequency (normalized) (tf-idf)
Docs
Terms 127 144 191 194 211 236
abdulaziz 0 0.000000000 0 0 0.0000000 0.00000000
ability 0 0.015079700 0 0 0.0000000 0.02268204
able 0 0.000000000 0 0 0.0000000 0.00000000
about 0 0.003641675 0 0 0.0181086 0.00000000
above 0 0.012792992 0 0 0.0000000 0.01924250
abroad 0 0.000000000 0 0 0.0000000 0.01193903
accept 0 0.000000000 0 0 0.0000000 0.00000000
accord 0 0.000000000 0 0 0.0000000 0.00000000
according 0 0.000000000 0 0 0.0000000 0.00000000
across 0 0.000000000 0 0 0.0000000 0.00000000
Docs
Terms 237 242 246 248
abdulaziz 0.00000000 0 0.08575254 0.000000000
ability 0.00000000 0 0.00000000 0.000000000
able 0.00000000 0 0.00000000 0.000000000
about 0.00378776 0 0.01049149 0.009408741
above 0.00000000 0 0.00000000 0.016526179
abroad 0.00000000 0 0.00000000 0.000000000
accept 0.00000000 0 0.00000000 0.000000000
accord 0.00000000 0 0.00000000 0.048700455
according 0.00000000 0 0.00000000 0.000000000
across 0.00000000 0 0.00000000 0.000000000
> freqTerms <- findFreqTerms(x, lowfreq=10)
> freqTerms
[1] "about" "and" "are" "barrel"
[5] "barrels" "bpd" "but" "crude"
[9] "dlrs" "for" "from" "government"
[13] "has" "industry" "its" "kuwait"
[17] "last" "market" "meeting" "minister"
[21] "mln" "new" "not" "official"
[25] "oil" "one" "opec" "pct"
[29] "price" "prices" "production" "reuter"
[33] "said" "saudi" "sheikh" "that"
[37] "the" "they" "this" "was"
[41] "were" "will" "with" "world"
[45] "would"
> findAssocs(x, "oil", 0.7)
$oil
opec named clearly late prices trying
0.87 0.81 0.79 0.79 0.79 0.79
who winter markets said analysts agreement
0.79 0.79 0.78 0.78 0.77 0.76
emergency that above they buyers fixed
0.74 0.74 0.73 0.73 0.71 0.71
through
0.70
>
r/document_classification.1481678347.txt.gz · Last modified: by hkimscil
