text_mining_example_with_korean_songs
Differences
This shows you the differences between two versions of the page.
Both sides previous revision · Previous revision · Next revision | Previous revision | ||
text_mining_example_with_korean_songs [2016/12/07 10:43] – [Twitter] hkimscil | text_mining_example_with_korean_songs [2017/12/14 10:12] (current) – [Twitter] hkimscil | ||
---|---|---|---|
Line 9: | Line 9: | ||
library(wordcloud) | library(wordcloud) | ||
library(XLConnect) | library(XLConnect) | ||
+ | library(twitteR) | ||
# set your data dir in which the save file is located. | # set your data dir in which the save file is located. | ||
Line 85: | Line 86: | ||
hkimscil # I don't tweet much . . . . | hkimscil # I don't tweet much . . . . | ||
jaemyung_lee # He does a lot! | jaemyung_lee # He does a lot! | ||
+ | |||
+ | see https:// | ||
+ | |||
+ | |||
< | < | ||
Line 111: | Line 116: | ||
df <- do.call(" | df <- do.call(" | ||
- | #dim(df) | + | dim(df) |
library(tm) | library(tm) | ||
Line 123: | Line 128: | ||
# remove numbers | # remove numbers | ||
myCorpus <- tm_map(myCorpus, | myCorpus <- tm_map(myCorpus, | ||
- | myCorpus <- tm_map(myCorpus, | + | # remove stopwords |
+ | # keep " | ||
+ | myStopwords <- c(stopwords(' | ||
+ | idx <- which(myStopwords == " | ||
+ | myStopwords <- myStopwords[-idx] | ||
+ | myCorpus <- tm_map(myCorpus, | ||
# remove stopwords | # remove stopwords | ||
Line 131: | Line 141: | ||
myStopwords <- myStopwords[-idx] | myStopwords <- myStopwords[-idx] | ||
myCorpus <- tm_map(myCorpus, | myCorpus <- tm_map(myCorpus, | ||
+ | |||
dictCorpus <- myCorpus | dictCorpus <- myCorpus | ||
Line 143: | Line 154: | ||
inspect(myCorpus[1: | inspect(myCorpus[1: | ||
- | myDtm <- TermDocumentMatrix(myCorpus, | + | myDtm <- TermDocumentMatrix(myCorpus, |
- | + | </ | |
text_mining_example_with_korean_songs.1481076833.txt.gz · Last modified: 2016/12/07 10:43 by hkimscil