User Tools

Site Tools


text_mining_example_with_korean_songs

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
Next revision
Previous revision
text_mining_example_with_korean_songs [2017/11/13 15:49] – [Twitter] hkimscil | text_mining_example_with_korean_songs [2017/12/14 10:12] (current) – [Twitter] hkimscil
Line 9: Line 9:
 library(wordcloud) library(wordcloud)
 library(XLConnect) library(XLConnect)
 +library(twitteR)
  
 # set your data dir in which the save file is located. # set your data dir in which the save file is located.
Line 85: Line 86:
 hkimscil # I don't twitt much . . . . hkimscil # I don't twitt much . . . .
 jaemyung_lee # He does a lot! jaemyung_lee # He does a lot!
 +
 +see https://dev.twitter.com/apps/new
 +
 +
  
 <code>#get your own one  <code>#get your own one 
Line 111: Line 116:
  
 df <- do.call("rbind", lapply(rdmTweets, as.data.frame)) df <- do.call("rbind", lapply(rdmTweets, as.data.frame))
-#dim(df) +dim(df)
  
 library(tm) library(tm)
Line 123: Line 128:
 # remove numbers # remove numbers
 myCorpus <- tm_map(myCorpus, removeNumbers) myCorpus <- tm_map(myCorpus, removeNumbers)
-myCorpus <- tm_map(myCorpus, PlainTextDocument) +# remove stopwords
 +# keep "r" by removing it from stopwords 
 +myStopwords <- c(stopwords('english'), "available", "via")
 +idx <- which(myStopwords == "r")
 +myStopwords <- myStopwords[-idx] 
 +myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
  
 # remove stopwords # remove stopwords
Line 131: Line 141:
 myStopwords <- myStopwords[-idx] myStopwords <- myStopwords[-idx]
 myCorpus <- tm_map(myCorpus, removeWords, myStopwords) myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
 +
  
 dictCorpus <- myCorpus dictCorpus <- myCorpus
Line 142: Line 153:
 myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=dictCorpus) myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=dictCorpus)
 inspect(myCorpus[1:3]) inspect(myCorpus[1:3])
- 
-myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1)) 
  
 myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1)) myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1))
text_mining_example_with_korean_songs.1510557594.txt.gz · Last modified: 2017/11/13 15:49 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki