text_mining_example_with_korean_songs
                Differences
This shows you the differences between two versions of the page.
| Both sides previous revision · Previous revision · Next revision | Previous revision | ||
| text_mining_example_with_korean_songs [2016/12/07 10:19] – hkimscil | text_mining_example_with_korean_songs [2017/12/14 10:12] (current) – [Twitter] hkimscil | ||
|---|---|---|---|
| Line 9: | Line 9: | ||
| library(wordcloud) | library(wordcloud) | ||
| library(XLConnect) | library(XLConnect) | ||
| + | library(twitteR) | ||
| # set your data dir in which the save file is located. | # set your data dir in which the save file is located. | ||
| Line 85: | Line 86: | ||
| hkimscil # I don't tweet much . . . . | hkimscil # I don't tweet much . . . . | ||
| jaemyung_lee # He does a lot! | jaemyung_lee # He does a lot! | ||
| + | |||
| + | see https:// | ||
| + | |||
| + | |||
| < | < | ||
| Line 96: | Line 101: | ||
| Go to https:// | Go to https:// | ||
| + | < | ||
| + | api_key <- " | ||
| + | api_secret <- " | ||
| + | access_token <- " | ||
| + | access_secret <- " | ||
| + | setup_twitter_oauth(api_key, | ||
| + | library(twitteR) | ||
| + | # retrieve the first 100 tweets (or all tweets if fewer than 100) | ||
| + | # from the user timeline of @rdatamining | ||
| + | rdmTweets <- userTimeline(" | ||
| + | n <- length(rdmTweets) | ||
| + | rdmTweets[1: | ||
| + | df <- do.call(" | ||
| + | dim(df) | ||
| + | |||
| + | library(tm) | ||
| + | # build a corpus, which is a collection of text documents | ||
| + | # VectorSource specifies that the source is character vectors. | ||
| + | myCorpus <- Corpus(VectorSource(df$text)) | ||
| + | |||
| + | myCorpus <- tm_map(myCorpus, | ||
| + | # remove punctuation | ||
| + | myCorpus <- tm_map(myCorpus, | ||
| + | # remove numbers | ||
| + | myCorpus <- tm_map(myCorpus, | ||
| + | # remove stopwords | ||
| + | # keep " | ||
| + | myStopwords <- c(stopwords(' | ||
| + | idx <- which(myStopwords == " | ||
| + | myStopwords <- myStopwords[-idx] | ||
| + | myCorpus <- tm_map(myCorpus, | ||
| + | |||
| + | # remove stopwords | ||
| + | # keep " | ||
| + | myStopwords <- c(stopwords(' | ||
| + | idx <- which(myStopwords == " | ||
| + | myStopwords <- myStopwords[-idx] | ||
| + | myCorpus <- tm_map(myCorpus, | ||
| + | |||
| + | |||
| + | dictCorpus <- myCorpus | ||
| + | # stem words in a text document with the snowball stemmers, | ||
| + | # which requires packages Snowball, RWeka, rJava, RWekajars | ||
| + | myCorpus <- tm_map(myCorpus, | ||
| + | # inspect the first three "documents" | ||
| + | inspect(myCorpus[1: | ||
| + | |||
| + | # stem completion | ||
| + | myCorpus <- tm_map(myCorpus, | ||
| + | inspect(myCorpus[1: | ||
| + | |||
| + | myDtm <- TermDocumentMatrix(myCorpus, | ||
| + | </ | ||
text_mining_example_with_korean_songs.1481075351.txt.gz · Last modified:  by hkimscil
                
                