User Tools

Site Tools


text_mining_example_with_korean_songs

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
text_mining_example_with_korean_songs [2016/12/07 07:49] hkimsciltext_mining_example_with_korean_songs [2017/12/14 10:12] (current) – [Twitter] hkimscil
Line 1: Line 1:
 +====== Lyrics in Music ======
 +{{:mm.xlsx}}
 +
 <code>library(bitops) <code>library(bitops)
 library(RCurl) library(RCurl)
Line 6: Line 9:
 library(wordcloud) library(wordcloud)
 library(XLConnect) library(XLConnect)
 +library(twitteR)
  
- +# set the working directory to the folder where the data file is saved.
 setwd ("D:/Users/Hyo/Clouds/Cs-Ds/CS/MusicStudy") setwd ("D:/Users/Hyo/Clouds/Cs-Ds/CS/MusicStudy")
  
Line 15: Line 18:
  
 music90s <- readWorksheetFromFile(music, sheet="1990s") music90s <- readWorksheetFromFile(music, sheet="1990s")
 +# use VectorSource 
 lyrics<- Corpus(VectorSource(music90s$lyrics)) lyrics<- Corpus(VectorSource(music90s$lyrics))
 result.text <- lyrics result.text <- lyrics
  
-removeTwitSign <- function(x) { gsub("@[[:graph:]]*","",x) } # may not be used in this case +inspect(result.text[1:5]) 
-removeURL <- function(x) { gsub("http://[[:graph:]]*","",x) } # may not be used in this case + 
-removeEnter <- function(x) { gsub("\n","",x) }  +removeTwitSign <- function(x) { gsub("@[[:graph:]]*","",x) } # may not be used in this case 
-exNouns <- function(x) { paste(extractNoun(x), collapse=" ")}+removeURL <- function(x) { gsub("http://[[:graph:]]*","",x) } # may not be used in this case 
 +removeEnter <- function(x) { gsub("\n","",x) }  
 +exNouns <- function(x) { paste(extractNoun(x), collapse=" ")}
  
 # NA -> "" 로 변환 # NA -> "" 로 변환
Line 76: Line 82:
  
 {{90s.jpg}} {{90s.jpg}}
 +
 +====== Twitter ======
 +hkimscil # I don't tweet much...
 +jaemyung_lee # He does a lot!
 +
 +see https://dev.twitter.com/apps/new
 +
 +
 +
 +<code># Placeholders only: substitute the credentials issued for your own Twitter app.
 +api_key <- "xxxx"  # placeholder value
 +api_secret <- "xxxx"  # placeholder value
 +access_token <- "xxxx"  # placeholder value
 +access_secret <- "xxxx"  # placeholder value
 +setup_twitter_oauth(api_key, api_secret, access_token, access_secret)  # hand all four values to twitteR's OAuth setup
 +</code>
 +
 +Go to https://apps.twitter.com/
 +
 +<code>
 +api_key <- "TglWL7ysGLdwIP7g8CzTw"
 +api_secret <- "7oWf4jfYBOV57GX2sFeBCIFv23sJNkm72lQ83GTnnAs"
 +access_token <- "24853107-PnJgDNnZgoGR22ffvAiJFq2anqx84prSPlsRSV3te"
 +access_secret <- "rkptQl92SusirGmGRX9Ch7WDhkzwU45LlhBJ2GSE"
 +setup_twitter_oauth(api_key, api_secret, access_token, access_secret)
 +
 +library(twitteR)
 +# retrieve the first 100 tweets (or all tweets if fewer than 100)
 +# from the user timeline of @rdatammining
 +rdmTweets <- userTimeline("rdatamining", n=100)
 +n <- length(rdmTweets)
 +rdmTweets[1:3]
 +
 +df <- do.call("rbind", lapply(rdmTweets, as.data.frame))
 +dim(df)
 +
 +library(tm)
 +# build a corpus, which is a collection of text documents
 +# VectorSource specifies that the source is character vectors.
 +myCorpus <- Corpus(VectorSource(df$text))
 +
 +myCorpus <- tm_map(myCorpus, tolower)
 +# remove punctuation
 +myCorpus <- tm_map(myCorpus, removePunctuation)
 +# remove numbers
 +myCorpus <- tm_map(myCorpus, removeNumbers)
 +# remove stopwords
 +# keep "r" by removing it from stopwords
 +myStopwords <- c(stopwords('english'), "available", "via")
 +idx <- which(myStopwords == "r")
 +myStopwords <- myStopwords[-idx]
 +myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
 +
 +# remove stopwords
 +# keep "r" by removing it from stopwords
 +myStopwords <- c(stopwords('english'), "available", "via")
 +idx <- which(myStopwords == "r")
 +myStopwords <- myStopwords[-idx]
 +myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
 +
 +
 +dictCorpus <- myCorpus
 +# stem words in a text document with the snowball stemmers,
 +# which requires packages Snowball, RWeka, rJava, RWekajars
 +myCorpus <- tm_map(myCorpus, stemDocument)
 +# inspect the first three ``documents"
 +inspect(myCorpus[1:3])
 +
 +# stem completion
 +myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=dictCorpus)
 +inspect(myCorpus[1:3])
 +
 +myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1))
 +</code>
 +
 +
text_mining_example_with_korean_songs.1481066398.txt.gz · Last modified: 2016/12/07 07:49 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki