User Tools

Site Tools


text_mining_example_with_korean_songs

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision | Previous revision
Next revision
Previous revision
Last revision | Both sides next revision
text_mining_example_with_korean_songs [2016/12/07 07:49] hkimscil | text_mining_example_with_korean_songs [2017/12/04 09:59] hkimscil
Line 1: Line 1:
 +====== Lyrics in Music ======
 +{{:mm.xlsx}}
 +
 <code>library(bitops) <code>library(bitops)
 library(RCurl) library(RCurl)
Line 6: Line 9:
 library(wordcloud) library(wordcloud)
 library(XLConnect) library(XLConnect)
 +library(twitteR)
  
- +# set the working directory to the folder in which the saved data file is located.
 setwd ("D:/Users/Hyo/Clouds/Cs-Ds/CS/MusicStudy") setwd ("D:/Users/Hyo/Clouds/Cs-Ds/CS/MusicStudy")
  
Line 15: Line 18:
  
 music90s <- readWorksheetFromFile(music, sheet="1990s") music90s <- readWorksheetFromFile(music, sheet="1990s")
 +# use VectorSource 
 lyrics<- Corpus(VectorSource(music90s$lyrics)) lyrics<- Corpus(VectorSource(music90s$lyrics))
 result.text <- lyrics result.text <- lyrics
  
-removeTwitSign <- function(x) { gsub("@[[:graph:]]*","",x) } # may not be used in this case +inspect(result.text[1:5]) 
-removeURL <- function(x) { gsub("http://[[:graph:]]*","",x) } # may not be used in this case + 
-removeEnter <- function(x) { gsub("\n","",x) }  +removeTwitSign <- function(x) { gsub("@[[:graph:]]*","",x) } # may not be used in this case 
-exNouns <- function(x) { paste(extractNoun(x), collapse=" ")}+removeURL <- function(x) { gsub("http://[[:graph:]]*","",x) } # may not be used in this case 
 +removeEnter <- function(x) { gsub("\n","",x) }  
 +exNouns <- function(x) { paste(extractNoun(x), collapse=" ")}
  
 # NA -> "" 로 변환 # NA -> "" 로 변환
Line 76: Line 82:
  
 {{90s.jpg}} {{90s.jpg}}
 +
 +====== Twitter ======
 +hkimscil # I don't tweet much . . .
 +jaemyung_lee # He does a lot!
 +
 +<code># get your own keys and tokens
 +api_key <- "xxxx"
 +api_secret <- "xxxx"
 +access_token <- "xxxx"
 +access_secret <- "xxxx"
 +setup_twitter_oauth(api_key, api_secret, access_token, access_secret)
 +</code>
 +
 +Go to https://apps.twitter.com/
 +
 +<code>
 +api_key <- "TglWL7ysGLdwIP7g8CzTw"
 +api_secret <- "7oWf4jfYBOV57GX2sFeBCIFv23sJNkm72lQ83GTnnAs"
 +access_token <- "24853107-PnJgDNnZgoGR22ffvAiJFq2anqx84prSPlsRSV3te"
 +access_secret <- "rkptQl92SusirGmGRX9Ch7WDhkzwU45LlhBJ2GSE"
 +setup_twitter_oauth(api_key, api_secret, access_token, access_secret)
 +
 +library(twitteR)
 +# retrieve the first 100 tweets (or all tweets if fewer than 100)
 +# from the user timeline of @rdatamining
 +rdmTweets <- userTimeline("rdatamining", n=100)
 +n <- length(rdmTweets)
 +rdmTweets[1:3]
 +
 +df <- do.call("rbind", lapply(rdmTweets, as.data.frame))
 +dim(df)
 +
 +library(tm)
 +# build a corpus, which is a collection of text documents
 +# VectorSource specifies that the source is character vectors.
 +myCorpus <- Corpus(VectorSource(df$text))
 +
 +myCorpus <- tm_map(myCorpus, tolower)
 +# remove punctuation
 +myCorpus <- tm_map(myCorpus, removePunctuation)
 +# remove numbers
 +myCorpus <- tm_map(myCorpus, removeNumbers)
 +# remove stopwords
 +# keep "r" by removing it from stopwords
 +myStopwords <- c(stopwords('english'), "available", "via")
 +idx <- which(myStopwords == "r")
 +myStopwords <- myStopwords[-idx]
 +myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
 +
 +# remove stopwords
 +# keep "r" by removing it from stopwords
 +myStopwords <- c(stopwords('english'), "available", "via")
 +idx <- which(myStopwords == "r")
 +myStopwords <- myStopwords[-idx]
 +myCorpus <- tm_map(myCorpus, removeWords, myStopwords)
 +
 +
 +dictCorpus <- myCorpus
 +# stem words in a text document with the snowball stemmers,
 +# which requires packages Snowball, RWeka, rJava, RWekajars
 +myCorpus <- tm_map(myCorpus, stemDocument)
 +# inspect the first three "documents"
 +inspect(myCorpus[1:3])
 +
 +# stem completion
 +myCorpus <- tm_map(myCorpus, stemCompletion, dictionary=dictCorpus)
 +inspect(myCorpus[1:3])
 +
 +myDtm <- TermDocumentMatrix(myCorpus, control = list(minWordLength = 1))
 +</code>
 +
 +
text_mining_example_with_korean_songs.txt · Last modified: 2017/12/14 10:12 by hkimscil

Donate Powered by PHP Valid HTML5 Valid CSS Driven by DokuWiki