c:nmp:using_open_api_example
This is an old revision of the document!
# Word cloud of movie plot summaries.
#
# Reads the "plotStory" column from the "imdb" sheet of all_comb.xlsx, cleans
# the text, builds a term-document matrix, writes that matrix to
# myTdm-ani.txt (tab separated, terms as row names, no header) and finally
# draws a word cloud of the term frequencies.

library(bitops)
library(RCurl)
library(rjson)
library(twitteR)
library(digest)
library(ROAuth)
library(KoNLP)
library(rJava)
library(tm)
library(wordcloud)
library(XLConnect)

# Folder holding the input workbook; the output table is written here too.
# Paths are built explicitly so the script neither changes the working
# directory nor wipes the caller's workspace.
data_dir <- "D:/Users/Hyo/Clouds/CS-DS/CS/MovieStudy"

ani <- file.path(data_dir, "all_comb.xlsx")
anisheet <- readWorksheetFromFile(ani, sheet = "imdb")

# Build the corpus once, straight from the character column. Wrapping an
# existing corpus in VectorSource() again is not a supported input.
myCorpus <- Corpus(VectorSource(anisheet$plotStory))

# Text clean-up.
myCorpus <- tm_map(myCorpus, removePunctuation)
myCorpus <- tm_map(myCorpus, removeNumbers)
# Plain functions such as tolower() must go through content_transformer()
# so the documents remain text documents for TermDocumentMatrix().
myCorpus <- tm_map(myCorpus, content_transformer(tolower))
myStopwords <- c(stopwords("english"), "rt")
myCorpus <- tm_map(myCorpus, removeWords, myStopwords)

# Peek at the first few documents (at most five; fewer if the sheet is short).
inspect(myCorpus[seq_len(min(5L, length(myCorpus)))])

# Term-document matrix, keeping terms of two or more characters.
myTdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(2, Inf)))

# Dense copy of the matrix, reused for the export and the frequency counts.
m <- as.matrix(myTdm)
mat <- as.data.frame(m)
write.table(
  mat,
  file = file.path(data_dir, "myTdm-ani.txt"),
  col.names = FALSE,
  row.names = TRUE,
  sep = "\t"
)

pal <- brewer.pal(12, "Paired")

# Font setup. Note that the font name is sensitive to spacing and case.
# Malgun Gothic : windowsFonts(malgun=windowsFont("맑은 고딕"))
# Nanum Gothic  : windowsFonts(malgun=windowsFont("나눔고딕"))
# windowsFonts() is only available on Windows graphics devices.
windowsFonts(malgun = windowsFont("서울남산체 B"))

# Calculate the frequency of each term across all documents, most frequent
# first.
v <- sort(rowSums(m), decreasing = TRUE)
# To relabel a term in the plot, edit the `word` column of `d` after this line.
d <- data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)

# Alternative without a colour palette:
# wordcloud(d$word, d$freq, scale = c(4, 0.5), min.freq = 3,
#           random.order = FALSE, rot.per = .1, family = "malgun")
wordcloud(
  d$word,
  d$freq,
  scale = c(4, 0.7),
  min.freq = 2,
  random.order = FALSE,
  rot.per = .1,
  colors = pal,
  family = "malgun"
)
c/nmp/using_open_api_example.1494206642.txt.gz · Last modified: 2017/05/08 09:54 by hkimscil