# install.packages("twitteR")
# install.packages("RCurl")
# install.packages("tm")
# install.packages("wordcloud")
# install.packages("SnowballC")

library("twitteR")
library("RCurl")
library("tm")
library("wordcloud")   # attaches RColorBrewer, which provides brewer.pal()
library("SnowballC")

# Twitter API credentials
consumer_key <- 'your value'
consumer_secret <- 'your value'
access_token <- 'your value'
access_token_secret <- 'your value'
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_token_secret)

# Pull 20 English-language tweets mentioning "netflix"
netflix_tweets <- searchTwitter("netflix", n = 20, lang = "en")
netflix_tweets

# Extract the text of each tweet
netflix_tweets_text <- sapply(netflix_tweets, function(x) x$getText())

# Build a corpus from the tweet text
docs <- Corpus(VectorSource(netflix_tweets_text))

# If using Mac OS, strip non-ASCII characters first:
# netflix_tweets_text2 <- sapply(netflix_tweets_text, function(row) iconv(row, "latin1", "ASCII", ""))
# docs <- Corpus(VectorSource(netflix_tweets_text2))

# Replace /, @, and | with spaces
toSpace <- content_transformer(function(x, pattern) gsub(pattern, " ", x))
docs <- tm_map(docs, toSpace, "/")
docs <- tm_map(docs, toSpace, "@")
docs <- tm_map(docs, toSpace, "\\|")

# Convert the text to lower case
docs <- tm_map(docs, content_transformer(tolower))
# Remove numbers
docs <- tm_map(docs, removeNumbers)
# Remove common English stopwords
docs <- tm_map(docs, removeWords, stopwords("english"))
# Remove your own stopwords, specified as a character vector
docs <- tm_map(docs, removeWords, c("blabla1", "blabla2"))
# Remove punctuation
docs <- tm_map(docs, removePunctuation)
# Eliminate extra white space
docs <- tm_map(docs, stripWhitespace)
# Text stemming
docs <- tm_map(docs, stemDocument)

# Word counts
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
head(d, 10)

# Word cloud (fix the seed so the layout is reproducible)
set.seed(1234)
wordcloud(words = d$word, freq = d$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))

# Association of words: draw a dendrogram
# Keep only terms appearing in at least 20% of tweets (sparse can be tuned, e.g. 0.89-0.9)
dtm_top <- removeSparseTerms(dtm, sparse = 0.8)
m_top <- as.matrix(dtm_top)
dscale <- scale(m_top)
distance <- dist(dscale, method = "euclidean")  # distance matrix
fit <- hclust(distance, method = "complete")
plot(fit)

# Terms correlated with "netflix" at 0.1 or above
asso <- findAssocs(dtm, "netflix", 0.1)
asso
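
# Optional extra: a quick bar chart of the ten most frequent terms, as a
# sanity check alongside the word cloud. This is a minimal sketch, assuming
# the frequency table 'd' built above; the title and color choice are
# illustrative, not part of the original analysis.
barplot(d$freq[1:10], names.arg = as.character(d$word[1:10]), las = 2,
        col = brewer.pal(8, "Dark2"), main = "Most frequent words")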
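
# Optional extra: findAssocs() returns a named list (here with one element,
# $netflix, holding a named numeric vector of correlations). A minimal sketch,
# assuming 'asso' from above, that flattens it into a data frame for easier
# reading; 'asso_df' is a hypothetical name introduced here.
asso_df <- data.frame(word = names(asso$netflix), corr = asso$netflix,
                      row.names = NULL)
asso_df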