Я хочу изменить диапазон дат для графика настроений так, чтобы он совпадал с диапазоном дат биржевых данных. Например, если я получаю ежедневные данные о ценах на акции Ecolab за 1 год, то код должен также получать твиты за тот же прошедший год, а затем рассчитывать и отображать настроения за тот же период времени.
# Install only the packages that are missing. Calling install.packages()
# unconditionally re-downloads everything on every run and can replace
# packages that are already loaded in the current session.
required_pkgs <- c(
  "devtools", "rjson", "bit64", "httr",
  "SentimentAnalysis", "ROAuth", "RCurl", "base64enc"
)
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
  # RESTART the R session after a fresh install before continuing.
}
# Attach the packages used by the script. library() is used throughout
# because it stops with an error when a package is missing, whereas
# require() only returns FALSE and lets the script fail later.
library(devtools)
library(twitteR)
library("openssl")
library("httpuv")
library(dplyr)
library(tidyr)
library(ggplot2)
library(httr)
library(stringr)
library(magrittr)
library(SentimentAnalysis)
library(broom)
library(gridExtra)
library(ROAuth)
library(RCurl)
# Load the Ecolab price history (CSV downloaded from Yahoo Finance and
# stored on GitHub); the file is read from the working directory.
a <- read.csv(file = "ECL.csv")
# Authenticate this session against the Twitter API. The four strings are
# placeholders for the credentials of your own Twitter application.
setup_twitter_oauth(
  consumer_key = "API Consumer key",
  consumer_secret = "API consumer secret",
  access_token = "API access token",
  access_secret = "API access secret"
)
# Maximum number of tweets requested per search query
numberOfTweets <- 750
# Hashtag searches. Both result lists are concatenated so that the
# "#ecolab" results are kept instead of being overwritten by "#ECL".
tweets <- c(
  searchTwitter(searchString = "#ecolab", n = numberOfTweets, lang = "en"),
  searchTwitter(searchString = "#ECL", n = numberOfTweets, lang = "en")
)
# Mentions of the company account
tweets2 <- searchTwitter(searchString = "@Ecolab", n = numberOfTweets, lang = "en")
# NOTE(review): searchTwitter() also accepts `since`/`until` dates, but the
# standard search endpoint presumably only reaches back a few days, so a
# full year of tweets likely needs a different data source - confirm.
tweetsDF <- twListToDF(tweets)
tweetsDF2 <- twListToDF(tweets2)
tweetsFullDF <- rbind(tweetsDF, tweetsDF2)
# A tweet can match more than one query; keep each tweet id only once so
# it is not counted several times in the daily sentiment figures.
tweetsFullDF <- tweetsFullDF[!duplicated(tweetsFullDF$id), ]
# Keep only the trading date and the closing price
a <- a[, c("Date", "Close")]
# Parse the date column into Date objects
a$Date <- as.Date(a$Date)
# Work on a copy of the combined tweets and convert the text to the
# native encoding
x <- tweetsFullDF
x$text <- enc2native(x$text)
# Clean text
x$text <- gsub("^[[:space:]]*", "", x$text) # Remove leading whitespace
x$text <- gsub("[[:space:]]*$", "", x$text) # Remove trailing whitespace
x$text <- gsub(" +", " ", x$text) # Collapse runs of spaces
x$text <- gsub("'", "%%", x$text) # Protect apostrophes as %%
x$text <- iconv(x$text, "latin1", "ASCII", sub = "") # Drop non-ASCII (emojis)
# Remove escapes such as <U+A>. The match stops at the first ">" so that
# ordinary text between two separate tags is not deleted along with them.
x$text <- gsub("<[^>]*>", "", x$text)
x$text <- gsub(" \\. ", " ", x$text) # Replace orphaned full stops with a space
x$text <- gsub(" {2,}", " ", x$text) # Replace repeated spaces with one space
x$text <- gsub("%%", "'", x$text) # Change %% back to apostrophes
# Remove the tweet URL: everything from "https" to the end of the text
x$text <- gsub("https.*$", "", x$text)
x$text <- gsub("\\n", "-", x$text) # Replace line breaks with "-"
x$text <- gsub("-{2,}", "-", x$text) # Collapse repeated "-" from blank lines
x$text <- gsub("&amp;", "&", x$text) # Decode the HTML-escaped ampersand
# Mark tweets that have no text left (empty or whitespace only)
x$text[!nzchar(trimws(x$text))] <- "<no text>"
# Flag truncated tweets with a trailing ellipsis. Vectorised, so it is safe
# for zero rows, and %in% treats a missing `truncated` flag as FALSE.
is_truncated <- x$truncated %in% TRUE
x$text[is_truncated] <- sub("[[:space:]]*$", "...", x$text[is_truncated])
# Keep just the cleaned tweet text as a one-column data frame
cleanTweets <- select(x, "text")
# Score every tweet with analyzeSentiment()
sentiment <- analyzeSentiment(cleanTweets)
# Continuous sentiment score based on the QDAP dictionary
sentiment2 <- sentiment[["SentimentQDAP"]]
# The same score expressed as a direction (positive, neutral, negative)
sentiment3 <- convertToDirection(sentiment2)
# Derive the calendar day of every tweet from its creation timestamp.
# The extracted string is ISO formatted (YYYY-MM-DD), which as.Date()
# parses by default, so a single conversion without `format` is enough.
date <- as.Date(str_extract(x$created, "\\d{4}-\\d{2}-\\d{2}"))
# Combine text, numeric score, direction and day into one data frame;
# the column names are taken from the variable names.
df <- cbind(cleanTweets, sentiment2, sentiment3, date)
# Drop rows with a missing value in any column
df <- df[complete.cases(df), ]
# Average sentiment score per calendar day
df2 <- summarize(
  group_by(df, date),
  meanSentiment = mean(sentiment2, na.rm = TRUE)
)
# Show the daily averages in an interactive, editable table
DT::datatable(df2, editable = TRUE)
# Count tweets per day and sentiment direction (positive, neutral, negative)
freq <- summarise(group_by(df, date, sentiment3), Freq = n())
# Reshape from long to wide: one row per day, one column per direction
freq2 <- spread(freq, key = sentiment3, value = Freq)
# Show the wide table in an interactive, editable table
DT::datatable(freq2, editable = TRUE)
# Stacked bar chart of daily tweet counts per sentiment direction.
# Columns are mapped by name through `data = freq` rather than with
# freq$... inside aes(), which also gives the legend a readable title.
ggplot(data = freq, mapping = aes(x = date, y = Freq, fill = sentiment3)) +
  geom_col() +
  labs(x = "Date", y = "Sentiment Frequency", fill = "Sentiment")
# Calculate z-scores of the Ecolab closing prices.
# Missing prices are ignored when estimating the mean and standard
# deviation, so a single NA does not turn every z-score into NA.
mu <- mean(a$Close, na.rm = TRUE)
# Named `sigma` so the value does not mask the stats::sd() function
sigma <- stats::sd(a$Close, na.rm = TRUE)
# Refer to the column by name inside mutate() instead of a$Close
a2 <- a %>%
  mutate(zScore = (Close - mu) / sigma)
# Common date window for both panels, derived from the data instead of
# hard-coded dates, so the two x-axes line up date-for-date.
date_limits <- range(c(df2$date, a2$Date), na.rm = TRUE)
# Plot mean sentiment scores
p1 <- ggplot(data = df2, aes(x = date, y = meanSentiment, group = 1)) +
  geom_line() +
  geom_point() +
  ylab("Mean Twitter Sentiment Score") +
  scale_x_date(limits = date_limits)
# Plot z-scores of the Ecolab closing prices. Every layer is chained with
# "+" so the date scale is part of p2 rather than a separate, discarded
# expression. Breaks are left to ggplot2: one break per day is unreadable
# once the window spans months.
p2 <- ggplot(data = a2, aes(x = Date, y = zScore, group = 1)) +
  geom_line() +
  geom_point() +
  ylab("Z-Score of closing stock price") +
  scale_x_date(limits = date_limits)
plot1 <- p1
plot2 <- p2
# Stack the sentiment panel above the stock panel
grid.arrange(plot1, plot2, nrow = 2)
# Overlay both series in one ggplot. The sentiment score is multiplied by
# sentiment_scale to be visible on the z-score axis; the secondary axis
# divides by the same factor so it is labelled in sentiment units.
sentiment_scale <- 20
stock_layer <- geom_line(
  data = a2,
  mapping = aes(x = Date, y = zScore),
  size = 1
)
sentiment_layer <- geom_line(
  data = df2,
  mapping = aes(x = date, y = meanSentiment * sentiment_scale),
  size = 1,
  color = "blue"
)
ggplot() +
  stock_layer +
  sentiment_layer +
  scale_x_date(name = "Date", labels = NULL) +
  scale_y_continuous(
    name = "z-Score of Closing Stock Price",
    sec.axis = sec_axis(~ . / sentiment_scale, name = "Sentiment Score")
  ) +
  theme(
    axis.title.y = element_text(color = "grey"),
    axis.title.y.right = element_text(color = "blue")
  )
# Plot both series on the same base-graphics plot with two y-axes.
# Widen the right margin so the label of the second axis fits; the
# previous margins are restored at the end.
old_par <- par(mar = c(5, 4, 4, 5) + 0.1)
# Both plot() calls use the same x-range. Without it each series gets its
# own date axis and the overlaid lines do not correspond to the same days.
shared_xlim <- range(c(df2$date, a2$Date), na.rm = TRUE)
plot(df2$date, df2$meanSentiment,
  type = "l", col = "red3", xlim = shared_xlim,
  xlab = "Date", ylab = "Mean Sentiment Score"
)
# Draw the next plot on top of the current one instead of starting a new page
par(new = TRUE)
plot(a2$Date, a2$zScore,
  type = "l", axes = FALSE, xlim = shared_xlim,
  xlab = NA, ylab = NA, col = "blue"
)
# Right-hand axis and label for the stock z-score
axis(side = 4)
mtext(side = 4, line = 3, "Closing Stock Price z-Score")
# One legend entry per plotted line
legend("topright",
  legend = c("Mean Sentiment Score", "Closing Stock Price z-Score"),
  lty = 1, col = c("red3", "blue")
)
par(old_par)
# Export the tweet-level sentiment table to an Excel workbook.
# Install the xlsx package only when it is not available yet.
if (!requireNamespace("xlsx", quietly = TRUE)) {
  install.packages("xlsx")
}
library(xlsx)
z <- df
# The xlsx package exports write.xlsx(); write_xlsx() belongs to the
# separate writexl package and is not available here. Row names are
# omitted so the sheet holds only the data columns.
write.xlsx(z, "x.xlsx", row.names = FALSE)