Как изменить настройки даты в коде R, который извлекает данные через Twitter API и выводит их на график - PullRequest
0 голосов
/ 08 июля 2019

Я хочу изменить дату для графика настроений и сделать так, чтобы дата соответствовала датам биржевых данных. Например, если я получаю ежедневные данные о ценах на акции для Ecolab за 1 год, тогда код должен также получать твиты за прошедший 1 год, а также рассчитывать и отображать настроения за тот же период времени.

install.packages(c("devtools", "rjson", "bit64", "httr")) 

#RESTART R session! 

library(devtools) 
library(twitteR) 
install.packages("SentimentAnalysis") 
install.packages(c('ROAuth','RCurl')) 
install.packages('base64enc') 
library("openssl") 
library("httpuv") 
library(dplyr) 
library(tidyr) 
library(ggplot2) 
library(httr) 
library(stringr) 
library(twitteR) 
library(magrittr) 
library(SentimentAnalysis) 
library(broom) 

require(gridExtra) 
require('ROAuth') 
require('RCurl')  

# Import Ecolab stock price data from a CSV file downloaded from Yahoo Finance.
# The file is read relative to the current working directory.
a <- read.csv("ECL.csv")

# Authenticate against the Twitter API. The four strings below are
# placeholders and must be replaced with real credentials before running.
setup_twitter_oauth(consumer_key = "API Consumer key",
                    consumer_secret = "API consumer secret",
                    access_token = "API access token",
                    access_secret = "API access secret")

# Maximum number of tweets requested per search term.
numberOfTweets <- 750

# Search for "#ecolab", "#ECL" and "@Ecolab". Each result is kept in its own
# variable: previously the "#ecolab" result was assigned to `tweets` and then
# immediately overwritten by the "#ECL" search, so it never reached the
# combined data frame.
# NOTE(review): searchTwitter() also accepts `since` / `until` arguments, but
# the standard search API presumably only reaches back about a week -- confirm
# against the API tier in use before expecting a full year of tweets.
tweetsEcolab <- searchTwitter(searchString = "#ecolab", n = numberOfTweets, lang = "en")
tweets <- searchTwitter(searchString = "#ECL", n = numberOfTweets, lang = "en")
tweets2 <- searchTwitter(searchString = "@Ecolab", n = numberOfTweets, lang = "en")

# Convert each list of status objects to a data frame and stack them.
tweetsEcolabDF <- twListToDF(tweetsEcolab)
tweetsDF <- twListToDF(tweets)
tweetsDF2 <- twListToDF(tweets2)
tweetsFullDF <- rbind(tweetsEcolabDF, tweetsDF, tweetsDF2)

# A tweet that matches more than one search term would otherwise be counted
# several times in the sentiment statistics; keep the first occurrence only.
tweetsFullDF <- tweetsFullDF[!duplicated(tweetsFullDF$id), ]
rownames(tweetsFullDF) <- NULL

# Keep only the trading date and the closing price of the stock data
a <- a[, c("Date", "Close")]
# Parse the date column (factor/character as read from the CSV) into Date
a$Date <- as.Date(a$Date)

# Work on a copy of the combined tweets and convert the text to the native
# encoding of the current session.
x <- tweetsFullDF
x$text <- enc2native(x$text)

# Clean text. The steps are order-dependent: apostrophes are parked as "%%"
# before the ASCII conversion and restored afterwards.
x$text <- gsub("^[[:space:]]*", "", x$text) # Remove leading whitespace
x$text <- gsub("[[:space:]]*$", "", x$text) # Remove trailing whitespace
x$text <- gsub(" +", " ", x$text) # Collapse runs of spaces
x$text <- gsub("'", "%%", x$text) # Replace apostrophes with %%
x$text <- iconv(x$text, "latin1", "ASCII", sub = "") # Remove emojis / non-ASCII bytes
# Remove escapes like <U+A>. The pattern stops at the first ">" so that real
# text located between two escapes is no longer deleted (the previous greedy
# "<(.*)>" removed everything from the first "<" to the last ">").
x$text <- gsub("<[^>]*>", "", x$text)
x$text <- gsub("\\ \\. ", " ", x$text) # Replace orphaned full stops with a space
x$text <- gsub("  ", " ", x$text) # Replace double space with single space
x$text <- gsub("%%", "'", x$text) # Change %% back to apostrophes
x$text <- gsub("https(.*)*$", "", x$text) # Remove tweet URL (and everything after it)
x$text <- gsub("\\n", "-", x$text) # Replace line breaks with "-"
x$text <- gsub("--", "-", x$text) # Remove double "-" from double line breaks
# Decode the HTML entity for the ampersand. The previous call replaced "&"
# with "&" and therefore did nothing.
x$text <- gsub("&amp;", "&", x$text)
x$text[x$text == " "] <- "<no text>"

# Mark truncated tweets with a trailing ellipsis. Vectorised so that an empty
# data frame or an NA in `truncated` no longer aborts the script (the previous
# `for (i in 1:nrow(x))` loop iterated over c(1, 0) when there were no rows).
is_truncated <- x$truncated %in% TRUE
x$text[is_truncated] <- gsub("[[:space:]]*$", "...", x$text[is_truncated])

# Keep only the cleaned tweet text as a one-column data frame
cleanTweets <- select(x, "text")

# Run the sentiment analysis on the cleaned text
sentiment <- analyzeSentiment(cleanTweets)
# Numeric sentiment score taken from the SentimentQDAP column of the result
sentiment2 <- sentiment[["SentimentQDAP"]]
# Same score converted to a direction (i.e. positive, neutral or negative)
sentiment3 <- convertToDirection(sentiment2)

# Extract the calendar day (YYYY-MM-DD) from each tweet's creation timestamp
# and convert it to Date. A second as.Date(date, format = "%m/%d/%y") call was
# dropped: `date` was already a Date at that point, so it changed nothing, and
# its format string did not match the extracted text anyway.
date <- as.Date(str_extract(x$created, "\\d{4}-\\d{2}-\\d{2}"))

# Combine text, numeric score, direction and date into one data frame
df <- cbind(cleanTweets, sentiment2, sentiment3, date)
# Drop rows that contain an NA in any column
df <- df[complete.cases(df), ]


# Average sentiment score per day
df2 <- summarize(
  group_by(df, date),
  meanSentiment = mean(sentiment2, na.rm = TRUE)
)

# Show the daily means as an interactive table
DT::datatable(df2, editable = TRUE)

# Frequency of each sentiment direction (positive, neutral, negative) per day
freq <- summarise(
  group_by(df, date, sentiment3),
  Freq = n()
)

# Reshape from long to wide: one column per sentiment direction
freq2 <- spread(freq, key = sentiment3, value = Freq)

# Show the per-day counts as an interactive table
DT::datatable(freq2, editable = TRUE)


# Stacked bar chart of the daily sentiment frequencies, one colour per
# sentiment direction. The data frame is passed via `data` and the columns are
# referenced by name inside aes(); with `freq$...` inside aes() the legend
# title showed the raw expression "freq$sentiment3".
ggplot(data = freq, mapping = aes(x = date, y = Freq, fill = sentiment3)) +
  geom_col() + # same as geom_bar(stat = "identity")
  labs(x = "Date", y = "Sentiment Frequency", fill = "Sentiment")

# Standardise the Ecolab closing prices: z = (Close - mean) / standard deviation
mu <- mean(a$Close)
sd <- sd(a$Close)
# Copy the stock data and append the z-score as a new column
a2 <- a
a2$zScore <- (a$Close - mu) / sd

# Line + point plot of the mean daily sentiment score
p1 <- ggplot(data = df2, aes(x = date, y = meanSentiment, group = 1)) +
  geom_line() +
  geom_point() +
  ylab("Mean Twitter Sentiment Score")

# Line + point plot of the z-scores of the Ecolab closing price.
# The `+` before scale_x_date() was missing, so the date scale was evaluated
# as a separate, discarded expression and never applied to p2.
# NOTE(review): the date window is hard-coded; rows outside it are dropped
# from the plot. Adjust it to the period covered by the data.
p2 <- ggplot(data = a2, aes(x = Date, y = zScore, group = 1)) +
  geom_line() +
  geom_point() +
  ylab("Z-Score of closing stock price") +
  scale_x_date(date_breaks = "1 day",
               limits = as.Date(c("2019-05-03", "2019-05-12")))

# Stack both plots vertically
plot1 <- p1
plot2 <- p2
grid.arrange(plot1, plot2, nrow = 2)

# Draw both series in one ggplot: z-scores on the primary y-axis and the mean
# sentiment on a secondary axis. The sentiment values are multiplied by 20 for
# drawing, and the secondary axis divides by 20 to show the original scale.
ggplot() +
  geom_line(data = a2, mapping = aes(x = Date, y = zScore), size = 1) +
  geom_line(
    data = df2,
    mapping = aes(x = date, y = meanSentiment * 20),
    size = 1,
    color = "blue"
  ) +
  scale_x_date(name = "Date", labels = NULL) +
  scale_y_continuous(
    name = "z-Score of Closing Stock Price",
    sec.axis = sec_axis(~ . / 20, name = "Sentiment Score")
  ) +
  theme(
    axis.title.y = element_text(color = "grey"),
    axis.title.y.right = element_text(color = "blue")
  )

# Plot both series on the same base-graphics plot: sentiment on the left axis,
# stock z-score on the right axis.
# NOTE(review): an earlier comment here said "Shift stock prices back one
# day", but no shift is applied anywhere in this section.

# Widen the right margin so the right-hand axis label (drawn at line 3) fits;
# the previous settings are restored at the end of the section.
old_par <- par(mar = c(5, 4, 4, 5) + 0.1)

# Both plots must share one x-range. Without it each plot() call scales the
# x-axis to its own data, so the two overlaid curves are not aligned in time.
x_range <- range(c(df2$date, a2$Date), na.rm = TRUE)

plot(df2$date, df2$meanSentiment, type = "l", col = "red3",
     xlim = x_range, xlab = "Date", ylab = "Mean Sentiment Score")

# Draw the next plot on top of the existing one instead of starting a new page
par(new = TRUE)

plot(a2$Date, a2$zScore, type = "l", axes = FALSE,
     xlim = x_range, xlab = NA, ylab = NA, col = "blue")
axis(side = 4)
mtext(side = 4, line = 3, "Closing Stock Price z-Score")

# List both series; previously only the sentiment line appeared in the legend
legend("topright",
       legend = c("Mean Sentiment Score", "Closing Stock Price z-Score"),
       lty = 1, col = c("red3", "blue"))

par(old_par)
# Export the per-tweet sentiment table to an Excel workbook.
# Install the xlsx package only when it is missing, not on every run.
if (!requireNamespace("xlsx", quietly = TRUE)) {
  install.packages("xlsx")
}
library(xlsx)
z <- df

# The xlsx package exports write.xlsx(); write_xlsx() belongs to the writexl
# package, which is not loaded here, so the previous call failed.
# row.names = FALSE keeps the data frame's row numbers out of the sheet.
write.xlsx(z, "x.xlsx", row.names = FALSE)
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...