When I knit this RMarkdown document, which analyzes brand perception using text-analysis and visualization methods, I get an error in the sentiment-analysis "plot by date" step.
# Sentiment analysis - plot by date - understanding cumulative sentiment score movement
```{r}
mysentimentvalues <- data.frame(get_sentiment(tweet_en_df$text))
colnames(mysentimentvalues)<- "polarity"
mysentimentvalues$date <- tweet_en_df$created_at
result <- aggregate(polarity ~ date, data = mysentimentvalues, sum)
result
```
Below is the error:

```
Error in aggregate.data.frame(mf[1L], mf[-1L], FUN = FUN, ...) :
  no rows to aggregate
Calls: ... aggregate -> aggregate.formula -> aggregate.data.frame
Execution halted
```
Please let me know how I can resolve this.
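From what I understand, "no rows to aggregate" means the `polarity ~ date` formula dropped every row as incomplete, which would happen if the date conversion turned all of `created_at` into `NA`. A quick check (a sketch; I have not verified its output here):
```{r}
# If this equals nrow(tweet_en_df), every date failed to parse and
# polarity ~ date has no complete rows left to aggregate.
sum(is.na(tweet_en_df$created_at))
```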
The full code is below.
```{r}
library(readxl)
library(tm)
library(ggplot2)
library(gridExtra)
library(RColorBrewer)
library(wordcloud)
library(topicmodels)
library(syuzhet)
```
```{r}
tweet_df <- read.csv("C:/Users/HP/Desktop/BA/Text Analytics/Assignment/Tweets.csv")
```
# Convert tweets from factor to character and created_at from factor to the correct date format
```{r echo=FALSE}
tweet_en_df<-subset(tweet_df,tweet_df$lang=='en')
tweet_en_df$created_at <- as.Date(tweet_en_df$created_at,format= "%m/%d/%y")
#tweet_en_df$created_at <- as.Date(tweet_en_df$created_at,format, tryFormats = c("%m/%d/%y"))
tweet_en_df$text <- as.character(tweet_en_df$text)
str(tweet_en_df)
```
Note that the `echo = FALSE` parameter was added to the code chunk to prevent the R code from being printed in the rendered output.
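Since everything downstream keys off `created_at`, it is worth confirming that the conversion actually succeeded. If the CSV stores Twitter's native timestamp strings (for example `Wed Apr 10 14:01:32 +0000 2019`, which is an assumption on my part), then `"%m/%d/%y"` would silently return `NA` for every row; a sketch of a check and an alternative parse:
```{r}
# Compare the raw strings against the parsed dates; an all-NA second
# line means the format string does not match the data.
head(tweet_df$created_at)
head(tweet_en_df$created_at)

# Hypothetical alternative, assuming Twitter's native format and an
# English locale; adjust to whatever the CSV actually holds.
# tweet_en_df$created_at <- as.Date(tweet_df$created_at[tweet_df$lang == 'en'],
#                                   format = "%a %b %d %H:%M:%S %z %Y")
```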
**Cleaning the text data by removing links, tags and delimiters.**
```{r}
mycorpus <- VCorpus(VectorSource(tweet_en_df$text))
writeLines(strwrap(mycorpus[[1]]$content))
```
```{r}
mycorpus <- tm_map(mycorpus,content_transformer(tolower))
removeurl <- function(x) gsub("http[^[:space:]]*", "", x)
mycorpus <- tm_map(mycorpus, content_transformer(removeurl))
writeLines(strwrap(mycorpus[[5]]$content,60))
```
```{r}
removeUsername <- function(x) gsub("@[^[:space:]]*", "", x)
mycorpus <- tm_map(mycorpus, content_transformer(removeUsername))
writeLines(strwrap(mycorpus[[5]]$content,60))
```
```{r}
myStopWords <- c(stopwords('english'), "will", "via", "new", "foldi", "2019", "samsung's", "phone")
mycorpus <- tm_map(mycorpus, removeWords, myStopWords)
writeLines(strwrap(mycorpus[[5]]$content,60))
```
```{r}
removeSingle <- function(x) gsub(" . ", " ", x) # drop stray single characters between spaces
mycorpus <- tm_map(mycorpus, content_transformer(removeSingle))
writeLines(strwrap(mycorpus[[5]]$content,60))
```
```{r}
mycorpus <- tm_map(mycorpus,removePunctuation)
writeLines(strwrap(mycorpus[[5]]$content,60))
```
```{r}
mycorpus <- tm_map(mycorpus, stripWhitespace) # keep the same object name so later steps use the cleaned corpus
writeLines(strwrap(mycorpus[[5]]$content,60))
```
```{r}
length(mycorpus)
```
Finding the terms that are used most frequently
```{r}
tdm <- TermDocumentMatrix(mycorpus)
tdm
```
```{r}
freq.terms <- findFreqTerms(tdm,lowfreq = 35)
freq.terms
```
# Replace words with their proper forms
```{r}
replaceWord <- function(corpus, oldword, newword)
{
tm_map(corpus, content_transformer(gsub), pattern=oldword, replacement=newword)
}
mycorpus<- replaceWord(mycorpus, "samsung's", "samsung")
mycorpus<- replaceWord(mycorpus, "samsungs", "samsung")
```
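One thing I noticed while tidying this up: `tdm` was built before these replacements, so the counts below would not reflect them. Rebuilding the matrix here (a small addition on my part) keeps the frequencies consistent:
```{r}
# Rebuild the term-document matrix so the replacements above show up
# in the frequency counts and the word cloud below.
tdm <- TermDocumentMatrix(mycorpus)
```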
# Term frequency calculation
```{r}
term.freq<- rowSums(as.matrix(tdm))
term.freq<- subset(term.freq,term.freq>30)
df30 <- data.frame(term=names(term.freq),freq=term.freq)
term.freq<- subset(term.freq,term.freq>40)
df40 <- data.frame(term=names(term.freq),freq=term.freq)
df40
```
# Plotting the graph of frequent terms
```{r}
p30 <- ggplot(df30, aes(reorder(term, freq), freq)) + theme_bw() +
  geom_bar(stat = "identity") + coord_flip() +
  labs(title = "Term Frequency @30", x = "Terms", y = "Term Counts")
p40 <- ggplot(df40, aes(reorder(term, freq), freq)) + theme_bw() +
  geom_bar(stat = "identity") + coord_flip() +
  labs(title = "Term Frequency @40", x = "Terms", y = "Term Counts")
grid.arrange(p30, p40, ncol = 2)
```
# Plotting the word cloud
```{r}
word.freq <-sort(rowSums(as.matrix(tdm)), decreasing= F)
pal<- brewer.pal(8, "Dark2")
wordcloud(words = names(word.freq), freq = word.freq, min.freq = 10, random.order = F, colors = pal, max.words = 150)
```
# Find associations
```{r}
list1 <- findAssocs(tdm,"smartphone",0.2)
corrdf1 <- t(data.frame(t(sapply(list1,c))))
corrdf1
```
# Topic Modeling to identify latent/hidden topics using LDA technique
```{r}
dtm <- as.DocumentTermMatrix(tdm)
rowTotals <- apply(dtm , 1, sum)
NullDocs <- dtm[rowTotals==0, ]
dtm <- dtm[rowTotals> 0, ]
#NullDocs$dimnames
#NullDocs$dimnames$Docs
length(NullDocs$dimnames$Docs)
```
```{r}
if (length(NullDocs$dimnames$Docs) > 0) {
tweet_en_df <- tweet_en_df[-as.numeric(NullDocs$dimnames$Docs),]
}
lda <- LDA(dtm, k = 5) # find 5 topics
term <- terms(lda, 7) # first 7 terms of each topic
(term <- apply(term, MARGIN = 2, paste, collapse = ", "))
```
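To see how the topics line up with individual tweets, the dominant topic per document can be pulled out with `topicmodels::topics()` (a sketch):
```{r}
# Most likely topic for each tweet; table() shows how the tweets are
# distributed across the 5 topics.
dominant <- topics(lda, 1)
table(dominant)
```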
# Sentiment analysis
```{r}
emotions <- get_nrc_sentiment(tweet_en_df$text)
emo_bar<- colSums(emotions)
emo_sum <- data.frame(count=emo_bar,emotion=names(emo_bar))
emo_sum$emotion<- factor(emo_sum$emotion,levels = emo_sum$emotion[order(emo_sum$count,decreasing = TRUE)])
```
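Side note: `get_nrc_sentiment()` returns the eight NRC emotions plus overall `negative`/`positive` columns, so the chart below mixes both kinds of score; a sketch to separate them if that matters:
```{r}
# Keep only the eight emotions, dropping the overall polarity columns.
emo_only <- emotions[, !(colnames(emotions) %in% c("negative", "positive"))]
colSums(emo_only)
```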
# Visualize the emotions from NRC sentiment
```{r}
library(plotly)
```
```{r}
plot_ly(emo_sum,x=~emotion,y=~count,type = "bar",color =~emotion) %>%
layout(xaxis=list(title=""),showlegend=FALSE,title="Twitter Sentiment Analysis")
```
# Sentiment analysis - plot by date - understanding cumulative sentiment score movement
```{r}
mysentimentvalues <- data.frame(get_sentiment(tweet_en_df$text))
colnames(mysentimentvalues)<- "polarity"
mysentimentvalues$date <- tweet_en_df$created_at
result <- aggregate(polarity ~ date, data = mysentimentvalues, sum)
result
```
# Plot the curve
```{r}
plot(result,type="l")
```
# Sentiment analysis - plot by date - understanding mean sentiment score movement
```{r}
result1 <- aggregate(polarity ~ date, data = mysentimentvalues, mean)
result1
```
# Plot the curve
```{r}
plot(result1,type="l")
```
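In case it helps narrow things down: my current suspicion is that `"%m/%d/%y"` does not match the raw `created_at` strings, so every date becomes `NA` and `aggregate()` is left with no complete rows. Once the dates parse, a guarded version of the aggregation (a sketch, not verified output) would be:
```{r}
# Keep only rows with a valid date before aggregating; if this still
# errors with "no rows to aggregate", the date parsing upstream is
# the real problem.
ok <- !is.na(mysentimentvalues$date)
result <- aggregate(polarity ~ date, data = mysentimentvalues[ok, ], sum)
result
```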