Я бы использовал пакет tibbletime:
library(tibbletime)
library(tidyverse)
# Average the sentiment scores over consecutive two-month periods.
# The rows are ordered by timestamp before the time index is declared.
arrange(sentiment_dataset, created_at) %>%
  as_tbl_time(index = created_at) %>%
  # Collapse each timestamp onto its two-month period; `clean = TRUE`
  # requests rounded period boundaries.
  collapse_by(period = "2 months", clean = TRUE) %>%
  group_by(created_at) %>%
  summarise(
    negative = mean(negative),
    positive = mean(positive)
  )
# A time tibble: 48 x 3
# Index: created_at
created_at negative positive
<dttm> <dbl> <dbl>
1 2010-09-01 00:00:00 0.143 1.43
2 2010-11-01 00:00:00 0.273 0.727
3 2011-01-01 00:00:00 0.208 0.792
4 2011-03-01 00:00:00 0.5 1.38
5 2011-05-01 00:00:00 0.25 0.75
6 2011-07-01 00:00:00 1 1
7 2011-09-01 00:00:00 0 1.5
8 2011-11-01 00:00:00 0.333 1
9 2012-01-01 00:00:00 0 0
10 2012-03-01 00:00:00 0 1.6
# ... with 38 more rows
Естественно, после этого результат можно передать дальше по конвейеру в gather()
(в современных версиях tidyr — pivot_longer()) и построить график, например:
# Plot the two-month mean of each sentiment score over time.
sentiment_dataset %>%
  arrange(created_at) %>%
  as_tbl_time(index = created_at) %>%
  # Collapse each timestamp onto its two-month period so that grouping by
  # `created_at` aggregates whole periods.
  collapse_by("2 months", clean = TRUE) %>%
  group_by(created_at) %>%
  summarise(
    negative = mean(negative),
    positive = mean(positive)
  ) %>%
  # Reshape to long format (one row per period and sentiment type) so the
  # sentiment type can be mapped to colour. `pivot_longer()` replaces the
  # superseded `gather()`.
  pivot_longer(
    cols = -created_at,
    names_to = "sentiment",
    values_to = "mean_sentiment"
  ) %>%
  ggplot(aes(created_at, mean_sentiment, color = sentiment)) +
  geom_point() +
  geom_line() +
  geom_smooth()