Начнём со следующей таблицы данных (data frame):
# Preview the first rows of the example tibble `df`.
head(df)
# A tibble: 6 x 4
Date Time Sender Message
<date> <chr> <chr> <fct>
1 2020-01-01 00:00:00 Person1 C
2 2020-01-01 01:00:00 Person1 C
3 2020-01-01 02:00:00 Person1 B
4 2020-01-01 03:00:00 Person1 B
5 2020-01-01 04:00:00 Person1 C
6 2020-01-01 05:00:00 Person1 E
Сначала можно отфильтровать строки по заданным часам: создайте столбец `Date_Time` с помощью функции `ymd_hms` из пакета `lubridate`, а затем примените функцию `filter` из пакета `dplyr`, чтобы оставить только сообщения, отправленные между 9:00 и 17:00.
library(lubridate)
library(dplyr)
# Keep only the messages whose time of day lies in the closed window
# 09:00:00-17:00:00.
# `Date` and `Time` are pasted into one string and parsed by ymd_hms() into
# the date-time column `Date_Time`.
# The window is tested on seconds since midnight: checking only
# `hour(Date_Time) <= 17` would also keep 17:00:01-17:59:59, which is past
# the 17:00 cut-off.
df %>%
  mutate(Date_Time = ymd_hms(paste(Date, Time))) %>%
  filter(
    between(
      hour(Date_Time) * 3600 + minute(Date_Time) * 60 + second(Date_Time),
      9 * 3600,
      17 * 3600
    )
  )
# A tibble: 18 x 5
Date Time Sender Message Date_Time
<date> <chr> <chr> <fct> <dttm>
1 2020-01-01 09:00:00 Person1 C 2020-01-01 09:00:00
2 2020-01-01 10:00:00 Person1 E 2020-01-01 10:00:00
3 2020-01-01 11:00:00 Person1 C 2020-01-01 11:00:00
4 2020-01-01 12:00:00 Person1 C 2020-01-01 12:00:00
5 2020-01-01 13:00:00 Person1 A 2020-01-01 13:00:00
6 2020-01-01 14:00:00 Person1 D 2020-01-01 14:00:00
7 2020-01-01 15:00:00 Person1 A 2020-01-01 15:00:00
8 2020-01-02 16:00:00 Person1 A 2020-01-02 16:00:00
9 2020-01-02 17:00:00 Person1 E 2020-01-02 17:00:00
10 2020-01-01 09:00:00 Person2 D 2020-01-01 09:00:00
11 2020-01-01 10:00:00 Person2 E 2020-01-01 10:00:00
12 2020-01-01 11:00:00 Person2 E 2020-01-01 11:00:00
13 2020-01-01 12:00:00 Person2 C 2020-01-01 12:00:00
14 2020-01-01 13:00:00 Person2 A 2020-01-01 13:00:00
15 2020-01-01 14:00:00 Person2 B 2020-01-01 14:00:00
16 2020-01-01 15:00:00 Person2 E 2020-01-01 15:00:00
17 2020-01-02 16:00:00 Person2 E 2020-01-02 16:00:00
18 2020-01-02 17:00:00 Person2 D 2020-01-02 17:00:00
Затем можно с помощью `group_by` сгруппировать данные по отправителю и сообщению, подсчитать частоту каждого сообщения, а затем оставить для каждого отправителя сообщения с максимальной частотой.
# For every sender, find the message(s) sent most often within the closed
# time-of-day window 09:00:00-17:00:00.
# The window is tested on seconds since midnight: checking only
# `hour(Date_Time) <= 17` would also keep 17:00:01-17:59:59, which is past
# the 17:00 cut-off.
df %>%
  mutate(Date_Time = ymd_hms(paste(Date, Time))) %>%
  filter(
    between(
      hour(Date_Time) * 3600 + minute(Date_Time) * 60 + second(Date_Time),
      9 * 3600,
      17 * 3600
    )
  ) %>%
  # One row per (Sender, Message) pair with its frequency in column `n`.
  group_by(Sender, Message) %>%
  count() %>%
  # Regroup by sender only, so max(n) is taken within each sender.
  group_by(Sender) %>%
  # `==` keeps every message tied for the maximum; the result stays grouped
  # by Sender.
  filter(n == max(n))
# A tibble: 3 x 3
# Groups: Sender [2]
Sender Message n
<chr> <fct> <int>
1 Person1 A 3
2 Person1 C 3
3 Person2 E 4
Если вы хотите узнать количество сообщений, отправленных каждым отправителем за определенный период времени, вы можете сделать:
# Count the messages each sender sent within the closed time-of-day window
# 09:00:00-17:00:00.
# The window is tested on seconds since midnight: checking only
# `hour(Date_Time) <= 17` would also keep 17:00:01-17:59:59, which is past
# the 17:00 cut-off.
df %>%
  mutate(Date_Time = ymd_hms(paste(Date, Time))) %>%
  filter(
    between(
      hour(Date_Time) * 3600 + minute(Date_Time) * 60 + second(Date_Time),
      9 * 3600,
      17 * 3600
    )
  ) %>%
  # count() on a grouped tibble yields one row per sender (column `n`) and
  # keeps the Sender grouping.
  group_by(Sender) %>%
  count()
# A tibble: 2 x 2
# Groups: Sender [2]
Sender n
<chr> <int>
1 Person1 9
2 Person2 9
Отвечает ли это на ваш вопрос?
Данные
# Example data used by the snippets above: a 50-row tibble with the columns
# Date (Date), Time (character, "HH:MM:SS"), Sender (character) and
# Message (factor with levels A-E).
# NOTE(review): this looks like dput() output and is not assigned to anything;
# presumably it should be assigned to `df` before running the pipelines.
structure(list(Date = structure(c(18262, 18262, 18262, 18262,
18262, 18262, 18262, 18262, 18262, 18262, 18262, 18262, 18262,
18262, 18262, 18262, 18263, 18263, 18263, 18263, 18263, 18263,
18263, 18263, 18263, 18262, 18262, 18262, 18262, 18262, 18262,
18262, 18262, 18262, 18262, 18262, 18262, 18262, 18262, 18262,
18262, 18263, 18263, 18263, 18263, 18263, 18263, 18263, 18263,
18263), class = "Date"), Time = c("00:00:00", "01:00:00", "02:00:00",
"03:00:00", "04:00:00", "05:00:00", "06:00:00", "07:00:00", "08:00:00",
"09:00:00", "10:00:00", "11:00:00", "12:00:00", "13:00:00", "14:00:00",
"15:00:00", "16:00:00", "17:00:00", "18:00:00", "19:00:00", "20:00:00",
"21:00:00", "22:00:00", "23:00:00", "00:00:00", "00:00:00", "01:00:00",
"02:00:00", "03:00:00", "04:00:00", "05:00:00", "06:00:00", "07:00:00",
"08:00:00", "09:00:00", "10:00:00", "11:00:00", "12:00:00", "13:00:00",
"14:00:00", "15:00:00", "16:00:00", "17:00:00", "18:00:00", "19:00:00",
"20:00:00", "21:00:00", "22:00:00", "23:00:00", "00:00:00"),
Sender = c("Person1", "Person1", "Person1", "Person1", "Person1",
"Person1", "Person1", "Person1", "Person1", "Person1", "Person1",
"Person1", "Person1", "Person1", "Person1", "Person1", "Person1",
"Person1", "Person1", "Person1", "Person1", "Person1", "Person1",
"Person1", "Person1", "Person2", "Person2", "Person2", "Person2",
"Person2", "Person2", "Person2", "Person2", "Person2", "Person2",
"Person2", "Person2", "Person2", "Person2", "Person2", "Person2",
"Person2", "Person2", "Person2", "Person2", "Person2", "Person2",
"Person2", "Person2", "Person2"), Message = structure(c(3L,
3L, 2L, 2L, 3L, 5L, 4L, 1L, 2L, 3L, 5L, 3L, 3L, 1L, 4L, 1L,
1L, 5L, 3L, 2L, 2L, 1L, 3L, 4L, 1L, 3L, 5L, 4L, 2L, 5L, 1L,
1L, 2L, 3L, 4L, 5L, 5L, 3L, 1L, 2L, 5L, 5L, 4L, 5L, 2L, 1L,
1L, 3L, 1L, 5L), .Label = c("A", "B", "C", "D", "E"), class = "factor")), row.names = c(NA,
-50L), class = c("tbl_df", "tbl", "data.frame"))