С помощью dplyr и tidyr:
library(tidyr)
library(dplyr)
library(stringr)
# Count events per id and video, then spread the counts into one column per
# "<video>_<event>" combination. Combinations that never occur are left as NA.
dat %>%
  # Reduce the raw event label (e.g. 'load_video"') to its bare action keyword.
  # The column is addressed by name rather than by position so the step keeps
  # working if the column order of `dat` changes.
  mutate(event_type = str_extract(event_type, "load|play|seek|pause|stop")) %>%
  # Build the future column name: video and event joined with the default "_".
  unite(video_event_type, video, event_type) %>%
  count(id, video_event_type) %>%
  # names_sort = TRUE orders the new columns alphabetically, which matches the
  # column order produced by the superseded spread().
  pivot_wider(
    names_from = video_event_type,
    values_from = n,
    names_sort = TRUE
  )
# # A tibble: 2 x 9
# id `Video - Math and Speed_load` `Video - Math and Speed_pause` `Video - Math and Speed_play` `Video - Math and Speed_stop` `Video -math_load` `Video -math_pause` `Video -math_play` `Video -math_seek`
# <int> <int> <int> <int> <int> <int> <int> <int> <int>
# 1 21 NA NA NA NA 2 2 2 2
# 2 22 1 1 1 1 NA NA NA NA
Правка: чуть более сложное решение с использованием complete, чтобы получить ожидаемые нули:
# Same reshaping as the first pipeline, but event types that a given
# (id, video) pair never produced are reported as 0 instead of being absent.
# (id, video) pairs that do not exist in the data at all still end up as NA.
dat %>%
  # Reduce the raw event label (e.g. 'load_video"') to its bare action keyword.
  # The column is addressed by name rather than by position so the step keeps
  # working if the column order of `dat` changes.
  mutate(event_type = str_extract(event_type, "load|play|seek|pause|stop")) %>%
  count(id, video, event_type) %>%
  # nesting() restricts the expansion to (id, video) pairs present in the data;
  # every such pair is crossed with every event type, and the rows created by
  # the expansion get an integer zero count.
  complete(nesting(id, video), event_type, fill = list(n = 0L)) %>%
  unite(video_event_type, video, event_type, sep = ".") %>%
  # names_sort = TRUE orders the new columns alphabetically, which matches the
  # column order produced by the superseded spread().
  pivot_wider(
    names_from = video_event_type,
    values_from = n,
    names_sort = TRUE
  )
# # A tibble: 2 x 11
# id `Video - Math and Speed.load` `Video - Math and Speed.pause` `Video - Math and Speed.play` `Video - Math and Speed.seek` `Video - Math and Speed.stop` `Video -math.load` `Video -math.pause` `Video -math.play` `Video -math.seek` `Video -math.stop`
# <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
# 1 21 NA NA NA NA NA 2 2 2 2 0
# 2 22 1 1 1 0 1 NA NA NA NA NA
Здесь dat определён так:
# Example input: one row per logged video event, parsed from an inline
# pipe-delimited table with the columns event_type, video and id.
dat <- read.table(
  text = 'event_type | video |id
load_video" | Video -math | 21
load_video" | Video -math | 21
load_video" | Video - Math and Speed | 22
play_video" | Video -math | 21
seek_video" | Video -math | 21
pause_video" | Video -math | 21
seek_video" | Video -math | 21
play_video" | Video -math | 21
pause_video" | Video -math | 21
play_video" | Video - Math and Speed | 22
pause_video" | Video - Math and Speed | 22
stop_video" | Video - Math and Speed | 22
',
  sep = "|",
  header = TRUE,
  # Quote handling is switched off so the stray double quote at the end of
  # each event label is read as an ordinary character.
  quote = "",
  # Drop the padding blanks around the "|" separators.
  strip.white = TRUE,
  stringsAsFactors = FALSE
)