Другим подходом может быть
library(tidyverse)
library(data.table)
# Reshape `df` to long form so runs of 0s and 1s can be detected: strip the
# parentheses, put each comma-separated token on its own row, then split every
# token into `val_pre` (the 0/1 part) and `val_post` (the remainder).
df1 <- df %>%
  mutate(val = gsub("[()]", "", val)) %>%
  separate_rows(val, sep = ",") %>%
  separate(col = val, into = c("val_pre", "val_post"))
# Mark the rows that belong to a run of repeated 0s or 1s within a date.
# `seq_ind` numbers the rows inside each run of identical `val_pre` (per date).
# A row is part of a repeat when its own index exceeds 1 or when the next row
# of the same date has an index above 1. The last row of a date has no next
# row, so its `flag` is NA unless its own index already exceeds 1.
setDT(df1)[
  , seq_ind := seq_len(.N), by = .(date_col, rleid(val_pre))
][
  , flag := shift(seq_ind, type = "lead") > 1 | seq_ind > 1, by = date_col
]
# Collapse every run of repeated 0s or 1s to a single token: a run of 0s keeps
# its smallest `val_post`, a run of 1s keeps its largest. Runs are identified
# per date with rleid(), so tokens stay in their original order and two
# separate runs of the same digit on one date are not merged. A lone token is
# a run of length one and passes through unchanged.
# NOTE(review): `val_post` is compared as text, which assumes the values are
# zero-padded to equal width (as in the sample data) -- confirm for real data.
df2 <- setDF(df1) %>%
  group_by(date_col, run_id = rleid(val_pre)) %>%
  mutate(val_post = ifelse(val_pre == 0, min(val_post), max(val_post))) %>%
  ungroup() %>%
  # the helper columns are not needed once the runs are resolved
  select(-seq_ind, -flag) %>%
  # one row per run: `run_id` is still present, so only in-run repeats collapse
  distinct() %>%
  mutate(cal_val = paste0("(", val_pre, "_", val_post, ")")) %>%
  # rebuild the comma-separated string, one row per date
  group_by(date_col) %>%
  summarise(cal_val = paste(cal_val, collapse = ","))
Это дает следующий результат:
df2
date_col cal_val
1 1/12/2017 0:15 (0_04),(1_08),(0_12),(1_14)
2 1/12/2017 0:30 (0_22),(1_29)
3 1/12/2017 0:45 (1_38),(0_40),(1_44)
4 1/12/2017 1:00 (1_57),(0_59)
5 1/12/2017 1:15 (0_07),(1_14)
Пример данных:
# Sample input: one row per timestamp; `val` holds a comma-separated list of
# "(digit_value)" tokens.
df <- data.frame(
  date_col = c(
    "1/12/2017 0:15", "1/12/2017 0:30", "1/12/2017 0:45",
    "1/12/2017 1:00", "1/12/2017 1:15"
  ),
  val = c(
    "(0_04),(1_08),(0_12),(1_14)",
    "(0_22),(0_25),(1_29)",
    "(1_34),(1_38),(0_40),(1_44)",
    "(1_47),(1_49),(1_53),(1_57),(0_59)",
    "(0_07),(0_09),(0_10),(0_13),(1_14)"
  ),
  stringsAsFactors = FALSE
)