Мы можем преобразовать переменные времени в класс `POSIXct`, затем для каждой комбинации `species` и `date` получить минимальное значение `start.time`, максимальное значение `end.time` и уникальные (`unique`) значения `remarks`.
library(dplyr)
# Parse every column whose name ends in "time" as POSIXct, then collapse each
# species/date group to its earliest start, latest end, and the distinct
# non-missing remarks joined into one comma-separated string.
df %>%
  # across() replaces the superseded mutate_at() scoped verb.
  mutate(across(ends_with("time"), as.POSIXct)) %>%
  group_by(species, date) %>%
  summarise(
    start.time = min(start.time, na.rm = TRUE),
    end.time = max(end.time, na.rm = TRUE),
    remarks = toString(unique(na.omit(remarks))),
    # Return an ungrouped result instead of one still grouped by species.
    .groups = "drop"
  )
#   species date       start.time          end.time            remarks
#   <fct>   <fct>      <dttm>              <dttm>              <chr>
# 1 A       2020-02-01 2020-02-01 08:00:00 2020-02-02 00:00:00 slow
# 2 A       2020-02-02 2020-02-01 08:00:00 2020-02-01 11:30:00 medium
То же самое в базовом R:
# Base R variant: compute each summary with aggregate(), then join the results.

# Convert the time columns to POSIXct. They are selected by name so the step
# does not depend on the column order of df.
df[c("start.time", "end.time")] <-
  lapply(df[c("start.time", "end.time")], as.POSIXct)

# Earliest start.time per species/date. na.pass keeps rows with missing values
# in the model frame; na.rm = TRUE then drops the NAs inside min().
df1 <- aggregate(start.time ~ species + date, df, min, na.rm = TRUE,
                 na.action = na.pass)

# Latest end.time per species/date.
df2 <- aggregate(end.time ~ species + date, df, max, na.rm = TRUE,
                 na.action = na.pass)

# Distinct non-missing remarks per species/date as one comma-separated string.
df3 <- aggregate(remarks ~ species + date, df,
                 function(x) toString(unique(na.omit(x))),
                 na.action = na.pass)

# Merge the three summaries; merge() joins on the columns they share
# (species and date).
Reduce(merge, list(df1, df2, df3))
Данные:
# Sample data: four observations of species "A" over two dates. Every column
# is a factor; start.time, end.time and remarks contain missing values.
# The timestamp labels carry a space between the date and the time of day.
df <- structure(list(
  species = structure(c(1L, 1L, 1L, 1L), .Label = "A", class = "factor"),
  date = structure(c(1L, 1L, 2L, 2L),
                   .Label = c("2020-02-01", "2020-02-02"), class = "factor"),
  start.time = structure(c(1L, 2L, 1L, NA),
                         .Label = c("2020-02-01 8:00:00",
                                    "2020-02-01 9:00:00"),
                         class = "factor"),
  end.time = structure(c(2L, 3L, NA, 1L),
                       .Label = c("2020-02-01 11:30:00",
                                  "2020-02-01 12:00:00",
                                  "2020-02-02 00:00:00"),
                       class = "factor"),
  remarks = structure(c(2L, NA, 1L, NA),
                      .Label = c("medium", "slow"), class = "factor")),
  class = "data.frame", row.names = c(NA, -4L))