Решение Base R:
# Convert the factor date columns to Date. Going through as.character()
# parses the factor labels (e.g. "2015-1-18") rather than the integer codes.
cleaned_df <- within(df, {
  Date_start <- as.Date(as.character(Date_start), format = "%Y-%m-%d")
  Date <- as.Date(as.character(Date), format = "%Y-%m-%d")
})

# Aggregate to find the min Date per ID: within each ID keep the Date with the
# smallest signed difference (Date - Date_start). Since Date_start is constant
# within an ID in the sample data, this is the earliest Date of the group.
# NOTE(review): the difference was previously computed as Date_start - Date,
# which selected the latest Date instead and disagreed with the tidyverse
# variant; confirm that the earliest Date is the intended result.
data.frame(
  do.call(
    "rbind",
    lapply(
      split(cleaned_df, cleaned_df$ID),
      function(x) {
        data.frame(
          ID = unique(x$ID),
          Date = x$Date[which.min(x$Date - x$Date_start)]
        )
      }
    )
  ),
  row.names = NULL
)
Решение Tidyverse:
library(tidyverse)
# For each ID return the Date with the smallest signed difference
# (Date - Date_start).
df %>%
  # Parse every column whose name contains "date" (case-insensitive match).
  # across() replaces the deprecated funs() / superseded mutate_if(), and
  # as.character() makes sure factor labels, not integer codes, are parsed.
  mutate(
    across(
      contains("date"),
      ~ as.Date(as.character(.x), format = "%Y-%m-%d")
    )
  ) %>%
  group_by(ID) %>%
  summarise(Date = Date[which.min(Date - Date_start)]) %>%
  ungroup()
Данные (спасибо @Ronak Shah):
# Sample data: five rows across two IDs. Both date columns are stored as
# factors; the levels are spelled out explicitly so they do not depend on
# locale-specific sort order.
df <- data.frame(
  ID = c(1L, 1L, 2L, 2L, 2L),
  Date_start = factor(
    c("2016-11-02", "2016-11-02", "2019-12-22", "2019-12-22", "2019-12-22"),
    levels = c("2016-11-02", "2019-12-22")
  ),
  Date = factor(
    c("2020-2-22", "2015-1-18", "2017-3-2", "2019-2-9", "2017-12-1"),
    levels = c("2015-1-18", "2017-12-1", "2017-3-2", "2019-2-9", "2020-2-22")
  )
)