Для каждого child_id можно выбрать предпоследнюю дату custody_start (или единственную, если запись всего одна) и последнюю дату custody_end.
library(dplyr)
# One row per child: the second-to-last custody_start paired with the last
# custody_end. Positions follow the existing row order within each child_id.
df %>%
  group_by(child_id) %>%
  summarise(
    # max(..., 1L) falls back to the only row when a child has a single record.
    custody_start = nth(custody_start, max(n() - 1L, 1L)),
    custody_end = nth(custody_end, -1L)
  )
# child_id custody_start custody_end
# <int> <fct> <fct>
#1 1 04/15/2018 05/01/2018
#2 2 05/29/2018 06/16/2018
#3 3 01/09/2019 06/09/2019
#4 4 03/15/2020 03/29/2020
Данные:
# Sample data: ten custody periods for four children. Dates are stored as
# factors holding "mm/dd/yyyy" strings; factor levels are the sorted unique
# values, matching the original dput() representation.
df <- data.frame(
  child_id = rep(1:4, times = c(3L, 3L, 3L, 1L)),
  custody_start = factor(c(
    "01/01/2018", "04/15/2018", "05/01/2018",
    "05/26/2018", "05/29/2018", "06/16/2018",
    "03/22/2018", "01/09/2019", "06/09/2019",
    "03/15/2020"
  )),
  custody_end = factor(c(
    "03/29/2018", "04/30/2018", "05/01/2018",
    "05/28/2018", "06/15/2018", "06/16/2018",
    "07/15/2019", "05/09/2019", "06/09/2019",
    "03/29/2020"
  ))
)