Один из подходов может быть
library(tidyverse)
library(lubridate)
library(anytime)
#convert character columns of sample data to date columns
df <- df %>%
mutate_if(grepl("/", .), ~ as.Date(., format = "%d/%m/%Y"))
#identify STARTDATE + 10 (20, 30 ...) years and then process data to have the desired result
df %>%
group_by(ID, DOB) %>%
mutate(temp = paste(seq.Date(unique(DOB), max(STARTDATE), by = "10 year")[-1], collapse = ',')) %>%
separate_rows(temp, sep = ',') %>%
group_by(STARTDATE, add = T) %>%
filter(anydate(temp) == min(anydate(temp)[anydate(temp) > STARTDATE])) %>%
ungroup() %>% #STARTDATE + 10 (20, 30 ...) years are identified here
right_join(df) %>%
mutate(STARTDATE = ifelse(!is.na(temp), paste(STARTDATE, temp, sep = ','), as.character(STARTDATE))) %>%
separate_rows(STARTDATE, sep = ',') %>%
select(-temp)
, что дает
ID DOB STARTDATE ENDDATE FAILURE
1 1 1975-10-10 1984-10-05 2003-05-15 0
2 2 1976-09-15 1984-03-10 1992-06-19 0
3 2 1976-09-15 1986-09-15 1992-06-19 0
4 2 1976-09-15 1993-02-22 1999-01-15 0
5 2 1976-09-15 1996-09-15 1999-01-15 0
6 2 1976-09-15 1999-04-15 2003-01-15 0
Пример данных:
df <- structure(list(ID = c(1L, 2L, 2L, 2L), DOB = c("10/10/1975",
"15/9/1976", "15/9/1976", "15/9/1976"), STARTDATE = c("5/10/1984",
"10/3/1984", "22/2/1993", "15/4/1999"), ENDDATE = c("15/5/2003",
"19/6/1992", "15/1/1999", "15/1/2003"), FAILURE = c(0L, 0L, 0L,
0L)), .Names = c("ID", "DOB", "STARTDATE", "ENDDATE", "FAILURE"
), class = "data.frame", row.names = c(NA, -4L))
# ID DOB STARTDATE ENDDATE FAILURE
#1 1 10/10/1975 5/10/1984 15/5/2003 0
#2 2 15/9/1976 10/3/1984 19/6/1992 0
#3 2 15/9/1976 22/2/1993 15/1/1999 0
#4 2 15/9/1976 15/4/1999 15/1/2003 0