Мы можем использовать data.table
library(data.table)
# Attach each id's filter year to the df rows (it arrives as `i.year`).
joined <- setDT(df)[df_filter, on = .(id)]
# Discard rows whose year equals the filter year, then for every id keep the
# first remaining year that lies below the filter year.
joined[
  year != i.year,
  .(year = year[which(year < i.year)[1L]]),
  by = id
]
# id year
#1: 1 2005
#2: 2 2017
#3: 3 1998
Или использовать non-equi join
# Convert df in place and add a duplicate of `year`: the copy (`year1`) is
# consumed by the join condition, so the untouched `year` can be returned.
dt <- setDT(df)
dt[, year1 := year]
# Non-equi join: same id and year1 below the filter year; mult = "first"
# keeps only the first matching df row for each df_filter row.
dt[
  df_filter,
  .(id, year),
  on = .(id, year1 < year),
  mult = "first"
]
# id year
#1: 1 2005
#2: 2 2017
#3: 3 1998
Или без присвоения (:=) в исходном наборе данных
# Build a separate table carrying a duplicate of `year` (as `year1`), so no
# column is added to df itself (setDT still converts df to a data.table).
with_key <- setDT(df)[, .(year1 = year, year, id)]
# Non-equi join: same id and year1 below the filter year; mult = "first"
# keeps only the first matching row for each df_filter row.
with_key[
  df_filter,
  .(id, year),
  on = .(id, year1 < year),
  mult = "first"
]
Или, как прокомментировал @thelatemail
# For each df_filter row, take the largest df year that is below the filter
# year; `x.year` refers to the year column of df rather than the join value.
per_filter <- setDT(df)[
  df_filter,
  .(yearM = max(x.year)),
  on = .(id, year < year),
  by = .EACHI
]
# Keep the id and expose the computed maximum under the name `year`.
per_filter[, .(id, year = yearM)]
Или используя tidyverse с fuzzyjoin
library(tidyverse)
library(fuzzyjoin)
# Pair df rows with df_filter rows that share the id and whose filter year is
# greater than the df year; unmatched df rows are kept (left join).
matched <- fuzzy_left_join(
  df,
  df_filter,
  by = c("id", "year"),
  match_fun = list(`==`, `<`)
)
# Per id, report the first df year that is below the matched filter year.
matched %>%
  group_by(id = id.x) %>%
  summarise(year = year.x[which(year.x < year.y)[1L]])
# A tibble: 3 x 2
# id year
# <int> <int>
#1 1 2005
#2 2 2017
#3 3 1998
Данные
# Sample input: candidate years for three ids, listed newest-first per id.
df <- data.frame(
  id = rep(1:3, times = c(4L, 2L, 4L)),
  year = c(
    2019L, 2018L, 2005L, 2004L,
    2018L, 2017L,
    1998L, 1997L, 1996L, 1995L
  )
)
# Sample input: one threshold year per id.
df_filter <- data.frame(
  id = 1:3,
  year = c(2017L, 2018L, 2000L)
)