При большом наборе данных (> 40 миллионов строк) пакет data.table может быть хорошим выбором:
library(data.table)

# Codes whose presence in either diagnosis column flags the whole id group.
cond1 <- c("D95", "A01")

# setDT() turns df into a data.table in place and `:=` adds the `condit`
# column by reference. Within each id group any() collapses the row-wise
# matches to a single TRUE/FALSE, so a plain if/else (rather than the
# vectorised ifelse()) picks the label; `:=` recycles it to every row of
# the group. %in% never returns NA, so the condition is always TRUE or FALSE.
setDT(df)[
  ,
  condit := if (any(icpc %in% cond1 | icpc2 %in% cond1)) "yes" else "no",
  by = id
]
df
id icpc icpc2 reg.date condit
1: 123 D95 F15 19JUN2015 yes
2: 123 F85 15AUG2016 yes
3: 332 A01 16MAR2010 yes
4: 332 A04 20JAN2018 yes
5: 332 K20 20FEB2017 yes
6: 100 B10 01JUN2017 no
7: 100 A04 11JAN2008 no
8: 113 T08 18MAR2018 yes
9: 113 P28 19JAN2017 yes
10: 113 D95 A01 16JAN2013 yes
11: 113 A04 01MAY2009 yes
12: 551 B12 A01 03APR2011 yes
13: 551 D95 09MAY2015 yes
Данные:
# Example data: 13 visits across 5 ids, with a primary (icpc) and an optional
# secondary (icpc2) code per row; an empty string means "no secondary code".
df <- data.frame(
  id = c(
    123L, 123L, 332L, 332L, 332L, 100L, 100L,
    113L, 113L, 113L, 113L, 551L, 551L
  ),
  icpc = c(
    "D95", "F85", "A01", "A04", "K20", "B10", "A04",
    "T08", "P28", "D95", "A04", "B12", "D95"
  ),
  icpc2 = c(
    "F15", "", "", "", "", "", "",
    "", "", "A01", "", "A01", ""
  ),
  reg.date = c(
    "19JUN2015", "15AUG2016", "16MAR2010", "20JAN2018", "20FEB2017",
    "01JUN2017", "11JAN2008", "18MAR2018", "19JAN2017", "16JAN2013",
    "01MAY2009", "03APR2011", "09MAY2015"
  ),
  stringsAsFactors = FALSE # keep the code columns as character vectors
)
Дополнение: вариант для нескольких условий:
# Code sets in priority order: within an id, the first set that matches wins.
cond1 <- c("D95", "A01") # A
cond2 <- c("A04", "T08") # B
cond3 <- "B10" # C

# One label per id group; `:=` recycles it to every row of the group.
# A group matching none of the sets gets the empty string.
setDT(df)[
  ,
  condit := {
    if (any(icpc %in% cond1) || any(icpc2 %in% cond1)) {
      "A"
    } else if (any(icpc %in% cond2) || any(icpc2 %in% cond2)) {
      "B"
    } else if (any(icpc %in% cond3) || any(icpc2 %in% cond3)) {
      "C"
    } else {
      ""
    }
  },
  by = id
]
id icpc icpc2 reg.date condit
1: 123 D95 F15 19JUN2015 A
2: 123 F85 15AUG2016 A
3: 332 A01 16MAR2010 A
4: 332 A04 20JAN2018 A
5: 332 K20 20FEB2017 A
6: 100 B10 01JUN2017 B
7: 100 A04 11JAN2008 B
8: 113 T08 18MAR2018 A
9: 113 P28 19JAN2017 A
10: 113 D95 A01 16JAN2013 A
11: 113 A04 01MAY2009 A
12: 551 B12 B10 03APR2011 C
13: 551 D96 09MAY2015 C
Данные (слегка изменены по сравнению с оригиналом, так как в исходных данных ни одна группа не получала метку "C"):
# Modified example data: the last id (551) now carries B10 / D96 instead of
# A01 / D95, so that one group matches only the third code set.
df <- data.frame(
  id = c(
    123L, 123L, 332L, 332L, 332L, 100L, 100L,
    113L, 113L, 113L, 113L, 551L, 551L
  ),
  icpc = c(
    "D95", "F85", "A01", "A04", "K20", "B10", "A04",
    "T08", "P28", "D95", "A04", "B12", "D96"
  ),
  icpc2 = c(
    "F15", "", "", "", "", "", "",
    "", "", "A01", "", "B10", ""
  ),
  reg.date = c(
    "19JUN2015", "15AUG2016", "16MAR2010", "20JAN2018", "20FEB2017",
    "01JUN2017", "11JAN2008", "18MAR2018", "19JAN2017", "16JAN2013",
    "01MAY2009", "03APR2011", "09MAY2015"
  ),
  stringsAsFactors = FALSE # keep the code columns as character vectors
)
Проверено на таблице данных из 40 млн строк: system.time(...)
# user system elapsed
# 111.11 1.17 111.97
Использование dplyr:
# Error: cannot allocate vector of size 274.7 Mb
# Timing stopped at: 4.19 1.11 5.39