Самый быстрый способ сделать это — использовать мощный пакет data.table.
library(data.table)
# Example table: one row per customer visit. In this sample, Earliest_Visit
# holds the same year on every row of a given customer.
dt <- data.table(
  Customer = c("A", "A", "B", "B"),
  Cluster = rep("D", 4),
  Segment = c("A", "B", "C", "D"),
  Visit = c(2018, 2019, 2017, 2018),
  Earliest_Visit = c(2018, 2018, 2017, 2017)
)
# Keep Earliest_Visit on the first row of each customer and blank the repeats.
# Duplicates are detected on Customer + Earliest_Visit (not Earliest_Visit
# alone) so that two different customers sharing the same earliest year are
# not treated as repeats of each other.
# The column is double, hence NA_real_; `:=` modifies dt by reference, so no
# temporary helper column is needed.
dt[
  duplicated(dt, by = c("Customer", "Earliest_Visit")),
  Earliest_Visit := NA_real_
]
> dt
Customer Cluster Segment Visit Earliest_Visit
1: A D A 2018 2018
2: A D B 2019 NA
3: B D C 2017 2017
4: B D D 2018 NA