Можно заменить (`replace`) значения в столбце 'speaker' на NA
в тех строках, где 'dup' равен 1, а затем заполнить (`fill`) эти пропуски
предыдущим значением, отличным от NA:
library(dplyr)
library(tidyr)
# Blank out `speaker` on the rows flagged by `dup`, then carry the last
# non-missing speaker down into those gaps. The result is printed here,
# not assigned back to `df`.
df %>%
  mutate(speaker = replace(speaker, as.logical(dup), NA)) %>%
  fill(speaker, .direction = "down")
# id speaker text dup
#1 1 GHS how are you 0
#2 2 GHS yea 1
#3 3 CHA where is it 0
#4 4 CHA I cant find it 0
#5 5 CHA did you 0
#6 6 CHA what 1
#7 7 CHA did you find it 0
Или за один шаг с помощью `na.locf0`
из пакета zoo:
library(zoo)
# Same idea in a single assignment: set the flagged speakers to NA, then
# let na.locf0() carry the last observed value forward over them.
df[["speaker"]] <- na.locf0(
  replace(df[["speaker"]], as.logical(df[["dup"]]), NA)
)
Или, если отмеченные строки встречаются только поодиночке (не идут подряд), достаточно взять значение из предыдущей строки:
# For isolated flagged rows, take the speaker from the row directly above.
# `lag` is namespace-qualified on purpose: a bare `lag` resolves to
# stats::lag() whenever dplyr is not attached, and that function does not
# shift the values of a plain vector, so the result would be silently wrong.
# Returns the corrected speaker vector; `df` itself is not modified.
with(df, ifelse(dup == 1, dplyr::lag(speaker), speaker))
Данные:
# Example data: one row per utterance. `dup` is 1 on rows whose `speaker`
# entry is to be replaced, 0 otherwise.
df <- data.frame(
  id = 1:7,
  speaker = c("GHS", "yea", "CHA", "CHA", "CHA", "what", "CHA"),
  text = c(
    "how are you", "yea", "where is it", "I cant find it",
    "did you", "what", "did you find it"
  ),
  dup = c(0L, 1L, 0L, 0L, 0L, 1L, 0L),
  stringsAsFactors = FALSE
)