Вот простое решение с использованием dplyr:
# Example of the full dataset: subjects 1-3 have four rows each, subject 4
# has a single row.
df_full <- data.frame(
  subject = c(rep(c(1, 2, 3), each = 4), 4),
  latency = 1:13
)

# Example of the smaller dataset: one row for subject 1, three rows each for
# subjects 2 and 3, and none for subject 4.
df_small <- data.frame(
  subject = c(1, rep(c(2, 3), each = 3)),
  latency = c(2, 5, 6, 7, 8, 10, 12)
)
library(dplyr)
# Build the vector of subjects for which the smaller dataset retains at least
# 75% of the rows they have in the full dataset.
subj_vec <- df_full %>%
  # rows per subject in the full dataset
  count(subject, name = "n_full") %>%
  # attach rows per subject in the smaller dataset; subjects that do not
  # appear there get NA for n_small
  left_join(count(df_small, subject, name = "n_small"), by = "subject") %>%
  # keep subjects with a retained share of 75% or more; an NA ratio is not
  # TRUE, so subjects missing from the smaller dataset are dropped here
  filter(n_small / n_full >= 0.75) %>%
  # extract the surviving subject ids as a plain vector
  pull(subject)

# Restrict the smaller dataset to the subjects selected above.
filter(df_small, subject %in% subj_vec)
# Output:
#   subject latency
# 1       2       5
# 2       2       6
# 3       2       7
# 4       3       8
# 5       3      10
# 6       3      12