Есть два способа сделать это. Выберите тот, который соответствует вашим предпочтениям.
1-й вариант:
# Option 1: look EducationLevel up in DS2 twice -- once by ID and once by
# Name -- and prefer the ID-based match when it exists.
# Each lookup keeps only its join key plus a uniquely renamed value column,
# which avoids duplicated columns with '.x' / '.y' suffixes after the joins.
finalDf <- DS1 %>%
  dplyr::left_join(
    DS2 %>% dplyr::select(ID, EducationLevel1 = EducationLevel),
    by = "ID"
  ) %>%
  dplyr::left_join(
    DS2 %>% dplyr::select(Name, EducationLevel2 = EducationLevel),
    by = "Name"
  ) %>%
  # coalesce() returns the first non-missing value per row. Unlike ifelse(),
  # it keeps the column type, so a factor EducationLevel is not silently
  # turned into its integer codes.
  dplyr::mutate(
    FinalEducationLevel = dplyr::coalesce(EducationLevel1, EducationLevel2)
  )
2-й вариант:
# Option 2: split DS1 by whether its ID occurs in DS2, resolve each part
# separately, then stack the two results.

# Rows of DS1 whose ID is present in DS2; EducationLevel is taken via ID.
commonIds <- DS1 %>%
  dplyr::inner_join(
    DS2 %>% dplyr::select(ID, EducationLevel),
    by = "ID"
  )

# Remaining rows (ID not matched above): look EducationLevel up by Name.
# A left join keeps rows whose Name is also absent from DS2, with
# EducationLevel set to NA.
idsNotPresent <- DS1 %>%
  dplyr::filter(!(ID %in% commonIds$ID)) %>%
  dplyr::left_join(
    DS2 %>% dplyr::select(Name, EducationLevel),
    by = "Name"
  )

# Stack both parts into the final data frame. bind_rows() is namespace-
# qualified like every other dplyr call here, so it does not depend on
# dplyr being attached.
finalDf <- dplyr::bind_rows(commonIds, idsNotPresent)
Дайте мне знать, если это работает.