library('data.table')
# Turn both tables into data.tables in place.
setDT(key_df)
setDT(data)
# key_df stores the person in a column called "names"; rename it to "author"
# so the join columns carry the same name in both tables.
setnames(key_df, old = "names", new = "author")
# Join data to key_df on author + year. nomatch = 0 drops the key_df rows that
# have no counterpart in data, so only the matched rows are shown.
data[key_df, nomatch = 0, on = .(author, year)]
# DocID author year jobs
# 1: 11 Susan 85 VP
# 2: 2 Jim 85-86 VP
# 3: 13 Gail 85-86 Director
# 4: 8 Emily 85-86 Assistant
# 5: 16 Emily 85-86 Assistant
# 6: 5 Anne 85-86 Intern
# 7: 6 Anne 85-86 Intern
OR
# If you want to add the jobs column to data instead of only viewing the join:
# update join by reference. Rows of data matched by (author, year) receive the
# job title from key_df; rows without a match get NA.
# The i. prefix pins the right-hand side to key_df's column, so the result
# stays correct even when data already has a column named jobs (for example
# when this statement is run a second time).
data[key_df, on = c("author", "year"), jobs := i.jobs]
data
A vector of length(author) that returns the corrected job title:
# Authors of the matched rows only (one entry per matched row, duplicates kept).
data[key_df, author, on = .(author, year), nomatch = 0]
# [1] "Susan" "Jim" "Gail" "Emily" "Emily" "Anne" "Anne"
# Same selection with duplicates removed.
data[key_df, unique(author), on = .(author, year), nomatch = 0]
# [1] "Susan" "Jim" "Gail" "Emily" "Anne"
Data:
# Example data ----
# Lookup table: one row per (job title, person name, year) combination.
jobs <- c(
  "Prez", "CEO", "VP", "VP", "Director", "Assistant", "Secretary", "Intern"
)
names <- c("Susan", "Bob", "Susan", "Jim", "Gail", "Emily", "Dan", "Anne")
year <- c("86", "85-86", "85", "85-86", "85-86", "85-86", "85-86", "85-86")
key_df <- data.frame(jobs, names, year, stringsAsFactors = FALSE)

# Documents table: 20 documents whose author field is sampled (with
# replacement) from the pooled job titles and person names, so an entry may
# hold a title rather than a name.
DocID <- 1:20
set.seed(2L) # fixed seed so the sampled authors are reproducible
author <- sample(c(jobs, names), length(DocID), replace = TRUE)
data <- data.frame(DocID, author, stringsAsFactors = FALSE)
# Recycle the lookup years so that every document row gets a year value.
data$year <- rep_len(key_df$year, nrow(data))