В терминах SQL это трехстороннее соединение. Это можно сделать в одном SQL-запросе, например:
# Three-way join done in a single SQL statement via sqldf.
# The query reads the data frames SalesData, EmployeeSales and EmployeeMap
# by name and returns one row per sale with the employee number plus the
# team and skill level that were in effect on the sale date.
library(sqldf)
# Join 1: attach empl_num to every sale via the shared sale_id column.
# Join 2: pick the EmployeeMap row for that employee whose
#         [start_date, end_date] interval contains the sale date.
# Both joins are LEFT joins, so a sale with no matching EmployeeMap row is
# kept and gets NA in team and skill_lvl instead of being dropped.
# NOTE(review): the date columns in the sample data are factors holding
# ISO yyyy-mm-dd strings, so `between` presumably compares them as text
# (which orders correctly for that format) -- confirm if the columns are
# ever converted to Date.
sqldf("
select s.*, es.empl_num, em.team, em.skill_lvl
from SalesData s
left join EmployeeSales es
using (sale_id)
left join EmployeeMap em
on es.empl_num = em.empl_num and s.sale_date between em.start_date and em.end_date
")
На данных из примечания в конце (они основаны на данных, приведённых в вопросе) получаем следующий результат. В таблице EmployeeMap из вопроса присутствуют только сотрудники с номерами от 1 до 4, а левые соединения гарантируют, что для остальных сотрудников в столбцах team и skill_lvl будут значения NA, а соответствующие строки SalesData не будут отброшены из-за отсутствия совпадений.
sale_id sale_amt int_rate sale_date sale_status empl_num team skill_lvl
1 1 7000 10.71 2008-05-01 Fully Paid 4 Red Beg
2 2 10800 13.57 2009-11-01 Fully Paid 4 Red Int
3 3 7500 10.08 2008-04-01 Fully Paid 9 <NA> <NA>
4 4 3000 14.26 2009-09-01 Fully Paid 1 Red Int
5 5 5600 14.96 2010-02-01 Charged Off 1 Red Int
6 6 2800 11.49 2010-08-01 Fully Paid 6 <NA> <NA>
7 7 10000 8.59 2009-10-01 Fully Paid 9 <NA> <NA>
8 8 18000 10.39 2008-03-01 Fully Paid 9 <NA> <NA>
9 9 5000 15.13 2008-04-01 Fully Paid 7 <NA> <NA>
10 10 9600 12.29 2008-03-01 Fully Paid 9 <NA> <NA>
Примечание
Исходные данные в воспроизводимом виде:
# Sample sales table: one row per sale. The date and status columns are
# factors (with explicitly listed levels), and the row names are the
# character strings "1".."10".
SalesData <- data.frame(
  sale_id = 1:10,
  sale_amt = c(
    7000L, 10800L, 7500L, 3000L, 5600L,
    2800L, 10000L, 18000L, 5000L, 9600L
  ),
  int_rate = c(
    10.71, 13.57, 10.08, 14.26, 14.96,
    11.49, 8.59, 10.39, 15.13, 12.29
  ),
  sale_date = factor(
    c(
      "2008-05-01", "2009-11-01", "2008-04-01", "2009-09-01", "2010-02-01",
      "2010-08-01", "2009-10-01", "2008-03-01", "2008-04-01", "2008-03-01"
    ),
    levels = c(
      "2008-03-01", "2008-04-01", "2008-05-01", "2009-09-01",
      "2009-10-01", "2009-11-01", "2010-02-01", "2010-08-01"
    )
  ),
  sale_status = factor(
    c(
      "Fully Paid", "Fully Paid", "Fully Paid", "Fully Paid", "Charged Off",
      "Fully Paid", "Fully Paid", "Fully Paid", "Fully Paid", "Fully Paid"
    ),
    levels = c("Charged Off", "Fully Paid")
  ),
  row.names = as.character(1:10)
)
# Sample link table: which employee (name and number) made each sale.
# empl_name is a factor with explicitly listed levels; row names are the
# character strings "1".."10".
EmployeeSales <- data.frame(
  sale_id = 1:10,
  empl_name = factor(
    c(
      "Dakota", "Dakota", "Kami", "Adel", "Adel",
      "Farah", "Kami", "Kami", "Ida", "Kami"
    ),
    levels = c("Adel", "Dakota", "Farah", "Ida", "Kami")
  ),
  empl_num = c(4L, 4L, 9L, 1L, 1L, 6L, 9L, 9L, 7L, 9L),
  row.names = as.character(1:10)
)
# Sample employee history table: for each employee, the skill level and
# team that applied between start_date and end_date. All non-numeric
# columns are factors with explicitly listed levels; row names are the
# character strings "1".."10".
EmployeeMap <- data.frame(
  # Employees 1..4 have 3, 2, 2 and 3 history rows respectively.
  empl_num = rep(1:4, times = c(3L, 2L, 2L, 3L)),
  empl_name = factor(
    rep(c("Adel", "Bailey", "Casey", "Dakota"), times = c(3L, 2L, 2L, 3L)),
    levels = c("Adel", "Bailey", "Casey", "Dakota")
  ),
  skill_lvl = factor(
    c("Beg", "Int", "Adv", "Beg", "Beg", "Beg", "Int", "Beg", "Int", "Adv"),
    levels = c("Adv", "Beg", "Int")
  ),
  team = factor(
    c("Red", "Red", "Red", "Blue", "Red", "Blue", "Blue", "Red", "Red", "Red"),
    levels = c("Blue", "Red")
  ),
  start_date = factor(
    c(
      "2007-06-01", "2008-06-01", "2010-11-01", "2010-08-01", "2011-05-01",
      "2010-08-01", "2011-01-01", "2007-06-01", "2009-09-01", "2010-09-01"
    ),
    levels = c(
      "2007-06-01", "2008-06-01", "2009-09-01", "2010-08-01",
      "2010-09-01", "2010-11-01", "2011-01-01", "2011-05-01"
    )
  ),
  end_date = factor(
    c(
      "2008-05-31", "2010-10-31", "2999-12-12", "2011-04-30", "2999-12-12",
      "2010-12-31", "2999-12-12", "2009-08-30", "2010-08-30", "2011-08-30"
    ),
    levels = c(
      "2008-05-31", "2009-08-30", "2010-08-30", "2010-10-31",
      "2010-12-31", "2011-04-30", "2011-08-30", "2999-12-12"
    )
  ),
  row.names = as.character(1:10)
)