Можно объединить таблицы по общим столбцам (merge) и заполнить пропуски в Age средними значениями по группам. Вот решение с помощью пакета data.table:
library(data.table)

# Convert both data frames to data.tables by reference.
setDT(OriginalData)
setDT(NewData)

# Attach the per-group value `age` from NewData to every passenger.
# merge() selects its join columns with `by` (`on` belongs to the DT[i, on = ]
# join syntax and is not a merge() argument), and the column is named "Sex",
# not "sex". all.x = TRUE keeps passengers whose group has no row in NewData;
# their Age simply stays NA instead of the row being dropped.
Merge <- merge(
  OriginalData, NewData,
  by = c("Sex", "Pclass", "Embarked"),
  all.x = TRUE
)

# Age is stored as integer; make it double first so the fractional group
# values can be assigned without a type mismatch.
Merge[, Age := as.numeric(Age)]

# Fill only the missing ages; observed ages are left untouched.
Merge[is.na(Age), Age := age]

# Show the columns of interest.
Merge[, .(Name, Pclass, Embarked, Age)]
# Name Pclass Embarked Age
#1: Cumings, Mrs. John Bradley (Florence Briggs Thayer) 1 C 38.0000
#2: Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 S 35.0000
#3: Heikkinen, Miss. Laina 3 S 26.0000
#4: Moran, Mr. James 3 Q 26.7381
#5: Braund, Mr. Owen Harris 3 S 22.0000
#6: Allen, Mr. William Henry 3 S 35.0000
Данные
# Example passenger table (six rows, fourteen columns); the last passenger has
# a missing Age. Factor columns are written as their values with explicit
# levels, so the level order is fixed rather than derived from the data.
OriginalData <- structure(
  list(
    Row = 1:6,
    PassengerId = 1:6,
    Survived = c(0L, 1L, 1L, 1L, 0L, 0L),
    Pclass = c(3L, 1L, 3L, 1L, 3L, 3L),
    Name = factor(
      c(
        "Braund, Mr. Owen Harris",
        "Cumings, Mrs. John Bradley (Florence Briggs Thayer)",
        "Heikkinen, Miss. Laina",
        "Futrelle, Mrs. Jacques Heath (Lily May Peel)",
        "Allen, Mr. William Henry",
        "Moran, Mr. James"
      ),
      levels = c(
        "Allen, Mr. William Henry",
        "Braund, Mr. Owen Harris",
        "Cumings, Mrs. John Bradley (Florence Briggs Thayer)",
        "Futrelle, Mrs. Jacques Heath (Lily May Peel)",
        "Heikkinen, Miss. Laina",
        "Moran, Mr. James"
      )
    ),
    Sex = factor(
      c("male", "female", "female", "female", "male", "male"),
      levels = c("female", "male")
    ),
    Age = c(22L, 38L, 26L, 35L, 35L, NA),
    SibSp = c(1L, 1L, 0L, 1L, 0L, 0L),
    Parch = rep(0L, 6L),
    Ticket = factor(
      c(
        "A/5 21171", "PC 17599", "STON/O2. 3101282",
        "113803", "373450", "330877"
      ),
      levels = c(
        "113803", "330877", "373450",
        "A/5 21171", "PC 17599", "STON/O2. 3101282"
      )
    ),
    Fare = c(7.25, 71.2833, 7.925, 53.1, 8.05, 8.4583),
    Cabin = factor(
      c(NA, "C85", NA, "C123", NA, NA),
      levels = c("C123", "C85")
    ),
    Embarked = factor(
      c("S", "C", "S", "S", "S", "Q"),
      levels = c("C", "Q", "S")
    ),
    isTrain_set = rep(TRUE, 6L)
  ),
  class = "data.frame",
  row.names = c(NA, -6L)
)
# Lookup table with one `age` value per (Pclass, Embarked, Sex) combination,
# 18 rows in total. Factor columns are written as their values with explicit
# levels; row names are the character strings "1" to "18".
NewData <- structure(
  list(
    Pclass = c(
      3L, 2L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
      2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L
    ),
    Embarked = factor(
      c(
        "C", "C", "S", "C", "Q", "S", "Q", "C", "S",
        "Q", "S", "Q", "S", "C", "C", "S", "Q", "Q"
      ),
      levels = c("C", "Q", "S")
    ),
    Sex = factor(
      c(
        "female", "female", "female", "male", "female", "male",
        "male", "male", "female", "female", "male", "female",
        "female", "female", "male", "male", "male", "male"
      ),
      levels = c("female", "male")
    ),
    age = c(
      16.81818, 19.36364, 22.85477, 24.12947, 24.33333, 26.14624,
      26.7381, 27.26923, 28.45516, 30, 30.4917, 35,
      36.04545, 38.10769, 40.04762, 41.70598, 44, 53.75
    )
  ),
  class = "data.frame",
  row.names = as.character(1:18)
)