Вот решение на data.table.
Оно похоже на решение @Parfait, за исключением того, что я заранее добавляю идентификатор, чтобы можно было свернуть результат без повторного соединения с исходной таблицей data.table.
library(data.table)
# Number the rows of A_dt within each Year_month (the column is added by
# reference), so every original row keeps its own key after the join.
A_dt[, ID := seq_len(.N), by = Year_month]

# Pair every A_dt row with every B_dt row that shares its Year_month, then,
# for each original A_dt row (Year_month + ID), keep only the pairing with the
# smallest value of X1 - Longitude.
A_dt[
  B_dt,
  .(Year_month, ID, Res = X1 - Longitude, X1, Longitude, CHL),
  on = "Year_month",
  # The join is many-to-many on purpose. Spell out TRUE: `T` is an ordinary
  # variable that can be reassigned.
  allow.cartesian = TRUE
][, .SD[which.min(Res)], by = .(Year_month, ID)]
Year_month ID Res X1 Longitude CHL
1: 1999_1 1 6.9570 19.3945 12.4375 12.58700
2: 1999_1 2 6.9415 19.3790 12.4375 12.58700
3: 1999_1 3 6.7698 19.2073 12.4375 12.58700
4: 2000_1 1 6.6420 19.2670 12.6250 13.06914
5: 2000_1 2 6.1350 18.7600 12.6250 13.06914
6: 2000_1 3 6.7255 19.3505 12.6250 13.06914
Вот решение на базовом R, использующее аналогичный подход.
# Give each row of A a running number within its Year_month group so the
# original rows stay identifiable after the merge.
A[["ID"]] <- ave(seq_len(nrow(A)), A[["Year_month"]], FUN = seq_along)

# Pair every row of A with every row of B from the same Year_month.
A2 <- merge(A, B, by = 'Year_month')

# Difference for each pairing, and the smallest difference per original A row.
A2[["Difference"]] <- A2[["X1"]] - A2[["Longitude"]]
A2[["Min_Diff"]] <- ave(
  A2[["Difference"]],
  A2[["Year_month"]],
  A2[["ID"]],
  FUN = min
)

# Keep the pairing(s) that attain the per-row minimum. The comparison is exact
# because min() returns one of the compared values unchanged.
A2[
  A2[["Min_Diff"]] == A2[["Difference"]],
  c('Year_month', 'ID', 'Difference', 'X1', 'Longitude', 'CHL')
]
Наконец, поскольку были некоторые проблемы с типами данных, вот несколько способов, которыми я их преобразовал:
# Example inputs. X1 and Longitude are deliberately supplied as text to
# reproduce the data-type problem: they arrive as character columns (or as
# factors under R < 4.0, where stringsAsFactors defaulted to TRUE).
A <- data.frame(
  X1 = c("19.3945", "19.379", "19.2073", "19.267", "18.760", "19.3505"),
  Year_month = c("1999_1", "1999_1", "1999_1", "2000_1", "2000_1", "2000_1")
)
B <- data.frame(
  Longitude = c("12.3125", "12.375", "12.4375", "12.5", "12.5625", "12.625"),
  Year_month = c("1999_1", "1999_1", "1999_1", "2000_1", "2000_1", "2000_1"),
  CHL = c(12.70245, 12.63853, 12.58700, 12.61019, 12.75727, 13.06914)
)

# If every column of the data.frame is a factor, we can simply rebuild the
# data.frame from the character version of each column.
A <- data.frame(lapply(A, as.character), stringsAsFactors = FALSE)
A$X1 <- as.numeric(A$X1)

# For B the lapply trick is not used because 'CHL' is already numeric.
# Go through as.character() first so that a factor yields its labels rather
# than its internal integer codes.
B$Longitude <- as.numeric(as.character(B$Longitude))
# Convert the existing key column in place. Column names are case-sensitive,
# so the name must match 'Year_month' exactly or a new column is created.
B$Year_month <- as.character(B$Year_month)
# Alternatively, fix the data types while constructing the tables, so no
# conversion step is needed afterwards.
# Note: passing "stringsAsFactors = FALSE" to the data.frame() calls would
# likewise have let us skip the character-conversion step.
A_dt <- data.table(
  X1 = as.numeric(
    c("19.3945", "19.379", "19.2073", "19.267", "18.760", "19.3505")
  ),
  Year_month = rep(c("1999_1", "2000_1"), each = 3)
)
B_dt <- data.table(
  Longitude = as.numeric(
    c("12.3125", "12.375", "12.4375", "12.5", "12.5625", "12.625")
  ),
  Year_month = rep(c("1999_1", "2000_1"), each = 3),
  CHL = c(12.70245, 12.63853, 12.58700, 12.61019, 12.75727, 13.06914)
)