Я добавил решение на основе евклидова расстояния: вычисляю расстояния между всеми парами точек из `df` и `ref` и для каждой точки из `df` выбираю ближайшую к ней точку из `ref`.
# Nearest-reference lookup: for every point in `df`, find the closest point in
# `ref` by Euclidean distance on (lat, lon), using a data.table cross join.
library(data.table)

# Sample data: `df` holds the query points, `ref` the candidate reference points.
df <- data.table(
  ID = 1:10,
  lat = c(1.4, 5.8, 4, 6.5, 5.1, 9.3, 8.6, 8.9, 4.4, 1.8),
  lon = c(8.1, 4.8, 7.4, 4.8, 0.4, 5, 6.2, 1, 5, 5.2)
)
ref <- data.table(ID = letters[1:10], lat = 1:10, lon = 1:10)

# Convert to data.table by reference. This is a no-op for the sample tables
# above but is required when `df`/`ref` arrive as plain data.frames.
setDT(df)
setDT(ref)

# Cartesian (cross) join: add a constant dummy key `k` to both tables, join on
# it so every row of `df` is paired with every row of `ref`, then drop the key.
# Columns coming from `ref` that clash with `df` names get the `i.` prefix
# (i.ID, i.lat, i.lon). The result has nrow(df) * nrow(ref) rows, so memory
# grows quadratically with the table sizes.
df1 <- setkey(df[, c(k = 1, .SD)], k)[
  ref[, c(k = 1, .SD)],
  allow.cartesian = TRUE
][, k := NULL]

# Euclidean distance between each `df` point and its paired `ref` point.
# NOTE(review): this treats lat/lon as planar coordinates; confirm that is
# acceptable for the data at hand.
df1[, EuDist := sqrt((lat - i.lat)^2 + (lon - i.lon)^2)]

# Rank the candidate `ref` points within each `df` ID by ascending distance.
# Ties are broken at random, so exactly one row per ID receives rank 1
# (call set.seed() beforehand if reproducible tie-breaking is needed).
df1[, distRank := rank(EuDist, ties.method = "random"), by = .(ID)]

# Keep only the closest `ref` point for each `df` ID.
df1 <- df1[distRank == 1]