Сначала я объединяю два фрейма данных перекрёстным соединением (cross join). Затем вычисляю расстояние между каждой парой точек с помощью `map` в Python. Я использую `map`, потому что в большинстве случаев он намного быстрее, чем `apply`, `itertuples`, `iterrows` и т. д. (Ссылка: { ссылка })
Наконец, я группирую объединённый фрейм данных по городу и для каждой группы выбираю строку с минимальным расстоянием.
Вот библиотеки,
import pandas as pd
import geopandas
import geopy.distance
from math import radians, cos, sin, asin, sqrt
Здесь используются функции,
def dist1(p1, p2, radius_km=6373.0):
    """Return the haversine (great-circle) distance between two points in km.

    Args:
        p1: First point; an object exposing ``x`` (longitude) and ``y``
            (latitude) attributes in decimal degrees, such as the point
            geometries stored in a geopandas geometry column.
        p2: Second point, same form as ``p1``.
        radius_km: Radius of the sphere in kilometres. Defaults to 6373,
            the value this function used as a hard-coded constant.

    Returns:
        The distance along the surface of the sphere, in kilometres.
    """
    lon1, lat1, lon2, lat2 = map(radians, (p1.x, p1.y, p2.x, p2.y))
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return c * radius_km
def dist2(p1, p2, radius_km=6373.0):
    """Return the haversine (great-circle) distance between two points in km.

    Args:
        p1: First point as an indexable ``(longitude, latitude)`` pair in
            decimal degrees (for example a row of a two-column array).
        p2: Second point, same form as ``p1``.
        radius_km: Radius of the sphere in kilometres. Defaults to 6373,
            the value this function used as a hard-coded constant.

    Returns:
        The distance along the surface of the sphere, in kilometres.
    """
    lon1, lat1, lon2, lat2 = map(radians, (p1[0], p1[1], p2[0], p2[1]))
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return c * radius_km
def dist3(p1, p2):
    """Return the geodesic distance in km between two point objects via geopy.

    Both arguments expose ``x`` and ``y`` attributes; the coordinates are
    handed to geopy in (y, x) order.
    """
    # Swap to (y, x): with x = longitude and y = latitude this yields the
    # (latitude, longitude) order that geopy works with.
    start = (p1.y, p1.x)
    end = (p2.y, p2.x)
    return geopy.distance.geodesic(start, end).km
def dist4(p1, p2):
    """Return the geodesic distance in km between two indexable points via geopy.

    Each argument is indexed at 0 and 1; the coordinates are handed to geopy
    in (index 1, index 0) order.
    """
    # Swap the pair: with (longitude, latitude) inputs this yields the
    # (latitude, longitude) order that geopy works with.
    start = (p1[1], p1[0])
    end = (p2[1], p2[0])
    return geopy.distance.geodesic(start, end).km
И данные,
# Cities to find a nearest neighbour for (coordinates in decimal degrees).
city1 = [
    dict(City='Buenos Aires', Country='Argentina', Latitude=-34.58, Longitude=-58.66),
    # NOTE(review): -70.66 is identical to Santiago's longitude below; the real
    # longitude of Brasilia is presumably around -47.9 -- confirm. Left as is
    # so the results stay consistent with the outputs shown in this document.
    dict(City='Brasilia', Country='Brazil', Latitude=-15.78, Longitude=-70.66),
    # The trailing space in 'Chile ' is part of the original data.
    dict(City='Santiago', Country='Chile ', Latitude=-33.45, Longitude=-70.66),
]

# Candidate cities the nearest neighbour is chosen from.
city2 = [
    # The trailing space in 'Colombia ' is part of the original data.
    dict(City='Bogota', Country='Colombia ', Latitude=4.6, Longitude=-74.08),
    dict(City='Caracas', Country='Venezuela', Latitude=10.48, Longitude=-66.86),
]

# One row per city, one column per dictionary key.
city1df = pd.DataFrame(data=city1)
city2df = pd.DataFrame(data=city2)
Перекрёстное соединение фреймов данных `geopandas`:
# Turn each city table into a GeoDataFrame whose geometry column holds one
# point per row, built with x = Longitude and y = Latitude.
points1 = geopandas.points_from_xy(city1df.Longitude, city1df.Latitude)
gcity1df = geopandas.GeoDataFrame(city1df, geometry=points1)
points2 = geopandas.points_from_xy(city2df.Longitude, city2df.Latitude)
gcity2df = geopandas.GeoDataFrame(city2df, geometry=points2)

# cross join geopandas: give every row the same constant key and merge on it,
# so each row of the first frame is paired with each row of the second.
for frame in (gcity1df, gcity2df):
    frame['key'] = 1
merged = gcity1df.merge(gcity2df, on='key')
С функциями `math` и `geopandas`:
# 6.64 ms ± 588 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance
merged['dist'] = list(map(dist1, merged['geometry_x'], merged['geometry_y']))
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'geometry_x': 'geometry',
'City_y': 'Nearest',
'dist': 'Distance'
}
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude geometry \
2 Brasilia Brazil -15.78 -70.66 POINT (-70.66000 -15.78000)
0 Buenos Aires Argentina -34.58 -58.66 POINT (-58.66000 -34.58000)
4 Santiago Chile -33.45 -70.66 POINT (-70.66000 -33.45000)
Nearest Distance
2 Bogota 2297.922808
0 Bogota 4648.004515
4 Bogota 4247.586882
С `geopy` и `geopandas`:
# 9.99 ms ± 764 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance
merged['dist'] = list(map(dist3, merged['geometry_x'], merged['geometry_y']))
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'geometry_x': 'geometry',
'City_y': 'Nearest',
'dist': 'Distance'
}
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude geometry \
2 Brasilia Brazil -15.78 -70.66 POINT (-70.66000 -15.78000)
0 Buenos Aires Argentina -34.58 -58.66 POINT (-58.66000 -34.58000)
4 Santiago Chile -33.45 -70.66 POINT (-70.66000 -33.45000)
Nearest Distance
2 Bogota 2285.239605
0 Bogota 4628.641817
4 Bogota 4226.710978
Если вы хотите использовать `pandas` вместо `geopandas`:
# cross join pandas: give every row the same constant key and merge on it,
# so each row of city1df is paired with each row of city2df.
for frame in (city1df, city2df):
    frame['key'] = 1
merged = city1df.merge(city2df, on='key')
С функциями `math`:
# 8.65 ms ± 2.21 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance
merged['dist'] = list(
map(
dist2,
merged[['Longitude_x', 'Latitude_x']].values,
merged[['Longitude_y', 'Latitude_y']].values
)
)
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'City_y': 'Nearest',
'dist': 'Distance'
}
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude Nearest Distance
2 Brasilia Brazil -15.78 -70.66 Bogota 2297.922808
0 Buenos Aires Argentina -34.58 -58.66 Bogota 4648.004515
4 Santiago Chile -33.45 -70.66 Bogota 4247.586882
С `geopy`:
# 9.8 ms ± 807 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%%timeit
# find distance
merged['dist'] = list(
map(
dist4,
merged[['Longitude_x', 'Latitude_x']].values,
merged[['Longitude_y', 'Latitude_y']].values
)
)
mapping = {
'City_x': 'City',
'Country_x': 'Country',
'Latitude_x': 'Latitude',
'Longitude_x': 'Longitude',
'City_y': 'Nearest',
'dist': 'Distance'
}
nearest = merged.loc[merged.groupby(['City_x', 'Country_x'])['dist'].idxmin()]
nearest.rename(columns=mapping)[list(mapping.values())]
City Country Latitude Longitude Nearest Distance
2 Brasilia Brazil -15.78 -70.66 Bogota 2285.239605
0 Buenos Aires Argentina -34.58 -58.66 Bogota 4628.641817
4 Santiago Chile -33.45 -70.66 Bogota 4226.710978