Я пытаюсь найти и отфильтровать Точки в GeoDataFrame (df1), которые близки к Точкам во втором GDF (df2), и наоборот. Я использую этот код для него:
ps1 = []
ps2 = []
for p1 in df1.geometry:
for p2 in df2.geometry:
dist = haversine(p1.y,p1.x,p2.y,p2.x)
if dist < 100:
ps1.append(p1)
ps2.append(p2)
df1 = df1[df1.geometry.isin(ps1)]
df2 = df2[df2.geometry.isin(ps2)]
Однако в последней строке я получаю сообщение об ошибке: TypeError: unhashable type: 'Point'
Но строка над ним работает как шарм, и типы данных обеих линий (df1 / df2 и ps1 / ps2) абсолютно одинаковы.
Как это возможно? И как это можно решить?
РЕДАКТИРОВАТЬ:
типы переменных:
df1 : <class 'geopandas.geodataframe.GeoDataFrame'>
df1.geometry: <class 'geopandas.geoseries.GeoSeries'>
ps1 : <class 'list'>
val1 : <class 'pandas.core.series.Series'>
df2 : <class 'geopandas.geodataframe.GeoDataFrame'>
df2.geometry: <class 'geopandas.geoseries.GeoSeries'>
ps2 : <class 'list'>
РЕДАКТИРОВАТЬ 2:
df1.dtypes
Out[301]:
lat float64
lon float64
time datetime64[ns, UTC]
geometry geometry
dtype: object
df2.dtypes
Out[302]:
lat float64
lon float64
time datetime64[ns, UTC]
geometry geometry
dtype: object
MWE:
import pandas as pd
from pandas import Timestamp
import geopandas as gpd
import numpy as np
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371000):
"""
slightly modified version: of http://stackoverflow.com/a/29546836/2901002
Calculate the great circle distance between two points
on the earth (specified in decimal degrees or in radians)
All (lat, lon) coordinates must have numeric dtypes and be of equal length.
"""
if to_radians:
lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])
a = np.sin((lat2-lat1)/2.0)**2 + \
np.cos(lat1) * np.cos(lat2) * np.sin((lon2-lon1)/2.0)**2
return earth_radius * 2 * np.arcsin(np.sqrt(a))
df1 = pd.DataFrame.from_dict({'lat': {0: 52.378851603519905,
1: 52.37896949048437,
2: 52.378654032960824,
3: 52.37818902922923},
'lon': {0: 4.88585622453752,
1: 4.886671616078047,
2: 4.886413945242339,
3: 4.885995520636016},
'time': {0: Timestamp('2019-11-05 11:31:42+0000', tz='UTC'),
1: Timestamp('2019-11-05 11:32:22+0000', tz='UTC'),
2: Timestamp('2019-11-05 11:32:49+0000', tz='UTC'),
3: Timestamp('2019-11-05 11:33:31+0000', tz='UTC')}})
df2 = pd.DataFrame.from_dict({'lat': {0: 52.378851603519905,
1: 52.369466977365214,
2: 52.36923115238693,
3: 52.36898222465506},
'lon': {0: 4.88585622453752,
1: 4.9121331184582,
2: 4.912723204441477,
3: 4.913505393878495},
'time': {0: Timestamp('2019-11-05 08:54:32+0000', tz='UTC'),
1: Timestamp('2019-11-05 08:55:06+0000', tz='UTC'),
2: Timestamp('2019-11-05 08:55:40+0000', tz='UTC'),
3: Timestamp('2019-11-05 08:56:22+0000', tz='UTC')}})
df1 = gpd.GeoDataFrame(df1, geometry=gpd.points_from_xy(df1.lat, df1.lon))
df2 = gpd.GeoDataFrame(df2, geometry=gpd.points_from_xy(df2.lat, df2.lon))
ps1 = []
ps2 = []
for p1 in df1.geometry:
for p2 in df2.geometry:
dist = haversine(p1.y,p1.x,p2.y,p2.x)
if dist < 100:
ps1.append(p1)
ps2.append(p2)
val1 = gpd.GeoDataFrame(df1)
val2 = gpd.GeoDataFrame(df2)
# print(type(df1))
# print(type(df2))
# print(type(ps1))
# print(type(ps2))
print('df1 : ', type(df1))
print('df1.geometry: ', type(df1.geometry))
print('ps1 : ', type(ps1))
val1 = df1.geometry.isin(ps1)
print('val1 : ', type(val1))
print('df2 : ', type(df2))
print('df2.geometry: ', type(df2.geometry))
print('ps2 : ', type(ps2))
val2 = df2.geometry.isin(ps2)
print('val2 : ', type(val2))
# df1 = df1[df1.geometry.isin(ps1)]
# df2 = df2[df2.geometry.isin(ps2)]