Я попробовал ниже двумя способами
df_weather = pd.read_sql_table('cityweathers', self.engine)
def process_city_weather(df_weather):
df_weather.loc[:, "date_create"] = pd.to_datetime(df_weather[["year", "month","day"]])
# df_weather.loc[:, "update"] = pd.to_datetime(df_weather.time_create, unit='ms').dt.tz_localize('UTC').dt.tz_convert('Asia/Shanghai').dt.tz_localize(None)
df_weather = df_weather.drop_duplicates(['date_create'], keep='last')
# method 1
df_weather.loc[:, 'fineday'] = df_weather.weather.apply(lambda x: '雨' not in x).astype(int)
# method 2
df_weather['fineday'] = df_weather.weather.apply(lambda x: '雨' not in x).astype(int)
Оба не работают,
Я включил pd.options.mode.chained_assignment = 'raise'
, чтобы узнать, откуда приходят предупреждения. Ошибка на первом пути:
Traceback (most recent call last):
File "map_reduce.py", line 743, in <module>
File "map_reduce.py", line 738, in main
delayed(purchase_count_forecaster)(order_path, stock_path) for order_path, stock_path in read_map_by_store_id()
File "C:\Anaconda3\lib\site-packages\joblib\parallel.py", line 983, in __call__
if self.dispatch_one_batch(iterator):
File "C:\Anaconda3\lib\site-packages\joblib\parallel.py", line 825, in dispatch_one_batch
File "C:\Anaconda3\lib\site-packages\joblib\parallel.py", line 782, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "C:\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 182, in apply_async
result = ImmediateResult(func)
File "C:\Anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 545, in __init__
self.results = batch()
File "C:\Anaconda3\lib\site-packages\joblib\parallel.py", line 261, in __call__
for func, args, kwargs in self.items]
File "C:\Anaconda3\lib\site-packages\joblib\parallel.py", line 261, in <listcomp>
for func, args, kwargs in self.items]
File "map_reduce.py", line 391, in purchase_count_forecaster
df_weather = process_city_weather(df_weather)
File "e:\pp\sales-forecast\mlc\preprocess\weather.py", line 14, in process_city_weather
df_weather['fineday'] = df_weather.weather.apply(lambda x: '雨' not in x).astype(int)
File "C:\Anaconda3\lib\site-packages\pandas\core\frame.py", line 3119, in __setitem__
self._set_item(key, value)
File "C:\Anaconda3\lib\site-packages\pandas\core\frame.py", line 3201, in _set_item
File "C:\Anaconda3\lib\site-packages\pandas\core\generic.py", line 2712, in _check_setitem_copy
raise com.SettingWithCopyError(t)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Я видел Как бороться с SettingWithCopyWarning в Pandas? , но он не покрывает мой случай
Функция ввода, как показано ниже
from joblib import Parallel, delayed
def main():
process_num = 1 if DEBUG else 4
delayed(purchase_count_forecaster)(order_path, stock_path) for order_path, stock_path in read_map_by_store_id()
def purchase_count_forecaster(order_path, stock_path):
hdfs = HdfsClient(host=HDFS_HOST, port=HDFS_PORT)
db = Database(DB_URL)
df_weather = db.read_weathers() # pd.read_sql_table here
df_weather = process_city_weather(df_weather)
# ....
