Здесь я добавляю время к данным, в которых указывается только дата . Между значениями или 288 значениями на дату есть 5 минут.
Код работает, когда входной фрейм данных составляет 1 день (288 строк) или меньше, но выдает ошибку, когда ввод длиннее. Есть идеи, что мне не хватает? Заранее спасибо.
Соответствующий раздел кода:
import datetime as dt
print("Print df_raw:\n", df_raw)
df = df_raw[:288]
# df = df_raw[:289] # Gives KeyError, see traceback below
print("\nPrint df BEFORE groubpy/apply:\n", df)
df.loc[:,'date'] = pd.to_datetime(df.date)
def f(x):
x['DT']=[val+dt.timedelta(minutes=(pos*5)) for val,pos in zip(x.loc[:,'date'], range(0,len(x.loc[:,'date'])))]
return x
df = df.groupby('date').apply(f)
df = df.set_index('DT').drop(columns='date')
print("\nPrint df AFTER groubpy/apply:\n", df)
Вывод (с 288 строками или меньше, работает как положено):
Print df_raw:
date values
0 2015-03-10 556.25
0 2015-03-10 516.993
0 2015-03-10 468.75
0 2015-03-10 432.812
0 2015-03-10 87.1095
.. ... ...
84 2014-12-16 None
84 2014-12-16 None
84 2014-12-16 160.938
84 2014-12-16 145.118
84 2014-12-16 125.977
[24480 rows x 2 columns]
Print df BEFORE groubpy/apply:
date values
0 2015-03-10 556.25
0 2015-03-10 516.993
0 2015-03-10 468.75
0 2015-03-10 432.812
0 2015-03-10 87.1095
.. ... ...
0 2015-03-10 781.446
0 2015-03-10 743.36
0 2015-03-10 708.985
0 2015-03-10 669.922
0 2015-03-10 632.422
[288 rows x 2 columns]
Print df AFTER groubpy/apply:
values
DT
2015-03-10 00:00:00 556.25
2015-03-10 00:05:00 516.993
2015-03-10 00:10:00 468.75
2015-03-10 00:15:00 432.812
2015-03-10 00:20:00 87.1095
... ...
2015-03-10 23:35:00 781.446
2015-03-10 23:40:00 743.36
2015-03-10 23:45:00 708.985
2015-03-10 23:50:00 669.922
2015-03-10 23:55:00 632.422
[288 rows x 1 columns]
Traceback (с 289 строк или больше, я получаю это):
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
724 try:
--> 725 result = self._python_apply_general(f)
726 except Exception:
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f)
744 return self._wrap_applied_output(
--> 745 keys, values, not_indexed_same=mutated or self.mutated
746 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\generic.py in _wrap_applied_output(self, keys, values, not_indexed_same)
371 elif isinstance(v, DataFrame):
--> 372 return self._concat_objects(keys, values, not_indexed_same=not_indexed_same)
373 elif self.grouper.groupings is not None:
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in _concat_objects(self, keys, values, not_indexed_same)
954 else:
--> 955 result = result.reindex(ax, axis=self.axis)
956
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
220 def wrapper(*args, **kwargs):
--> 221 return func(*args, **kwargs)
222
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in reindex(self, *args, **kwargs)
3975 kwargs.pop("labels", None)
-> 3976 return super().reindex(**kwargs)
3977
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\generic.py in reindex(self, *args, **kwargs)
4513 return self._reindex_axes(
-> 4514 axes, level, limit, tolerance, method, fill_value, copy
4515 ).__finalize__(self)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3863 frame = frame._reindex_index(
-> 3864 index, method, copy, level, fill_value, limit, tolerance
3865 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3885 fill_value=fill_value,
-> 3886 allow_dups=False,
3887 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
4576 allow_dups=allow_dups,
-> 4577 copy=copy,
4578 )
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\internals\managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
1250 if not allow_dups:
-> 1251 self.axes[axis]._can_reindex(indexer)
1252
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexes\base.py in _can_reindex(self, indexer)
3361 if not self.is_unique and len(indexer):
-> 3362 raise ValueError("cannot reindex from a duplicate axis")
3363
ValueError: cannot reindex from a duplicate axis
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2896 try:
-> 2897 return self._engine.get_loc(key)
2898 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'date'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-207-872197908aef> in <module>
36 return x
37
---> 38 df = df.groupby('date').apply(f)
39 df = df.set_index('DT').drop(columns='date')
40
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in apply(self, func, *args, **kwargs)
735
736 with _group_selection_context(self):
--> 737 return self._python_apply_general(f)
738
739 return result
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\groupby.py in _python_apply_general(self, f)
740
741 def _python_apply_general(self, f):
--> 742 keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis)
743
744 return self._wrap_applied_output(
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\groupby\ops.py in apply(self, f, data, axis)
235 # group might be modified
236 group_axes = _get_axes(group)
--> 237 res = f(group)
238 if not _is_indexed_like(res, group_axes):
239 mutated = True
<ipython-input-207-872197908aef> in f(x)
33
34 def f(x):
---> 35 x['DT']=[val+dt.timedelta(minutes=(pos*5)) for val,pos in zip(x.loc[:,'date'], range(0,len(x.loc[:,'date'])))]
36 return x
37
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in __getitem__(self, key)
1416 except (KeyError, IndexError, AttributeError):
1417 pass
-> 1418 return self._getitem_tuple(key)
1419 else:
1420 # we by definition only have the 0th axis
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _getitem_tuple(self, tup)
803 def _getitem_tuple(self, tup):
804 try:
--> 805 return self._getitem_lowerdim(tup)
806 except IndexingError:
807 pass
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _getitem_lowerdim(self, tup)
927 for i, key in enumerate(tup):
928 if is_label_like(key) or isinstance(key, tuple):
--> 929 section = self._getitem_axis(key, axis=i)
930
931 # we have yielded a scalar ?
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _getitem_axis(self, key, axis)
1848 # fall thru to straight lookup
1849 self._validate_key(key, axis)
-> 1850 return self._get_label(key, axis=axis)
1851
1852
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexing.py in _get_label(self, label, axis)
158 raise IndexingError("no slices here, handle elsewhere")
159
--> 160 return self.obj._xs(label, axis=axis)
161
162 def _get_loc(self, key: int, axis: int):
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\generic.py in xs(self, key, axis, level, drop_level)
3727
3728 if axis == 1:
-> 3729 return self[key]
3730
3731 self._consolidate_inplace()
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2993 if self.columns.nlevels > 1:
2994 return self._getitem_multilevel(key)
-> 2995 indexer = self.columns.get_loc(key)
2996 if is_integer(indexer):
2997 indexer = [indexer]
~\AppData\Local\Continuum\miniconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2897 return self._engine.get_loc(key)
2898 except KeyError:
-> 2899 return self._engine.get_loc(self._maybe_cast_indexer(key))
2900 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2901 if indexer.ndim > 1 or indexer.size > 1:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'date'
РЕДАКТИРОВАТЬ
Спасибо за помощь. Судя по вашим предложениям, это решение мне подходит, без SettingWithCopyWarning
s:
import pandas as pd
import datetime as dt
# Note: df_raw is set elsewhere...
df = df_raw
df.loc[:,'date'] = pd.to_datetime(df['date'])
em = pd.DataFrame()
for key,t in df.groupby('date'):
temp = t
temp.loc[:,'DT'] = [i+dt.timedelta(minutes=j*5) for i,j in zip(t['date'],range(t['date'].shape[0]))]
em = pd.concat([em,temp])
print(em)
Python: 3.7.4
Pandas: 0.25. 3
Jupyter Lab