У меня есть такой датафрейм (вопрос отредактирован):
StartDate EndDate Company Location
2019-01-15 2019-01-31 1.0 121.0
2019-02-01 2020-03-10 1.0 136.0
2006-10-02 2020-03-10 2.0 136.0
2003-07-31 2020-03-10 2.0 321.0
2010-11-03 2020-03-10 3.0 322.0
2013-02-01 2017-02-07 4.0 375.0
2017-02-08 2019-01-14 4.0 375.0
2019-01-15 2019-04-29 4.0 375.0
2019-04-30 2020-03-10 4.0 375.0
Как описано в вопросе по этой ссылке («Pandas: распаковка диапазона дат в отдельные даты»), я хочу развернуть каждый диапазон дат в отдельные даты, чтобы вместо двух полей осталось одно поле с датой. Я шаг за шагом следовал приведённому там решению. Однако, когда я пытаюсь выполнить groupby с resample, я получаю ошибку: ValueError: cannot reindex a non-unique index with a method or limit
По какой причине это происходит?
Для ясности, вот мой код (индекс исходного датафрейма — обычный целочисленный индекс: 1, 2, 3, ...):
# Load the company/location intervals and keep only the columns used below.
# NOTE(review): read_parquet is assumed to be in scope (e.g. imported from
# pandas) -- confirm against the rest of the notebook.
df = read_parquet('company_location.parquet')
# Take an explicit copy so the column assignments below write to an
# independent frame instead of a subset of the loaded one (this is the usual
# trigger for SettingWithCopyWarning).
df = df[['COMPANY', 'STARTDATE', 'ENDDATE', 'LOCATION']].copy()
df['STARTDATE'] = pd.to_datetime(df['STARTDATE'])
df['ENDDATE'] = pd.to_datetime(df['ENDDATE'])
# Drop every row that has a missing value in any of the kept columns.
df = df.dropna(axis=0, how='any')
# Sequential id per interval; it ties the start row and the end row of the
# same interval together after they are stacked below.
df['rows'] = range(len(df))
# Split each interval into two records that share a single DATE column:
# one carrying the start date and one carrying the end date.
starts = df[['COMPANY', 'STARTDATE', 'LOCATION', 'rows']].rename(
    columns={'STARTDATE': 'DATE'})
ends = df[['COMPANY', 'ENDDATE', 'LOCATION', 'rows']].rename(
    columns={'ENDDATE': 'DATE'})
df_decomp = pd.concat([starts, ends])
# Index becomes (original index, rows).
df_decomp = df_decomp.set_index('rows', append=True)
# sort_index() returns a new frame rather than sorting in place, so the
# result has to be assigned back for the sort to take effect.
df_decomp = df_decomp.sort_index()
До этого места всё работает.
Но когда я выполняю следующую строку, возникает ошибка:
# For each (original index, rows) group, index the group's records by DATE and
# upsample to daily frequency, forward-filling ('pad') the remaining columns.
# Per the traceback below, fillna(method='pad') goes through
# _upsample -> reindex(method=...), and reindex raises ValueError when the
# DATE index of a group is not unique.
# NOTE(review): presumably at least one interval has STARTDATE == ENDDATE,
# which would put the same DATE twice in its group -- confirm against the data.
df_decomp=df_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
Текст ошибки (код запускался в Jupyter Notebook):
ValueError Traceback (most recent call last)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
688 try:
--> 689 result = self._python_apply_general(f)
690 except Exception:
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
706 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 707 self.axis)
708
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
189 group_axes = _get_axes(group)
--> 190 res = f(group)
191 if not _is_indexed_like(res, group_axes):
<ipython-input-29-e5d0ce53cd1c> in <lambda>(x)
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in fillna(self, method, limit)
759 """
--> 760 return self._upsample(method, limit=limit)
761
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in _upsample(self, method, limit, fill_value)
1072 result = obj.reindex(res_index, method=method,
-> 1073 limit=limit, fill_value=fill_value)
1074
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
196 def wrapper(*args, **kwargs):
--> 197 return func(*args, **kwargs)
198
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
3808 kwargs.pop('labels', None)
-> 3809 return super(DataFrame, self).reindex(**kwargs)
3810
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4357
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3740 frame = frame._reindex_index(index, method, copy, level,
-> 3741 fill_value, limit, tolerance)
3742
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3748 level=level, limit=limit,
-> 3749 tolerance=tolerance)
3750 return self._reindex_with_indexers({0: [new_index, indexer]},
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
3137 if method is not None or limit is not None:
-> 3138 raise ValueError("cannot reindex a non-unique index "
3139 "with a method or limit")
ValueError: cannot reindex a non-unique index with a method or limit
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-29-e5d0ce53cd1c> in <module>()
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
699
700 with _group_selection_context(self):
--> 701 return self._python_apply_general(f)
702
703 return result
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
705 def _python_apply_general(self, f):
706 keys, values, mutated = self.grouper.apply(f, self._selected_obj,
--> 707 self.axis)
708
709 return self._wrap_applied_output(
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
188 # group might be modified
189 group_axes = _get_axes(group)
--> 190 res = f(group)
191 if not _is_indexed_like(res, group_axes):
192 mutated = True
<ipython-input-29-e5d0ce53cd1c> in <lambda>(x)
----> 1 rep_movement_decomp=rep_movement_decomp.groupby(level=[0,1]).apply(lambda x: x.set_index('DATE').resample('D').fillna(method='pad'))
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in fillna(self, method, limit)
758 2018-01-01 02:00:00 6.0 5
759 """
--> 760 return self._upsample(method, limit=limit)
761
762 @Appender(_shared_docs['interpolate'] % _shared_docs_kwargs)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/resample.py in _upsample(self, method, limit, fill_value)
1071 else:
1072 result = obj.reindex(res_index, method=method,
-> 1073 limit=limit, fill_value=fill_value)
1074
1075 result = self._apply_loffset(result)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
195 @wraps(func)
196 def wrapper(*args, **kwargs):
--> 197 return func(*args, **kwargs)
198
199 if not PY2:
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
3807 kwargs.pop('axis', None)
3808 kwargs.pop('labels', None)
-> 3809 return super(DataFrame, self).reindex(**kwargs)
3810
3811 @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
4354 # perform the reindex on the axes
4355 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 4356 fill_value, copy).__finalize__(self)
4357
4358 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3739 if index is not None:
3740 frame = frame._reindex_index(index, method, copy, level,
-> 3741 fill_value, limit, tolerance)
3742
3743 return frame
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3747 new_index, indexer = self.index.reindex(new_index, method=method,
3748 level=level, limit=limit,
-> 3749 tolerance=tolerance)
3750 return self._reindex_with_indexers({0: [new_index, indexer]},
3751 copy=copy, fill_value=fill_value,
/usr/local/share/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
3136 else:
3137 if method is not None or limit is not None:
-> 3138 raise ValueError("cannot reindex a non-unique index "
3139 "with a method or limit")
3140 indexer, missing = self.get_indexer_non_unique(target)
ValueError: cannot reindex a non-unique index with a method or limit