Если я правильно вас понял (IIUYC), вот мой подход к этой задаче:
import pandas as pd
from datetime import datetime
# Sample frame: one row per (date, spatial_pixel) pair together with its
# column_A value; two pixels are observed on four consecutive days.
df = pd.DataFrame(
    data=[
        (datetime(2016, 1, 1), 1000, 5),
        (datetime(2016, 1, 1), 1001, 1),
        (datetime(2016, 1, 2), 1000, 1),
        (datetime(2016, 1, 2), 1001, 1),
        (datetime(2016, 1, 3), 1000, 1),
        (datetime(2016, 1, 3), 1001, 5),
        (datetime(2016, 1, 4), 1000, 1),
        (datetime(2016, 1, 4), 1001, 1),
    ],
    columns=['date', 'spatial_pixel', 'column_A'],
)
df
# date spatial_pixel column_A
# 0 2016-01-01 1000 5
# 1 2016-01-01 1001 1
# 2 2016-01-02 1000 1
# 3 2016-01-02 1001 1
# 4 2016-01-03 1000 1
# 5 2016-01-03 1001 5
# 6 2016-01-04 1000 1
# 7 2016-01-04 1001 1
def sum_days_in_row_with_condition(g):
    """Count consecutive rows (ordered by date) where ``column_A < 2``.

    Args:
        g: DataFrame with at least a ``'date'`` and a ``'column_A'`` column,
            e.g. one group produced by ``df.groupby('spatial_pixel')``.

    Returns:
        A copy of ``g`` sorted by ``'date'`` ascending with an added integer
        column ``'days-in-a-row'``: the length of the current run of rows
        satisfying ``column_A < 2`` that ends at that row, and 0 for rows
        that do not satisfy the condition. ``g`` itself is not modified.
    """
    sorted_g = g.sort_values(by='date', ascending=True)
    condition = sorted_g['column_A'] < 2

    # Running count of all rows that satisfy the condition so far.
    running_total = condition.cumsum()

    # Value of the running count at the most recent row that broke the
    # streak. Rows that precede the first break have no such row, so they
    # stay NaN after ffill(); their baseline is 0. Without fillna(0) the
    # integer cast raises whenever a group starts with a matching row.
    baseline = running_total.where(~condition).ffill().fillna(0).astype(int)

    sorted_g['days-in-a-row'] = running_total - baseline
    return sorted_g
# Process each spatial_pixel independently and stack the per-pixel results.
# Iterating over the groupby yields the complete sub-frame for every key
# (grouping column included) in sorted key order, so the result does not
# depend on how a given pandas version treats grouping columns inside
# groupby(...).apply(...).
pd.concat(
    [
        sum_days_in_row_with_condition(pixel_rows)
        for _, pixel_rows in df.groupby('spatial_pixel')
    ]
).reset_index(drop=True)
# date spatial_pixel column_A days-in-a-row
# 0 2016-01-01 1000 5 0
# 1 2016-01-02 1000 1 1
# 2 2016-01-03 1000 1 2
# 3 2016-01-04 1000 1 3
# 4 2016-01-01 1001 1 1
# 5 2016-01-02 1001 1 2
# 6 2016-01-03 1001 5 0
# 7 2016-01-04 1001 1 1