Вот отредактированный ответ на вопрос
import pandas as pd
import datetime as dt
import numpy as np
# Sample data: the original patients plus patient 100, who is admitted often.
data = {
    'Patient_ID': [12, 1352, 55, 1352, 12, 6, 1352, 100, 100, 100, 100],
    'Surgery_Date': [
        '25/01/2009', '28/01/2009', '29/01/2009', '12/12/2008',
        '23/02/2008', '2/02/2009', '12/01/2009', '01/01/2009',
        '01/02/2009', '01/01/2010', '01/02/2010',
    ],
}
df = pd.DataFrame(data, columns=['Patient_ID', 'Surgery_Date'])

# One counter per row, aligned on df's index, so results can be written back
# by label even though each patient's surgeries are processed in date order.
readmissions = pd.Series(np.zeros(len(df), dtype=int), index=df.index)

# Loop through all unique patient ids.
all_id = df['Patient_ID'].unique()
for pid in all_id:
    # All the surgeries of the patient with this id, in chronological order.
    patient = df.loc[df['Patient_ID'] == pid]
    admissions_sorted = pd.to_datetime(
        patient['Surgery_Date'], format='%d/%m/%Y'
    ).sort_values()

    # True where the previous surgery was LESS than 180 days earlier.
    # The first entry's diff is NaT, which compares as False.
    frequency = admissions_sorted.diff() < dt.timedelta(days=180)

    # Running count of consecutive readmissions: increment while surgeries
    # keep falling inside the 180-day window, reset to 0 once the gap is longer.
    n_admissions = [0]
    for v in frequency.values[1:]:
        n_admissions.append(n_admissions[-1] + 1 if v else 0)

    # Write the counts back to the rows they came from (original row labels).
    readmissions.loc[admissions_sorted.index] = n_admissions

df['Readmission'] = readmissions
Это возвращает
Patient_ID Surgery_Date Readmission
0 12 25/01/2009 0
1 1352 28/01/2009 2
2 55 29/01/2009 0
3 1352 12/12/2008 0
4 12 23/02/2008 0
5 6 2/02/2009 0
6 1352 12/01/2009 1
7 100 01/01/2009 0
8 100 01/02/2009 1
9 100 01/01/2010 0
10 100 01/02/2010 1
Надеюсь, это поможет! Вероятно, это решение не слишком «питоничное» и не вполне в духе pandas, но оно должно работать как задумано. Уверен, что его можно сделать гораздо более эффективным и читаемым.