Я использую следующий цикл if для создания получасовых интервалов примерно для 1 миллиона наблюдений, что отнимает уйму времени.Вот мой цикл if
def half_hourly_buckets(dataframe,time_column):
dataframe[time_column] = pd.to_datetime(dataframe[time_column],format = '%H:%M:%S').dt.time
for j in range(len(dataframe)):
x = dataframe.loc[j,time_column]
if (x >= datetime.time(0,0,1)) & (x <= datetime.time(0,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "00:00:01 - 00:30:00"
elif (x >= datetime.time(0,30,1)) & (x <= datetime.time(1,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "00:30:01 - 01:00:00"
elif (x >= datetime.time(1,0,1)) & (x <= datetime.time(1,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "01:00:01 - 01:30:00"
elif (x >= datetime.time(1,30,1)) & (x <= datetime.time(2,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "01:30:01 - 02:00:00"
elif (x >= datetime.time(2,0,1)) & (x <= datetime.time(2,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "02:00:01 - 02:30:00"
elif (x >= datetime.time(2,30,1)) & (x <= datetime.time(3,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "02:30:01 - 03:00:00"
elif (x >= datetime.time(3,0,1)) & (x <= datetime.time(3,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "03:00:01 - 03:30:00"
elif (x >= datetime.time(3,30,1)) & (x <= datetime.time(4,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "03:30:01 - 04:00:00"
elif (x >= datetime.time(4,0,1)) & (x <= datetime.time(4,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "04:00:01 - 04:30:00"
elif (x >= datetime.time(4,30,1)) & (x <= datetime.time(5,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "04:30:01 - 05:00:00"
elif (x >= datetime.time(5,0,1)) & (x <= datetime.time(5,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "05:00:01 - 05:30:00"
elif (x >= datetime.time(5,30,1)) & (x <= datetime.time(6,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "05:30:01 - 06:00:00"
elif (x >= datetime.time(6,0,1)) & (x <= datetime.time(6,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "06:00:01 - 06:30:00"
elif (x >= datetime.time(6,30,1)) & (x <= datetime.time(7,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "06:30:01 - 07:00:00"
elif (x >= datetime.time(7,0,1)) & (x <= datetime.time(7,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "07:00:01 - 07:30:00"
elif (x >= datetime.time(7,30,1)) & (x <= datetime.time(8,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "07:30:01 - 08:00:00"
elif (x >= datetime.time(8,0,1)) & (x <= datetime.time(8,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "08:00:01 - 08:30:00"
elif (x >= datetime.time(8,30,1)) & (x <= datetime.time(9,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "08:30:01 - 09:00:00"
elif (x >= datetime.time(9,0,1)) & (x <= datetime.time(9,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "09:00:01 - 09:30:00"
elif (x >= datetime.time(9,30,1)) & (x <= datetime.time(10,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "09:30:01 - 10:00:00"
elif (x >= datetime.time(10,0,1)) & (x <= datetime.time(10,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "10:00:01 - 10:30:00"
elif (x >= datetime.time(10,30,1)) & (x <= datetime.time(11,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "10:30:01 - 11:00:00"
elif (x >= datetime.time(11,0,1)) & (x <= datetime.time(11,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "11:00:01 - 11:30:00"
elif (x >= datetime.time(11,30,1)) & (x <= datetime.time(12,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "11:30:01 - 12:00:00"
elif (x >= datetime.time(12,0,1)) & (x <= datetime.time(12,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "12:00:01 - 12:30:00"
elif (x >= datetime.time(12,30,1)) & (x <= datetime.time(13,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "12:30:01 - 13:00:00"
elif (x >= datetime.time(13,0,1)) & (x <= datetime.time(13,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "13:00:01 - 13:30:00"
elif (x >= datetime.time(13,30,1)) & (x <= datetime.time(14,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "13:30:01 - 14:00:00"
elif (x >= datetime.time(14,0,1)) & (x <= datetime.time(14,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "14:00:01 - 14:30:00"
elif (x >= datetime.time(14,30,1)) & (x <= datetime.time(15,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "14:30:01 - 15:00:00"
elif (x >= datetime.time(15,0,1)) & (x <= datetime.time(15,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "15:00:01 - 15:30:00"
elif (x >= datetime.time(15,30,1)) & (x <= datetime.time(16,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "15:30:01 - 16:00:00"
elif (x >= datetime.time(16,0,1)) & (x <= datetime.time(16,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "16:00:01 - 16:30:00"
elif (x >= datetime.time(16,30,1)) & (x <= datetime.time(17,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "16:30:01 - 17:00:00"
elif (x >= datetime.time(17,0,1)) & (x <= datetime.time(17,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "17:00:01 - 17:30:00"
elif (x >= datetime.time(17,30,1)) & (x <= datetime.time(18,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "17:30:01 - 18:00:00"
elif (x >= datetime.time(18,0,1)) & (x <= datetime.time(18,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "18:00:01 - 18:30:00"
elif (x >= datetime.time(18,30,1)) & (x <= datetime.time(19,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "18:30:01 - 19:00:00"
elif (x >= datetime.time(19,0,1)) & (x <= datetime.time(19,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "19:00:01 - 19:30:00"
elif (x >= datetime.time(19,30,1)) & (x <= datetime.time(20,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "19:30:01 - 20:00:00"
elif (x >= datetime.time(20,0,1)) & (x <= datetime.time(20,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "20:00:01 - 20:30:00"
elif (x >= datetime.time(20,30,1)) & (x <= datetime.time(21,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "20:30:01 - 21:00:00"
elif (x >= datetime.time(21,0,1)) & (x <= datetime.time(21,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "21:00:01 - 21:30:00"
elif (x >= datetime.time(21,30,1)) & (x <= datetime.time(22,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "21:30:01 - 22:00:00"
elif (x >= datetime.time(22,0,1)) & (x <= datetime.time(22,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "22:00:01 - 22:30:00"
elif (x >= datetime.time(22,30,1)) & (x <= datetime.time(23,0,0)):
dataframe.loc[j,'half_hourly_bucket'] = "22:30:01 - 23:00:00"
elif (x >= datetime.time(23,0,1)) & (x <= datetime.time(23,30,0)):
dataframe.loc[j,'half_hourly_bucket'] = "23:00:01 - 23:30:00"
else:
dataframe.loc[j,'half_hourly_bucket'] = "23:30:01 - 00:00:00"
return dataframe
Есть ли способ избежать этого цикла и увеличить скорость обработки?