Исправленный ответ:
Здесь используются `iloc` из pandas и `repeat` из numpy: сначала для каждой строки по диапазону дат и допустимым дням недели определяется, сколько раз её нужно повторить, а затем из строк исходного DataFrame собирается новый DataFrame, в котором каждая строка повторена нужное число раз.
import pandas as pd
import numpy as np
# Sample schedule data, one record per four consecutive values:
# START, END, FREQ and VARIABLE.
# FREQ is a 7-character weekday mask: a digit 1-7 marks an active weekday
# and "." marks an inactive one.
df_arr = np.array([
    '2017-03-26 16:55:00', '2017-10-28 16:55:00', '1234567', 'x',
    '2017-03-26 20:35:00', '2017-10-28 20:35:00', '1234567', 'y',
    '2017-03-26 14:55:00', '2017-10-28 14:55:00', '..3.567', 'y',
    '2017-03-26 11:15:00', '2017-10-28 11:15:00', '1234567', 'y',
    # The last value is 'x' (no leading space) so it matches the first row.
    '2017-03-26 09:30:00', '2017-06-11 09:30:00', '......7', 'x',
])
df_columns = ['START', 'END', 'FREQ', 'VARIABLE']
# Let numpy infer the row count so adding a record needs no other change.
df = pd.DataFrame(df_arr.reshape(-1, len(df_columns)), columns=df_columns)
def get_weekdays_dates_repeats(start, end, valid_weekday_nums):
    """Return the selected weekdays and dates between two timestamps.

    Every calendar day from ``start`` to ``end`` (both inclusive, time of
    day dropped) is numbered 1 (Monday) through 7 (Sunday). Only the days
    whose number appears in ``valid_weekday_nums`` are kept.

    Args:
        start: First day of the range; any value ``pd.date_range`` accepts.
        end: Last day of the range; any value ``pd.date_range`` accepts.
        valid_weekday_nums: List or array of weekday numbers (1-7) to keep.

    Returns:
        A tuple ``(day_nums, dates, count)``: the weekday numbers of the
        kept days, the kept days as a ``datetime64[D]`` array, and how many
        days were kept.
    """
    date_range = pd.date_range(start, end, freq="D", normalize=True)
    # pandas numbers Monday as 0; shift to the 1-7 scheme used by the mask.
    all_day_nums = date_range.dayofweek.values + 1
    keep = np.isin(all_day_nums, valid_weekday_nums)
    day_nums = all_day_nums[keep]
    dates = date_range[keep]
    return day_nums, dates.values.astype('<M8[D]'), day_nums.size
# Expand every row of df into one row per matching calendar day.
starts = df.START.values
ends = df.END.values
# regex=False: "." is the literal "inactive day" placeholder, not the regex
# wildcard, and the default for `regex` differs between pandas versions.
freqs = df.FREQ.str.replace('.', '', regex=False).values

weekdays_arr_list = []
dates_arr_list = []
repeat_counts = []
for start, end, freq in zip(starts, ends, freqs):
    # Each remaining character is one active weekday number (1-7).
    valid_day_nums = [int(ch) for ch in freq]
    days, dates, repeat = get_weekdays_dates_repeats(start, end, valid_day_nums)
    weekdays_arr_list.append(days)
    dates_arr_list.append(dates)
    repeat_counts.append(repeat)

weekday_col = np.concatenate(weekdays_arr_list)
dates_col = np.concatenate(dates_arr_list)
# np.repeat needs integer counts, one per source row.
repeats = np.array(repeat_counts, dtype=int)

# iloc is positional, so repeat row positions rather than index labels;
# this stays correct even when df does not carry the default 0..n-1 index.
df2 = df.iloc[np.repeat(np.arange(len(df)), repeats)].reset_index(drop=True)
df2['day_num'] = weekday_col
df2['date'] = dates_col
df2.head()
START END FREQ VARIABLE day_num date
0 2017-03-26 16:55:00 2017-10-28 16:55:00 1234567 x 7 2017-03-26
1 2017-03-26 16:55:00 2017-10-28 16:55:00 1234567 x 1 2017-03-27
2 2017-03-26 16:55:00 2017-10-28 16:55:00 1234567 x 2 2017-03-28
3 2017-03-26 16:55:00 2017-10-28 16:55:00 1234567 x 3 2017-03-29
4 2017-03-26 16:55:00 2017-10-28 16:55:00 1234567 x 4 2017-03-30
# Preview the last five rows of the expanded frame.
df2.tail(n=5)
START END FREQ VARIABLE day_num date
782 2017-03-26 09:30:00 2017-06-11 09:30:00 ......7 x 7 2017-05-14
783 2017-03-26 09:30:00 2017-06-11 09:30:00 ......7 x 7 2017-05-21
784 2017-03-26 09:30:00 2017-06-11 09:30:00 ......7 x 7 2017-05-28
785 2017-03-26 09:30:00 2017-06-11 09:30:00 ......7 x 7 2017-06-04
786 2017-03-26 09:30:00 2017-06-11 09:30:00 ......7 x 7 2017-06-11