Я написал функцию для определения частоты дискретизации временного ряда, используя подход, основанный на правилах. Думаю, она будет работать в большинстве ситуаций. Любые отзывы приветствуются.
def findSampleFreq(ts, nrows=10, time_col='TimeStamp'):
    """Guess the sampling frequency of a time-stamped DataFrame.

    The mean gap between consecutive timestamps at the start of ``ts`` is
    expressed in each candidate unit (nanoseconds up to years).  The unit in
    which that mean gap is numerically closest to 1 is taken to be the
    sampling frequency.

    Args:
        ts: pandas DataFrame whose ``time_col`` column holds timestamps.
            Rows are read by position, so any index is accepted.  The rows
            are expected to be in ascending time order.
        nrows: Maximum number of consecutive gaps to average, counted from
            the first row.  Fewer are used when ``ts`` is shorter.
        time_col: Name of the timestamp column.

    Returns:
        One of the alias strings ``'N'``, ``'U'``, ``'L'``, ``'S'``, ``'T'``,
        ``'H'``, ``'D'``, ``'W'``, ``'MS'``, ``'Q'``, ``'YS'``.  When several
        units are equally close to 1, the smallest unit wins.

    Raises:
        ValueError: If no gap can be formed, i.e. ``ts`` has fewer than two
            rows or ``nrows`` is smaller than 1.
    """
    # k gaps need k + 1 rows, so a frame with R rows offers at most R - 1.
    n_gaps = min(nrows, ts.shape[0] - 1)
    if n_gaps < 1:
        raise ValueError(
            "findSampleFreq needs at least two rows and nrows >= 1 "
            "to measure a gap between timestamps"
        )

    stamps = ts[time_col]
    # The sum of consecutive differences telescopes to (last - first), so the
    # mean gap needs only the two end points of the window.  ``iloc`` keeps
    # the lookup positional instead of relying on 0..N-1 index labels.
    avg_diff = (stamps.iloc[n_gaps] - stamps.iloc[0]) / n_gaps

    nanos = avg_diff.value
    micros = nanos / 1000
    millis = micros / 1000
    seconds = millis / 1000
    minutes = seconds / 60
    hours = minutes / 60
    # Day-based units use only the whole-day component of the mean gap.
    days = avg_diff.days

    # (alias, mean gap expressed in that unit), ordered from smallest unit to
    # largest; the order decides ties because ``min`` keeps the first match.
    gap_per_unit = [
        ('N', nanos),
        ('U', micros),
        ('L', millis),
        ('S', seconds),
        ('T', minutes),
        ('H', hours),
        ('D', days),
        ('W', days / 7),
        ('MS', days / 30.4375),
        ('Q', days / 91.3125),
        ('YS', days / 365.25),
    ]
    best_alias, _ = min(gap_per_unit, key=lambda item: abs(1 - item[1]))
    return best_alias