import pandas as pd
import datetime
import numpy as np
from datetime import timedelta
def diff_func(row):
    """Return the gap between a row's timestamp and its previous event.

    Args:
        row: Mapping-like record (for example a row handed over by
            ``DataFrame.apply(..., axis=1)``) exposing ``'Timestamp'`` and
            ``'previous_end'`` entries that support subtraction.

    Returns:
        The result of ``row['Timestamp'] - row['previous_end']``.
    """
    return row['Timestamp'] - row['previous_end']
# Mock click log: one (user, timestamp, url) record per page view.
mock_log_rows = [
    (1, "2017-01-01 09:00:00", "htt://x.org/page1.html"),
    (1, "2017-01-01 09:01:00", "htt://x.org/page2.html"),
    (1, "2017-01-01 09:02:00", "htt://x.org/page3.html"),
    (1, "2017-01-01 09:05:00", "htt://x.org/page3.html"),
    (1, "2017-01-01 09:30:00", "htt://x.org/page2.html"),
    (1, "2017-01-01 09:33:00", "htt://x.org/page1.html"),
    (1, "2017-01-01 09:37:00", "htt://x.org/page2.html"),
    (1, "2017-01-01 09:41:00", "htt://x.org/page3.html"),
    (1, "2017-01-01 10:00:00", "htt://x.org/page1.html"),
    (1, "2017-01-01 11:00:00", "htt://x.org/page2.html"),
    (2, "2017-01-01 09:41:00", "htt://x.org/page3.html"),
    (2, "2017-01-01 09:42:00", "htt://x.org/page1.html"),
    (2, "2017-01-01 09:43:00", "htt://x.org/page2.html"),
]
dfMockLog = pd.DataFrame(mock_log_rows, columns=['user', 'Timestamp', 'url'])
dfMockLog['Timestamp'] = pd.to_datetime(dfMockLog['Timestamp'])

# Sort so that, within each user, rows are in chronological order; the
# shift and the running total below both rely on that ordering.
dfMockLog = dfMockLog.sort_values(['user', 'Timestamp'])

# Timestamp of the same user's previous event. Shifting inside the group
# keeps one user's last event from leaking into the next user's first row,
# which therefore gets a missing value.
dfMockLog['previous_end'] = dfMockLog.groupby('user')['Timestamp'].shift(1)

# Gap since the previous event, expressed as float seconds. The subtraction
# is done on whole columns, so no row-wise apply is needed. Each user's
# first row has no previous event and ends up as NaN.
elapsed = dfMockLog['Timestamp'] - dfMockLog['previous_end']
dfMockLog['time_diff'] = elapsed.dt.total_seconds()
# Other ways to express the same timedelta column:
#   elapsed / pd.Timedelta(minutes=1)  -> float count of any chosen unit
#   elapsed.dt.components              -> days/hours/minutes/seconds/... columns
#   elapsed.dt.days, elapsed.dt.seconds -> day part and seconds-within-day part
#                                          (``seconds`` is NOT the total)

# Running total of the gaps, restarted for every user rather than carried
# across the whole frame.
dfMockLog['cum_sum'] = dfMockLog.groupby('user')['time_diff'].cumsum()
print(dfMockLog)
Мне нужно, чтобы столбец "time_diff" был преобразован в секунды. Столбец "cum_sum" должен содержать накопленную сумму, рассчитанную отдельно для каждого значения "user". Будет здорово, если вы сможете поделиться всеми возможными форматами представления timedelta.