Это решение обрабатывает события в хронологическом порядке и отслеживает состояние (присутствует или нет) каждого, кто появляется в данных.
Основные допущения:
- засчитывается только то время, когда присутствует владелец встречи (в примере — «Organiser Name»)
- окончание сеанса - последнее записанное событие
Остальные примечания в коде.
Проверка правильности ответа: организатор присоединился в 16:30:41, максимальная отметка времени — 17:30:32, а организатор отсутствовал около минуты, поэтому максимально возможная продолжительность составляет около 59 минут, если следовать логике @above_c_level относительно общего времени вместе.
import pandas as pd
class MeetingMonitor:
    """Replay join/leave events and total each participant's attendance.

    Events are processed in timestamp order.  Each participant has a
    present/absent state, an accumulated ``duration`` and the timestamp of
    the join that opened their current interval.

    Rules implemented below:
    - a first join earlier than the owner's earliest event is clamped to
      that earliest owner timestamp;
    - when the owner leaves, every open interval is closed at that moment;
    - the session is considered to end at the last recorded event.

    NOTE: a participant who joins while the owner is away starts accruing
    time immediately; only the very first join is clamped.
    """

    def __init__(self, df, owner):
        """Normalise the event log.

        Args:
            df: DataFrame with exactly three columns, in the order
                (name, action text, timestamp).  Action text matching
                ``Joined.*`` becomes 1 and ``Left.*`` becomes 0.
            owner: value of the name column identifying the meeting owner.
        """
        # Work on a copy so the caller's frame keeps its own column names
        # and raw values.
        df = df.copy()
        df.columns = ["who", "action", "timestamp"]
        df['action'] = df['action'].replace('Joined.*', 1, regex=True)
        df['action'] = df['action'].replace('Left.*', 0, regex=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        self.owner = owner
        # Earliest timestamp of any owner event; used to clamp early joiners.
        self.min_join = df.loc[df['who'] == owner, 'timestamp'].min()
        self.df = df.sort_values('timestamp')
        self.folks = {}

    def get_report(self):
        """Replay all events and return per-participant totals.

        Returns:
            DataFrame with columns ``who``, ``duration`` and ``slack``
            (longest duration minus this participant's duration), sorted
            by ``slack`` ascending.
        """
        # Start from scratch so repeated calls give the same answer.
        self.folks = {}
        for _, event in self.df.iterrows():
            self.handle(event)
        # no data on true end of session, so best guess is last event
        self.everybody_leaves(self.df['timestamp'].max())
        results = pd.DataFrame(
            [(folk['who'], folk['duration']) for folk in self.folks.values()],
            columns=['who', 'duration'],
        )
        results['slack'] = results.duration.max() - results.duration
        return results.sort_values('slack')

    def make_folk(self, event):
        """Register a participant seen for the first time, marked present."""
        folk = {
            'who': event['who'],
            'duration': pd.Timedelta(0),
            'state': 1,
            # Time before the owner's earliest event is not counted.
            'in': max(event.timestamp, self.min_join),
        }
        self.folks[folk['who']] = folk

    def join(self, event):
        """Mark a known participant present and open a new interval."""
        folk = self.folks[event['who']]
        folk['state'] = 1
        folk['in'] = event.timestamp

    def leave(self, who, timestamp):
        """Close ``who``'s open interval at ``timestamp``.

        Does nothing when the participant is already absent, which happens
        after ``everybody_leaves`` has closed their interval.
        """
        folk = self.folks[who]
        if folk['state'] == 0:  # everybody leaves
            return
        folk['duration'] += timestamp - folk['in']
        folk['state'] = 0

    def everybody_leaves(self, timestamp):
        """Close every open interval at ``timestamp``."""
        for who in self.folks:
            self.leave(who, timestamp)

    def handle(self, event):
        """Apply one event row (``who``, ``action``, ``timestamp``).

        Returns:
            1 for every event that was processed or deliberately ignored.

        Raises:
            ValueError: if ``action`` is neither 1 (joined) nor 0 (left).
        """
        if event.who not in self.folks:
            if event.action == 1:
                self.make_folk(event)
                return 1
            if event.action == 0:
                # someone left who wasn't here ... ok, nothing to close
                return 1
        elif event.action == self.folks[event.who]['state']:
            # this shouldn't happen, mostly because of "everybody leaves"
            # asymmetric assumption for bad data here,
            # biased in favor of double joiners *shrug*
            return 1
        elif event.action == 1:
            self.join(event)
            return 1
        elif event.action == 0:
            if event.who == self.owner:
                # Nobody accrues time while the owner is away.
                self.everybody_leaves(event.timestamp)
            else:
                self.leave(event.who, event.timestamp)
            return 1
        # https://waffleguppies.tumblr.com/post/50741279401/just-a-reminder-that-the-nuclear-tesuji-is-a
        raise ValueError("(ノಠ益ಠ)ノ彡" + str(event))
# Sample attendance export: one row per event as (name, action, timestamp).
rows = [
    ["Organiser Name", "Joined", "03/08/2020, 16:30:41"],
    ["Organiser Name", "Left", "03/08/2020, 17:03:32"],
    ["Organiser Name", "Joined", "03/08/2020, 17:04:25"],
    ["Student 1", "Joined before", "03/08/2020, 16:30:41"],
    ["Student 1", "Joined before", "03/08/2020, 17:04:27"],
    ["Student 2", "Joined before", "03/08/2020, 16:30:41"],
    ["Student 2", "Joined", "03/08/2020, 17:04:27"],
    ["Student 3", "Joined", "03/08/2020, 16:31:47"],
    ["Student 3", "Joined", "03/08/2020, 17:04:27"],
    ["Student 3", "Left", "03/08/2020, 17:30:32"],
    ["Student 4", "Joined", "03/08/2020, 16:32:01"],
    ["Student 4", "Left", "03/08/2020, 16:37:20"],
    ["Student 4", "Joined", "03/08/2020, 16:39:27"],
    ["Student 4", "Joined", "03/08/2020, 17:04:27"],
    ["Student 4", "Left", "03/08/2020, 17:17:19"],
    ["Student 4", "Joined", "03/08/2020, 17:19:13"],
    ["Student 5", "Joined", "03/08/2020, 16:35:41"],
    ["Student 5", "Left", "03/08/2020, 16:36:46"],
    ["Student 6", "Joined", "03/08/2020, 16:38:01"],
    ["Student 6", "Left", "03/08/2020, 16:40:14"],
    ["Student 7", "Joined", "03/08/2020, 17:15:08"],
    ["Student 7", "Left", "03/08/2020, 17:15:44"],
    ["Student 7", "Joined", "03/08/2020, 17:15:48"],
    ["Student 7", "Left", "03/08/2020, 17:15:54"],
    ["Student 8", "Joined", "03/08/2020, 17:18:12"],
    ["Student 8", "Left", "03/08/2020, 17:19:59"],
]
column_names = ["Full Name", "User Action", "Timestamp"]
df = pd.DataFrame(rows, columns=column_names)

# The owner is passed explicitly rather than inferred from row order;
# in this sample it is the name in the first row.
owner_name = df.iloc[0, 0]
mm = MeetingMonitor(df, owner_name)
res = mm.get_report()
print(res)
Вывод:
who duration slack
0 Organiser Name 0 days 00:58:58 0 days 00:00:00
1 Student 1 0 days 00:58:56 0 days 00:00:02
2 Student 2 0 days 00:58:56 0 days 00:00:02
3 Student 3 0 days 00:57:50 0 days 00:01:08
4 Student 4 0 days 00:53:35 0 days 00:05:23
6 Student 6 0 days 00:02:13 0 days 00:56:45
8 Student 8 0 days 00:01:47 0 days 00:57:11
5 Student 5 0 days 00:01:05 0 days 00:57:53
7 Student 7 0 days 00:00:42 0 days 00:58:16