my_list = [{'key':1,'timestamp':1234567890,'action':'like','type':'photo','id':245},
{'key':2,'timestamp':2345678901,'action':'like','type':'photo','id':252},
{'key':1,'timestamp':3456789012,'action':'like','type':'photo','id':212}]
r = {}
for d in my_list:
k = d['key']
if k not in r or r[k]['timestamp'] < d['timestamp']:
r[k] = d
list(r.values())
вывод:
[{'key': 1,
'timestamp': 3456789012,
'action': 'like',
'type': 'photo',
'id': 212},
{'key': 2,
'timestamp': 2345678901,
'action': 'like',
'type': 'photo',
'id': 252}]
Вот простой тест между большинством предлагаемых решений:
from itertools import groupby
import itertools
from operator import itemgetter
from simple_benchmark import BenchmarkBuilder
b = BenchmarkBuilder()
@b.add_function()
def kederrac(lst):
r = {}
for d in lst:
k = d['key']
if k not in r or r[k]['timestamp'] < d['timestamp']:
r[k] = d
return list(r.values())
@b.add_function()
def Daweo(lst):
s = sorted(lst, key=lambda x:(x['key'],x['timestamp']), reverse=True)
return [next(g) for k, g in itertools.groupby(s, lambda x:x['key'])]
@b.add_function()
def Jan(lst):
result = []
sorted_lst = sorted(lst, key=lambda x: x['key'])
for k,v in groupby(sorted_lst, key = lambda x: x['key']):
result.append(max(v, key=lambda x: x['timestamp']))
return result
@b.add_function()
def Jan_one_line(lst):
keyfunc = lambda x: x['key']
return [max(v, key = lambda x: x['timestamp'])
for k, v in groupby(sorted(lst, key=keyfunc), key=keyfunc)]
@b.add_function()
def gold_cy(lst):
key = itemgetter('key')
ts = itemgetter('timestamp')
def custom_sort(item):
return (key(item), -ts(item))
results = []
for k, v in groupby(sorted(lst, key=custom_sort), key=key):
results.append(next(v))
return results
@b.add_arguments('Number of dictionaries in list')
def argument_provider():
for exp in range(2, 18):
size = 2**exp
yield size, [{'key':choice(range((size // 10) or 2)),
'timestamp': randint(1_000_000_000, 10_000_000_000),
'action':'like','type':'photo','id':randint(100, 10000)}
for _ in range(size)]
r = b.run()
r.plot()
показывает, что простое решение for
l oop более эффективно, результат ожидается, поскольку встроенная функция sorted
будет иметь сложность времени O (NlogN) * 1016 *