- См. встроенные комментарии в коде.
- Необходимо использовать pandas v0.25 или выше
- В каждой записи списки `confirmed.timeline.date` и `deaths.timeline.date` должны быть одинаковой длины.
import pandas as pd
# Build a frame with one row per record in `location`.
top = pd.DataFrame(location)
# Keep only the columns used below; copy so edits to `d` never touch `top`.
d = top[['country', 'country_population', 'timelines']].copy()
# Split every timeline dict into two parallel list columns:
# "<kind>.timeline.date" holds the keys, "<kind>.timeline.count" the values.
# Plain lists are used instead of building a pd.Series per row inside apply().
for kind in ('confirmed', 'deaths'):
    timelines = [entry[kind]['timeline'] for entry in d['timelines']]
    d[kind + '.timeline.date'] = pd.Series([list(t.keys()) for t in timelines], index=d.index)
    d[kind + '.timeline.count'] = pd.Series([list(t.values()) for t in timelines], index=d.index)
# The nested dicts are no longer needed once they have been flattened.
d = d.drop(columns=['timelines'])
# Notebook-style display of the intermediate frame (no effect in a plain script).
d
# Turn each list column into one row per list element, then re-attach the
# scalar country columns by index.
# Within a row, all four lists must have the same length so that the exploded
# columns share the same repeated index and line up row-for-row.
cols = ['confirmed.timeline.date', 'confirmed.timeline.count', 'deaths.timeline.date', 'deaths.timeline.count']
# Series.explode touches only the column of interest, instead of exploding the
# whole frame once per column and discarding everything else.
exploded = pd.concat([d[col].explode() for col in cols], axis=1)
d = d[['country', 'country_population']].join(exploded)
print(d)
country country_population confirmed.timeline.date confirmed.timeline.count deaths.timeline.date deaths.timeline.count
0 US 327167434 2020-01-22T00:00:00Z 1 2020-01-22T00:00:00Z 0
0 US 327167434 2020-01-23T00:00:00Z 1 2020-01-23T00:00:00Z 0
0 US 327167434 2020-01-24T00:00:00Z 2 2020-01-24T00:00:00Z 0
1 AF 327167435 2020-02-22T00:00:00Z 2 2020-02-22T00:00:00Z 1
1 AF 327167435 2020-02-23T00:00:00Z 2 2020-02-23T00:00:00Z 1
1 AF 327167435 2020-02-24T00:00:00Z 3 2020-02-24T00:00:00Z 1
2 AS 327167436 2020-03-22T00:00:00Z 3 2020-03-22T00:00:00Z 2
2 AS 327167436 2020-03-23T00:00:00Z 3 2020-03-23T00:00:00Z 2
2 AS 327167436 2020-03-24T00:00:00Z 4 2020-03-24T00:00:00Z 2
Тестовые данные
# Sample input: a list of records, each holding country metadata plus
# "timelines" that map ISO date strings to counts for confirmed cases and deaths.
location = [
    {
        'coordinates': {'latitude': '37.0902', 'longitude': '-95.7129'},
        'country': 'US',
        'country_code': 'US',
        'country_population': 327167434,
        'id': 225,
        'last_updated': '2020-05-06T11:33:46.184263Z',
        'latest': {'confirmed': 1204351, 'deaths': 71064, 'recovered': 0},
        'province': '',
        'timelines': {
            'confirmed': {
                'latest': 1204351,
                'timeline': {
                    '2020-01-22T00:00:00Z': 1,
                    '2020-01-23T00:00:00Z': 1,
                    '2020-01-24T00:00:00Z': 2,
                },
            },
            'deaths': {
                'latest': 71064,
                'timeline': {
                    '2020-01-22T00:00:00Z': 0,
                    '2020-01-23T00:00:00Z': 0,
                    '2020-01-24T00:00:00Z': 0,
                },
            },
        },
    },
    {
        'coordinates': {'latitude': '37.0902', 'longitude': '-95.7129'},
        'country': 'AF',
        'country_code': 'AF',
        'country_population': 327167435,
        'id': 226,
        'last_updated': '2020-05-06T11:33:46.184263Z',
        'latest': {'confirmed': 1204351, 'deaths': 71064, 'recovered': 0},
        'province': '',
        'timelines': {
            'confirmed': {
                'latest': 1204351,
                'timeline': {
                    '2020-02-22T00:00:00Z': 2,
                    '2020-02-23T00:00:00Z': 2,
                    '2020-02-24T00:00:00Z': 3,
                },
            },
            'deaths': {
                'latest': 71064,
                'timeline': {
                    '2020-02-22T00:00:00Z': 1,
                    '2020-02-23T00:00:00Z': 1,
                    '2020-02-24T00:00:00Z': 1,
                },
            },
        },
    },
    {
        'coordinates': {'latitude': '37.0902', 'longitude': '-95.7129'},
        'country': 'AS',
        'country_code': 'AS',
        'country_population': 327167436,
        'id': 227,
        'last_updated': '2020-05-06T11:33:46.184263Z',
        'latest': {'confirmed': 1204351, 'deaths': 71064, 'recovered': 0},
        'province': '',
        'timelines': {
            'confirmed': {
                'latest': 1204351,
                'timeline': {
                    '2020-03-22T00:00:00Z': 3,
                    '2020-03-23T00:00:00Z': 3,
                    '2020-03-24T00:00:00Z': 4,
                },
            },
            'deaths': {
                'latest': 71064,
                'timeline': {
                    '2020-03-22T00:00:00Z': 2,
                    '2020-03-23T00:00:00Z': 2,
                    '2020-03-24T00:00:00Z': 2,
                },
            },
        },
    },
]