Это динамическая страница. Лучший способ получить результаты — обратиться напрямую к источнику данных через API. Я поместил результат в DataFrame, но вы можете делать с ним что угодно: например, просто извлечь href, обратившись к соответствующему столбцу (я не знаю, какой именно URL вам нужен).
import math

import pandas as pd
import requests

try:
    # pandas >= 1.0 exposes json_normalize at the top level.
    json_normalize = pd.json_normalize
except AttributeError:
    # Older pandas only ships it under pandas.io.json (that path was removed in 2.0).
    from pandas.io.json import json_normalize
# Download every page of a Met Museum collection-listing query and combine
# the per-page results into a single DataFrame named ``df``.
url = 'https://www.metmuseum.org/api/collection/collectionlisting'

# Number of records requested per call; also the step between page offsets.
PER_PAGE = 100


def _build_payload(offset):
    """Return the query parameters for the listing page starting at *offset*.

    Only ``offset`` varies between pages; the search filters stay fixed.
    """
    return {
        'artist': '',
        'department': '',
        'era': '8000-2000 B.C.',
        'geolocation': '',
        'material': 'Bowls',
        'offset': '%s' % offset,
        'pageSize': '0',
        'perPage': '%s' % PER_PAGE,
        'searchField': 'All',
        'showOnly': 'openAccess',
        'sortBy': 'Relevance',
        'sortOrder': 'asc',
    }


def _fetch_json(params):
    """GET the listing endpoint with *params* and return the decoded JSON.

    Raises ``requests.HTTPError`` on a non-2xx response, so a server error
    surfaces as such rather than as a JSON decoding failure.
    """
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()


# The first page also reports how many records match the query in total.
payload = _build_payload(0)
jsonData = _fetch_json(payload)
print ('Aquired page 1...')
frames = [json_normalize(jsonData['results'])]

total_collections = jsonData['totalResults']
totalPages = math.ceil(total_collections / PER_PAGE)

# Page 1 (offset 0) is already loaded; fetch pages 2..totalPages.
for page in range(1, totalPages):
    payload = _build_payload(page * PER_PAGE)
    jsonData = _fetch_json(payload)
    print ('Aquired page %s...' % (page + 1))
    frames.append(json_normalize(jsonData['results']))

# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the whole accumulated frame on every iteration.
if len(frames) == 1:
    df = frames[0]
else:
    df = pd.concat(frames, sort=True, ignore_index=True)
Вывод:
# Render the first five rows of the combined frame as text and print them.
first_rows = df.head(5)
print(first_rows.to_string())
accessionNumber artist culture date description galleryInformation image largeImage medium regularImage teaserText title url
0 36.1.117 ca. 3850–2960 B.C.\n Not on view https://images.metmuseum.org/CRDImages/eg/mobi... eg/web-large/36.1.117_EGDP010235.jpg Pottery eg/web-additional/36.1.117_EGDP010235.jpg <p>Date: ca. 3850–2960 B.C.\n<br/>Accession Nu... Bowl with flattened rim /art/collection/search/552185?&searchField=All...
1 1992.252.1 Japan None Accession Number: 1992.252.1 On view at The Met Fifth Avenue in <a href='ht... https://images.metmuseum.org/CRDImages/as/mobi... as/web-large/DP23088.jpg Earthenware with cord-marked and incised decor... as/web-additional/DP23088.jpg <p>Accession Number: 1992.252.1</p> “Flame-rimmed” deep bowl (kaen doki)\n\n /art/collection/search/44905?&searchField=All&...
2 33.4.41 ca. 3850–2960 B.C. Not on view https://images.metmuseum.org/CRDImages/eg/mobi... eg/web-large/33.4.41_EGDP011262.jpg Pottery eg/web-additional/33.4.41_EGDP011262.jpg <p>Date: ca. 3850–2960 B.C.<br/>Accession Numb... Deep bowl /art/collection/search/558199?&searchField=All...
3 12.181.38 ca. 3100–2649 B.C. Not on view https://images.metmuseum.org/CRDImages/eg/mobi... eg/web-large/12-181-38.jpg Travertine (Egyptian alabaster) eg/web-additional/12-181-38.jpg <p>Date: ca. 3100–2649 B.C.<br/>Accession Numb... Shallow bowl /art/collection/search/547548?&searchField=All...
4 99.4.55 ca. 3850–2960 B.C.\n Not on view https://images.metmuseum.org/CRDImages/eg/mobi... eg/web-large/99.4.55_EGDP010319.jpg Pottery eg/web-additional/99.4.55_EGDP010319.jpg <p>Date: ca. 3850–2960 B.C.\n<br/>Accession Nu... Shallow bowl /art/collection/search/552308?&searchField=All...