Я пытаюсь собрать данные (выполнить скрапинг) с веб-сайта с кнопкой «Загрузить ещё» с помощью Selenium и BeautifulSoup. Мне удалось добиться того, чтобы скрипт нажимал кнопку «Загрузить ещё» и подгружал остальное содержимое, но у меня возникли проблемы с сохранением содержимого в файл JSON. Вот мой скрипт:
import json
import time

from bs4 import BeautifulSoup as soup
from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape every article card from a Smarthistory listing page that paginates
# through a "load more" button, then save the cards as a JSON list.
url = "https://smarthistory.org/americas-before-1900/"
LOAD_MORE_SELECTOR = "#load-more-cc-objects"

# NOTE(review): passing the chromedriver path positionally is only accepted by
# older Selenium releases; newer ones expect a Service object -- confirm the
# installed version before changing this call.
driver = webdriver.Chrome('/Users/rawlins/Downloads/chromedriver')
try:
    driver.get(url)

    # Keep clicking the "load more" button until it is gone or hidden.
    page_num = 0
    while True:
        buttons = driver.find_elements(By.CSS_SELECTOR, LOAD_MORE_SELECTOR)
        # A button that is still in the DOM but no longer displayed cannot be
        # clicked; treat that the same as "no more pages" instead of raising.
        if not buttons or not buttons[0].is_displayed():
            break
        buttons[0].click()
        page_num += 1
        print("getting page number " + str(page_num))
        # Give the page time to append the next batch before re-checking.
        time.sleep(1)

    # Parse the fully expanded page while the browser session is still alive.
    page_soup = soup(driver.page_source, 'lxml')
finally:
    # Always release the browser, even if loading or clicking failed.
    driver.quit()

# Match cards by a subset of their classes. The original exact-string match
# also required "tablescraper-selected-row", which appears to be injected by a
# browser extension and is presumably absent in a Selenium-driven session --
# TODO confirm against the live page markup.
containers = page_soup.select("div.mb-8.hover-zoom")

data = []
for container in containers:
    # CSS class selectors match regardless of class order or extra classes.
    title_tag = container.select_one(
        "h5.m-0.mt-4.text-grey-darker.text-normal.leading-tight.hover-connect"
    )
    link_tag = container.find("a", href=True)
    # Skip cards missing a title or a link rather than crashing mid-scrape.
    if title_tag is None or link_tag is None:
        continue
    data.append({
        "type": "Course Material",
        "title": title_tag.text.strip(),
        "link": link_tag["href"],
        "source": "Smarthistory",
        "base_url": "https://smarthistory.org",
        "license": "Attribution-NonCommercial-ShareAlike",
    })

# ensure_ascii=False writes non-ASCII titles verbatim, so the file encoding
# must be explicit; indent makes the output a readable multi-line list.
with open("smarthistory-2.json", "w", encoding="utf-8") as writeJSON:
    json.dump(data, writeJSON, ensure_ascii=False, indent=2)
Ожидаемый результат выглядит примерно так:
[
{
"type": "Course Material",
"title": "Impressionism as optical realism: Monet",
"link": "https://smarthistory.org/impressionism-optical-realism-monet/",
"source": "Smarthistory",
"base_url": "https://smarthistory.org",
"license": "Attribution-NonCommercial-ShareAlike"
},
{
"type": "Course Material",
"title": "Impressionism: painting modern life",
"link": "https://smarthistory.org/painting-modern-life/",
"source": "Smarthistory",
"base_url": "https://smarthistory.org",
"license": "Attribution-NonCommercial-ShareAlike"
}
]