Проверьте, есть ли на странице кнопка «Next»: если есть — нажмите её,
иначе выйдите из цикла while (break).
if len(browser.find_elements_by_xpath("//a[contains(.,'Next')]"))>0:
browser.find_element_by_xpath("//a[contains(.,'Next')]").click()
else:
break
Нет необходимости использовать time.sleep() —
вместо этого используйте WebDriverWait().
Код:
import csv
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Walk through every page of the directory listing and dump the rows of the
# page's tables into a CSV file. Selenium drives the browser (page loading and
# clicking "Next"); BeautifulSoup parses the HTML of each loaded page.
browser = webdriver.Chrome()
browser.get('https://dir.businessworld.com.my/15/posts/16-Computers-The-Internet')

filename = "C:/Users/User/Desktop/test.csv"
next_link_xpath = "//a[contains(.,'Next')]"
# Header rows show up in the markup of each parsed page; write them only once.
headers_written = False

try:
    # newline='' is required by the csv module (otherwise blank lines appear
    # between rows on Windows); the explicit encoding keeps non-ASCII cell
    # text from depending on the platform default. The `with` block
    # guarantees the file is flushed and closed even if scraping fails.
    with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)

        while True:
            # Wait for the listing table instead of sleeping a fixed time.
            WebDriverWait(browser, 10).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, "table.postlisting"))
            )

            # Re-parse the page on every iteration: parsing only once before
            # the loop would write the first page's rows over and over.
            soup = bs(browser.page_source, "html.parser")

            for tr in soup.find_all("tr"):
                # A row containing <th> cells is treated as a header row.
                headers = [th.text for th in tr.find_all("th")]
                if headers:
                    if not headers_written:
                        print("Inserting headers : {}".format(','.join(headers)))
                        csv_writer.writerow(headers)
                        headers_written = True
                    continue

                # Prefer the link text when a cell wraps its value in <a>.
                data = [
                    td.a.text.strip() if td.a else td.text.strip()
                    for td in tr.find_all("td")
                ]
                if data:
                    print("Inserting data: {}".format(','.join(data)))
                    csv_writer.writerow(data)

            # Follow the "Next" link if the page has one; otherwise this was
            # the last page. One lookup serves both the check and the click.
            next_links = browser.find_elements(By.XPATH, next_link_xpath)
            if not next_links:
                break
            next_links[0].click()
finally:
    # Always shut the browser (and its driver process) down.
    browser.quit()