Попробуйте поймать было бы лучше
from selenium import webdriver
import time
from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)
driver.implicitly_wait(10)
# links_total = []
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")
def first_links(links_total=[]):
initial_data = driver.find_elements_by_tag_name('td')
for initial in initial_data:
page_links = initial.find_elements_by_tag_name('a')
for page in page_links:
page_link = page.get_attribute("href")
links_total.append(page_link)
# driver.refresh()
try:
next_page = driver.find_element_by_partial_link_text('next')
next_page.click()
time.sleep(2)
first_links(links_total)
except (TimeoutError, ElementNotVisibleException, NoSuchElementException):
print("NEXT btn not found : ")
pass
return links_total
all_links = first_links()
for link in all_links:
print(link)
На самом деле вам не нужно использовать Selenium. Вы можете сделать это с помощью Beautiful Soap вот так:
import requests
from bs4 import BeautifulSoup
page_num=0
url_cbp = r"https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=&page={}"
def get_links(links_total=[], page_num=0):
page = requests.get(url_cbp.format(page_num))
soup = BeautifulSoup(page.content, 'html.parser')
results = soup.find(id='region-content')
table_cells = results.find_all('td', class_='views-field')
for cell in table_cells:
# print(cell )
# print('\n\n')
cell_link = cell.find('a')
page_link = cell_link["href"]
links_total.append(page_link)
next_page = results.find('li', class_='pager-next')
if next_page:
page_num += 1
get_links(links_total, page_num)
return links_total
all_links = get_links()
for link in all_links:
print(link)