Question

Я пытаюсь собрать (спарсить) данные о брокерах с этого сайта. Ссылки с первой страницы получить удаётся. Сейчас я использую цикл с фиксированным числом итераций, потому что знаю общее количество страниц. Я хотел бы, чтобы код выполнялся до тех пор, пока на странице есть ссылка «next» (следующая страница). Я пробовал и `try`, и `if not`, но так и не разобрался. Буду благодарен за любую помощь. Вот код:

from selenium import webdriver
import time

from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Configure Chrome to run with the 'headless' argument.
options = webdriver.ChromeOptions()
options.add_argument('headless')
# First argument is the path to the local chromedriver executable.
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)


# Module-level list that first_links() appends every collected href to.
links_total = []
# Open the listing page before any links are read.
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")
def first_links():
    """Read links from the current page, then from the page behind "next".

    The ``href`` of every anchor nested inside a ``<td>`` element is appended
    to the module-level ``links_total`` list. Nothing is returned.
    """
    def harvest():
        # Append the href of each anchor found in each table cell.
        for cell in driver.find_elements_by_tag_name('td'):
            for anchor in cell.find_elements_by_tag_name('a'):
                links_total.append(anchor.get_attribute("href"))

    harvest()
    driver.refresh()
    if not driver.find_element_by_partial_link_text('next'):
        return
    driver.find_element_by_partial_link_text('next').click()
    time.sleep(2)
    harvest()



# Invoke the collector a fixed 22 times.
for _ in range(22):
    first_links()


# Print each collected link on its own line.
for url in links_total:
    print(url)

Sowjanya R Bhat · Answer 1 · 28 мая 2020

Лучше использовать try/except:

from selenium import webdriver
import time

from selenium.common.exceptions import ElementNotVisibleException, NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Configure Chrome to run with the 'headless' argument.
options = webdriver.ChromeOptions()
options.add_argument('headless')
# First argument is the path to the local chromedriver executable.
driver = webdriver.Chrome('C:/Users/../Downloads/cd79/chromedriver.exe', options=options)

# Element lookups keep retrying for up to 10 seconds before giving up.
driver.implicitly_wait(10)
# No module-level list here: first_links() receives and returns the list itself.
driver.get("https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=")

def first_links(links_total=None):
    """Collect links from the current page and every page reachable via "next".

    Starting from whatever page ``driver`` currently shows, the ``href`` of
    each anchor nested inside a ``<td>`` element is appended to the result
    list. The link whose text contains "next" is then clicked and the process
    repeats until that link can no longer be found or clicked.

    Args:
        links_total: Optional list to extend in place. When omitted, a fresh
            list is created for this call, so separate calls never share
            results.

    Returns:
        The list containing every collected ``href`` value.
    """
    if links_total is None:
        links_total = []

    # Iterate page by page instead of recursing once per page.
    while True:
        for cell in driver.find_elements_by_tag_name('td'):
            for anchor in cell.find_elements_by_tag_name('a'):
                links_total.append(anchor.get_attribute("href"))

        # Only the pager lookup and click are guarded, so unrelated errors
        # raised while reading a page are not mistaken for "no next page".
        try:
            next_page = driver.find_element_by_partial_link_text('next')
            next_page.click()
        except (TimeoutError, ElementNotVisibleException, NoSuchElementException):
            print("NEXT btn not found : ")
            break

        # Pause so the following page can load before it is read.
        time.sleep(2)

    return links_total

# Gather the links from all pages, then print them one per line.
all_links = first_links()

for url in all_links:
    print(url)

На самом деле вам не обязательно использовать Selenium. То же самое можно сделать с помощью Beautiful Soup, например так:

import requests
from bs4 import BeautifulSoup

# Module-level page number; get_links() has its own page_num parameter.
page_num=0
# Listing URL template; get_links() fills "{}" with the page number via str.format.
url_cbp = r"https://www.cbp.gov/contact/find-broker-by-port?field_port_location_tid=All&field_port_code_value=&page={}"

def get_links(links_total=None, page_num=0):
    """Collect link targets from the listing, starting at page ``page_num``.

    Each page is downloaded from ``url_cbp`` and parsed. The ``href`` of the
    first anchor in every ``<td class="views-field">`` cell inside the
    ``region-content`` element is appended to the result list. Paging
    continues while the page contains an ``<li class="pager-next">`` element.

    Args:
        links_total: Optional list to extend in place. When omitted, a fresh
            list is created for this call, so separate calls never share
            results.
        page_num: Page number substituted into ``url_cbp`` for the first
            request; it is incremented by one for each following page.

    Returns:
        The list containing every collected ``href`` value.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    if links_total is None:
        links_total = []

    # Iterate page by page instead of recursing once per page.
    while True:
        page = requests.get(url_cbp.format(page_num))
        # Fail loudly rather than trying to parse an error page.
        page.raise_for_status()
        soup = BeautifulSoup(page.content, 'html.parser')
        results = soup.find(id='region-content')
        if results is None:
            # Container missing: nothing to read and no pager to follow.
            break

        for cell in results.find_all('td', class_='views-field'):
            cell_link = cell.find('a')
            # Skip cells without an anchor or without an href attribute.
            if cell_link is not None and cell_link.has_attr("href"):
                links_total.append(cell_link["href"])

        if results.find('li', class_='pager-next') is None:
            break
        page_num += 1

    return links_total

# Fetch the links from every page, then print them one per line.
all_links = get_links()

for url in all_links:
    print(url)

Цикл с if или try для элемента на странице в Selenium

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Цикл с if или try для элемента на странице в Selenium

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Похожие темы