Я пытаюсь удалить информацию о курсах из udemy в python селен, но у меня есть некоторые ошибки в циклах моего кода - PullRequest
0 голосов
/ 06 мая 2020

У меня есть некоторые ошибки, и код не извлекает все данные внутри циклов

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from time import*
import xlsxwriter
import pprint

link= "https://www.udemy.com/courses/business/home-business/"

output_file= "Udemy_output1.xlsx"

wb= xlsxwriter.Workbook(output_file)
ws= wb.add_worksheet("sheet1")
row= 0
col= 0

wd= webdriver.Firefox(executable_path="./geckodriver")
wd.get(link)
sleep(2)
time=1
ac= ActionChains(wd)

for each_course in wd.find_elements_by_xpath("//*[@class='list-view-course-card--course-card-wrapper--TJ6ET']"):
    sleep(2)
    each_course.click()
    wd.find_elements_by_tag_name("body")
    ac.send_keys(Keys.COMMAND + 't').perform()
    sleep(2)
    url= wd.current_url
    ws.write(row,col, url)
    sleep(4)
    title= wd.find_element_by_tag_name("h1")
    ptitle= title.text
    ws.write(row,col+1, ptitle)
    sleep(4)
    subtitle= wd.find_element_by_class_name("clp-lead__headline")
    psubtitle= subtitle.text
    ws.write(row,col+2, psubtitle)
    sleep(4)
    avg_rat= wd.find_element_by_xpath("/html/body/div[2]/div[3]/div[5]/div/div[2]/div[1]/div/div/div[2]/div[2]/div/div/div[2]/span/span")
    pavg_rat= avg_rat.text
    ws.write(row,col+3, pavg_rat)
    sleep(4)
    rating= wd.find_element_by_xpath('/html/body/div[2]/div[3]/div[5]/div/div[2]/div[1]/div/div/div[2]/div[2]/div/div/div[2]/span')
    prating= rating.text
    ws.write(row,col+4, prating)
    sleep(4)
    studint_enrol= wd.find_element_by_xpath('/html/body/div[2]/div[3]/div[5]/div/div[2]/div[1]/div/div/div[2]/div[3]/div/div')
    pstudint_enrol=studint_enrol.text
    ws.write(row,col+5, pstudint_enrol)
    sleep(4)
    price= wd.find_element_by_xpath('//*[@id="udemy"]/div[2]/div[3]/div[5]/div/div[2]/div[2]/div/div[1]/div[2]/div/div[1]/div/div[3]/div/div/div/div[1]/span[2]/span')
    pprice=price.text
    ws.write(row,col+6, pprice)
    sleep(4)
    auther= wd.find_element_by_class_name("instructor-links__link")
    pauther= auther.text
    ws.write(row,col+7, pauther)
    sleep(4)
    last_update= wd.find_element_by_class_name("last-update-date")
    plast_update= last_update.text
    ws.write(row,col+8, plast_update[plast_update.index("t"): ])
    sleep(4)
    language= wd.find_element_by_class_name("clp-lead__locale")
    planguage= language.text
    ws.write(row,col+9, planguage)
    sleep(4)
    will_learn= wd.find_elements_by_xpath('//span[@class="what-you-get__text"]')
    lines= ""
    for each_line in will_learn:
        lin= each_line.text
        lines.join(lin + ", ")
        sleep(4)
    ws.write(row,col+10, lines)
    requirements= wd.find_elements_by_xpath("requirements__item")
    reqlines= ""
    for each_requirement in requirements:
        texts= each_requirement.text
        reqlines.join(texts + ", ")
        sleep(4)
    ws.write(row,col+11, reqlines)
    on_demand= wd.find_element_by_class_name("incentives__text")
    pon_demand= on_demand.text
    ws.write(row,col+12,pon_demand[: pon_demand.index("h")])
    sleep(4)
    articals= wd.find_element_by_class_name("incentives__text")
    particls= articals.text
    ws.write(row,col+13, particls)#[: particls.index("a")])
    sleep(4)
    downloads= wd.find_element_by_class_name("incentives__text")
    pdownloads= downloads.text
    ws.write(row,col+14, pdownloads)#[: pdownloads.index("d")])
    sleep(4)
    row+= 1
    time+= 1
    wd.back()
    sleep(5)


next_page= wd.find_element_by_xpath('/html/body/div[2]/div[3]/div/div/div[5]/div[3]/ul/li[8]/a')
sleep(4)
next_page.click()

wb.close()
wd.close()

и это то, что у меня здесь

Traceback (most recent call last):
  File "/home/bakrimohandess/Desktop/news/udemy.py", line 27, in <module>
    each_course.click()
  File "/home/bakrimohandess/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 85, in click
    self._execute(Command.CLICK_ELEMENT)
  File "/home/bakrimohandess/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webelement.py", line 693, in _execute
    return self._parent.execute(command, params)
  File "/home/bakrimohandess/.local/lib/python3.6/site-packages/selenium/webdriver/remote/webdriver.py", line 338, in execute
    self.error_handler.check_response(response)
  File "/home/bakrimohandess/.local/lib/python3.6/site-packages/selenium/webdriver/remote/errorhandler.py", line 240, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: Web element reference not seen before: a9fc602e-7ba1-4eef-83e0-a6c5bab6ba1b
Stacktrace:
WebDriverError@chrome://marionette/content/error.js:175:5
NoSuchElementError@chrome://marionette/content/error.js:387:5
get@chrome://marionette/content/element.js:222:13
clickElement@chrome://marionette/content/listener.js:1330:22

сначала, модуль щелчка не делает ' t работает после первого l oop и внутренние циклы для извлечения того, что вы узнаете, и требования не извлекают ничего, я сделал что-то не так в моем коде или какую небольшую информацию он мог бы помочь, а у вас нет для объяснения будет достаточно одного слова, я могу поискать и протестировать

...