Python Селен код для загрузки записи выдает ошибку - PullRequest
0 голосов
/ 01 мая 2020

Я загружаю записи с официального портала окружных судов Индии. Я разработал следующий код. Я не уверен, что это идеально, но это близко, так как мне почти удалось достичь цели. Это почти потому, что в некоторых случаях код переходит и не покрывает все параметры. Иногда он выдает ошибки, и мне приходится перезапускать его вручную. Теперь у меня есть вопросы: 1. Является ли код правильным для достижения цели? (загрузка записей ожидающих рассмотрения дел) 2. Могу ли я исправить это, чтобы избежать ошибок? Код вставлен ниже, сопровождаемый одной из ошибок, с которыми я иногда сталкиваюсь. Код:

District court website - select state - select district - services - case status - act - back
'''
import datetime
import cv2
import base64
from PIL import Image
from io import BytesIO
import time
import selenium
import self as self
from pytesseract import pytesseract
from selenium.webdriver.common.keys import Keys
import os
from selenium.webdriver.support import expected_conditions as EC, expected_conditions
from selenium.webdriver.support.select import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException, \
    WebDriverException, ElementNotInteractableException

driver = selenium.webdriver.Firefox()
url = r'https://districts.ecourts.gov.in/'
driver.get(url)
# some definatons to be used latter in the code.

invalidCaptcha = "Invalid Captcha"
recordNotFound = "Record Not Found"
wait = WebDriverWait(driver, 180)
waitMid = WebDriverWait(driver, 20)
waitShort = WebDriverWait(driver, 5)


# functions

def district():
    global dateToday

    def complex_and_act():

        def captchcrack():
            def imgtotxt():
                elem = driver.find_element_by_id("captcha_image")
                loc = elem.location
                size = elem.size
                left = loc['x']
                top = loc['y']
                width = size['width']
                height = size['height']
                box = (int(left), int(top), int(left + width), int(top + height))
                screenshot = driver.get_screenshot_as_base64()
                img = Image.open(BytesIO(base64.b64decode(screenshot)))
                area = img.crop(box)
                area.save('/home/sangharshmanuski/Documents/e_courts/captcha/file_trial.png', 'PNG')
                fullPath = r'/home/sangharshmanuski/Documents/e_courts/captcha'
                f = os.listdir(fullPath)
                desPath = r"/home/sangharshmanuski/Documents/e_courts/editC"
                img = cv2.imread(os.path.join(fullPath, 'file_trial.png'), 0)
                ret, thresh1 = cv2.threshold(img, 111, 255, cv2.THRESH_BINARY)
                cv2.imwrite('/home/sangharshmanuski/Documents/e_courts/editC/oneDisNoLoop.png', thresh1)
                # know the text with pytesseract
                captchaText = pytesseract.image_to_string(
                    Image.open('/home/sangharshmanuski/Documents/e_courts/editC/oneDisNoLoop.png'))
                captcha = driver.find_element_by_id('captcha')
                captcha.send_keys(captchaText)
                driver.find_element_by_css_selector('input.button:nth-child(1)').click()
                time.sleep(3)

            def accept():

                while True:
                    try:
                        driver.switch_to.alert.accept()
                        driver.switch_to.window(driver.current_window_handle[-1])
                        driver.find_element_by_css_selector(
                            '#captcha_container_2 > div:nth-child('
                            '1) > div:nth-child(1) > span:nth-child(3) > a:nth-child(7) > img:nth-child(1)').click()
                        print('alert was present')
                        imgtotxt()
                    except:
                        print('no alret')
                        break

            imgtotxt()
            accept()

        def incorrectcaptcha():
            while driver.find_element_by_css_selector('#waitmsg').is_displayed():
                i = 0
                if i < 20:
                    time.sleep(1)

            else:
                while driver.find_element_by_css_selector('#errSpan > p:nth-child(1)').is_displayed():
                    incorrect = driver.find_element_by_css_selector('#errSpan > p:nth-child(1)').text
                    if incorrect == invalidCaptcha:
                        print('invalid captcha')
                        captchcrack()
                    else:
                        return print('captcha cracked correctly')

        def record():
            if driver.find_element_by_css_selector(
                    'a.someclass').is_displayed():
                listAllView = driver.find_elements_by_css_selector('a.someclass')
                print('downloading the record ' + nameCourtComp)
                # make new dirctory by name of Court Complex
                distDir2 = os.path.join(
                    '/home/sangharshmanuski/Documents/e_courts/mha/downloads3',
                    newDistNameDict, nameCourtComp)
                if not os.path.exists(distDir2):
                    os.makedirs(distDir2)
                x = 0
                for view in listAllView:
                    try:
                        view.click()
                        wait.until(EC.presence_of_element_located((By.ID, 'back_top')))
                        openFile = open(
                            os.path.join(distDir2, "file_" + str(x) + ".html"), "w")
                        openFile.write(driver.page_source)
                        openFile.close()
                        back = driver.find_element_by_id('back_top')
                        back.click()
                        x += 1
                    except (TimeoutException, ElementNotInteractableException):
                        driver.refresh()
                        wait.until(
                            EC.presence_of_element_located((
                                By.CSS_SELECTOR,
                                '#captcha_container_2 > div:nth-child(1) > div:nth-child('
                                '1) > span:nth-child(3) > a:nth-child(7) > img:nth-child(1)')))
                        driver.find_element_by_css_selector('input.button:nth-child(2)').click()
                        nonlocal courtComp
                        courtComp = courtComp - 1
                        return print(
                            'While Downloading record for '
                            + nameCourtComp + ' error occured, retrying now...')
                else:
                    time.sleep(2)
                    driver.find_element_by_css_selector('input.button:nth-child(2)').click()

        courtComp = 1
        courtComplexDownload = Select(driver.find_element_by_css_selector('#court_complex_code'))
        courtComplexDownloadList = courtComplexDownload.options
        courtComplexLen = len(courtComplexDownloadList)
        while courtComp < courtComplexLen:
            nameCourtComp = courtComplexDownloadList[courtComp].text
            courtComplexDownload.select_by_index(courtComp)
            try:
                wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#actcode > option:nth-child(1791)')))
                driver.find_element_by_css_selector('#actcode > option:nth-child(1791)').click()
            except (NoSuchElementException, TimeoutException):
                print(nameCourtComp + ' has no option for PoA')
                courtComp += 1
                continue

            captchcrack()
            incorrectcaptcha()
            while driver.find_element_by_css_selector('#waitmsg').is_displayed():
                time.sleep(1)
            if driver.find_element_by_css_selector('#errSpan > p:nth-child(1)').is_displayed():
                courtComp += 1
                continue
            else:
                record()
                courtComp += 1
                continue

    districtListDropdown = Select(driver.find_element_by_css_selector("#sateist"))
    distOptions = districtListDropdown.options
    lenOpts = len(distOptions) - int(1)
    i = 1
    while i <= lenOpts:

            wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))
            # change the name latter. set it to format as per name of the state.
            newDistDropDown = Select(driver.find_element_by_css_selector("#sateist"))
            newDistOptions = newDistDropDown.options
            dateToday = datetime.datetime.now()
            newDistName = newDistOptions[i].text
            newDistNameDict = newDistName + '_' + str(dateToday.day) + '_' + str(dateToday.month) + '_' + str(
                dateToday.year)
            newDistDropDown.select_by_index(i)
            distDir = os.path.join('/home/sangharshmanuski/Documents/e_courts/mha/downloads3', newDistNameDict)
            if not os.path.exists(distDir):
                os.mkdir(distDir)
            # wait for new District Court page to upload fully.
            headingDist = driver.find_element_by_css_selector('.heading')
            if headingDist.text.lower() == newDistName.lower():
                wait.until(
                    EC.element_to_be_clickable((By.CSS_SELECTOR, 'button.accordion2:nth-child(2)'))).click()
                current = driver.window_handles[0]
                wait.until(EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     'div.panel:nth-child(3) > ul:nth-child(1) > li:nth-child(6) > a:nth-child(1)'))).click()
                # wait until new tab opens.
                wait.until(EC.number_of_windows_to_be(2))
                # define new tab by differentiating from current tab.
                newWindow = [window for window in driver.window_handles if window != current][0]
                # switch to the new tab. ref: https://stackoverflow.com/questions/41571217/python-3-5-selenium-how-to-handle-a-new-window-and-wait-until-it-is-fully-lo
                driver.switch_to.window(newWindow)
                # wait till court complex list appears.
                wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#court_complex_code')))
                # create list of all court complex.
                # 2 approaches - 1 select 2 click.
                time.sleep(5)
                complex_and_act()
                driver.close()
                print("all court complexes in " + newDistName + " completed")
                driver.switch_to.window(current)
                driver.back()

            else:
                time.sleep(5)
                continue
            i += 1

    else:
        return print("all districts completed")


wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#sateist > option:nth-child(22)")))
select = Select(driver.find_element_by_css_selector('#sateist'))
options = select.options
select.select_by_visible_text('Maharashtra')
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.region')))
wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#sateist')))

district()

Сообщение об ошибке:

 > Traceback (most recent call last):   File
    > "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line
    > 234, in <module>
    >     district()   File "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line
    > 212, in district
    >     complex_and_act()   File "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line
    > 163, in complex_and_act
    >     incorrectcaptcha()   File "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line
    > 99, in incorrectcaptcha
    >     while driver.find_element_by_css_selector('#errSpan > p:nth-child(1)').is_displayed():   File
    > "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py",
    > line 598, in find_element_by_css_selector
    >     return self.find_element(by=By.CSS_SELECTOR, value=css_selector)   File
    > "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py",
    > line 978, in find_element
    >     'value': value})['value']   File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py",
    > line 321, in execute
    >     self.error_handler.check_response(response)   File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/erroTraceback (most recent call last):   File "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line 234, in <module>
    district()   File "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line 212, in district
    complex_and_act()   File "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line 163, in complex_and_act
    incorrectcaptcha()   File "/home/sangharshmanuski/Documents/e_courts/code_2/function_3.py", line 99, in incorrectcaptcha
    while driver.find_element_by_css_selector('#errSpan > p:nth-child(1)').is_displayed():   File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 598, in find_element_by_css_selector
    return self.find_element(by=By.CSS_SELECTOR, value=css_selector)   File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 978, in find_element
    'value': value})['value']   File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 321, in execute
    self.error_handler.check_response(response)   File "/usr/local/lib/python3.7/dist-packages/selenium/webdriver/remote/errorhandler.py", line 242, in check_response
    raise exception_class(message, screen, stacktrace) selenium.common.exceptions.NoSuchElementException: Message: Unable to locate element: #errSpan > p:nth-child(1) rhandler.py",
    > line 242, in check_response
    >     raise exception_class(message, screen, stacktrace) selenium.common.exceptions.NoSuchElementException: Message: Unable to
    > locate element: #errSpan > p:nth-child(1)

Надеюсь, вопрос здесь уместен. Если нет, пожалуйста, дайте мне знать, я удалю его. Заранее спасибо!

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...