Сбор данных (скрапинг) с автоматическим переходом по нескольким страницам на основе href - PullRequest
1 голос
/ 04 июня 2019

У меня проблема с автоматическим переходом по нескольким страницам с помощью Selenium WebDriver и Python. Мой код автоматически переключает страницы вплоть до 10-й, но дальше это не работает: после страницы № 11 переход по страницам не выполняется.

import urllib.request
from bs4 import BeautifulSoup
import csv
import os
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import os



# Open the rate page for the Buldhana district in Chrome.
# The URL must stay on a single line: a quoted string literal cannot
# contain a raw line break.
url = 'http://www.igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Buldhana'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
d = webdriver.Chrome(executable_path=chrome_path)
d.implicitly_wait(10)
d.get(url)

# Choose the taluka and the village by their <option> values.
Select(d.find_element_by_name('ctl00$ContentPlaceHolder5$ddlTaluka')).select_by_value('7')
Select(d.find_element_by_name('ctl00$ContentPlaceHolder5$ddlVillage')).select_by_value('1464')

# Every link inside the rate grid whose href contains "Page$".
pager_selector = "#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate [href*='Page$']"

# NOTE: this loop has no exit condition; it keeps re-reading the pager and
# re-running its links until the process is interrupted.
# NOTE(review): presumably only the links rendered at the moment of the
# lookup are returned, which would explain paging stalling after the first
# ten pages -- confirm against the live page.
while True:
    # Snapshot the hrefs as plain strings before navigating.
    pages = [link.get_attribute('href')
             for link in d.find_elements_by_css_selector(pager_selector)]

    for script_page in pages:
        # Each href is handed to the browser to be executed as JavaScript.
        d.execute_script(script_page)
        #print(script_page)

1 Ответ

0 голосов
/ 04 июня 2019

Попробуйте использовать индексы страниц: проверяйте, доступна ли ссылка на очередную страницу, и, если да, нажимайте на неё, чтобы перейти. Попробуйте следующий код.

from selenium import webdriver
# Select is used below for the two drop-downs; without this import the
# script fails with NameError.
from selenium.webdriver.support.select import Select

url = 'http://www.igrmaharashtra.gov.in/eASR/eASRCommon.aspx?hDistName=Buldhana'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
d = webdriver.Chrome(executable_path=chrome_path)
d.implicitly_wait(10)
d.get(url)

# Choose the taluka and the village by their <option> values.
Select(d.find_element_by_name('ctl00$ContentPlaceHolder5$ddlTaluka')).select_by_value('7')
Select(d.find_element_by_name('ctl00$ContentPlaceHolder5$ddlVillage')).select_by_value('1464')

# Walk the pager by page index, starting from page 2, and stop as soon as
# no link for the next index is found in the grid.
i = 2
while True:
    # Look the link up once per iteration and reuse the result for the
    # existence check, the print, and the click.
    links = d.find_elements_by_css_selector(
        "#ctl00_ContentPlaceHolder5_grdUrbanSubZoneWiseRate a[href*='Page${}']".format(i))
    if not links:
        break
    print(links[0].get_attribute('href'))
    links[0].click()
    i += 1

Вывод (начинается со страницы 2, так как я начал с неё):


javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$2')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$3')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$4')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$5')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$6')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$7')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$8')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$9')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$10')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$11')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$12')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$13')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$14')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$15')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$16')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$17')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$18')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$19')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$20')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$21')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$22')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$23')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$24')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$25')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$26')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$27')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$28')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$29')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$30')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$31')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$32')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$33')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$34')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$35')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$36')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$37')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$38')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$39')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$40')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$41')
javascript:__doPostBack('ctl00$ContentPlaceHolder5$grdUrbanSubZoneWiseRate','Page$42')

Process finished with exit code 0
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...