Не удаётся отправить POST-запрос на URL с помощью Scrapy - PullRequest
0 голосов
/ 14 октября 2019

Вот фактический код:

class TestSpider(scrapy.Spider):
    """Spider that submits the "find an appraiser" form and opens profiles.

    Flow:
      1. ``parse``    - submits the search form found on the start page.
      2. ``chk``      - for every "Profile" link on the result page, replays
                        the ``__doPostBack(target, argument)`` call as a POST.
      3. ``get_data`` - extracts and prints the address text of a profile.
    """

    name = 'test'

    start_urls = ['https://www.appraisers.org/find-an-appraiser']

    # Value submitted as the ``ctl07_TSSM`` form field by both POST requests.
    _TSSM = (
        ';Telerik.Sitefinity.Resources, Version=6.1.4700.0, '
        'Culture=neutral, PublicKeyToken=null:en:'
        '6b071be6-0037-40cd-9ddd-b4306dfd727b:7a90d6a:83fa35c7:850288ef'
    )

    # Prefix of the hrefs that trigger a JavaScript postback.
    _POSTBACK_PREFIX = "javascript:__doPostBack("

    @staticmethod
    def _parse_postback(href):
        """Split a ``javascript:__doPostBack('target','argument')`` href.

        Returns a ``(target, argument)`` tuple with the quotes removed, or
        ``None`` when *href* is not a two-argument ``__doPostBack`` call.
        """
        prefix = TestSpider._POSTBACK_PREFIX
        if not href or not href.startswith(prefix):
            return None
        # Drop the prefix and every closing parenthesis, as the original
        # string clean-up did, then separate the two call arguments.
        call_args = href[len(prefix):].replace(")", "")
        parts = call_args.split(",")
        if len(parts) < 2:
            return None
        return parts[0].replace("'", ""), parts[1].replace("'", "")

    def parse(self, response):
        """Submit the search form (``form1``) with a fixed set of filters.

        Yields a single ``FormRequest`` whose response is handled by ``chk``.
        """
        viewstate = response.xpath(
            "//input[@name='__VIEWSTATE']/@value"
        ).extract_first()
        yield scrapy.FormRequest.from_response(
            response=response,
            formid="form1",
            formdata={
                'ctl07_TSSM': self._TSSM,
                '__EVENTTARGET': '',
                '__EVENTARGUMENT': '',
                '__LASTFOCUS': '',
                '__VIEWSTATE': viewstate,
                'ctl00$Header$T8F41DF66014$ctl00$ctl00$searchTextBox': '',
                'ctl00_RightSide_T11E1703E001_ctl00_ctl00_Breadcrumb_ClientState': '',
                'ctl00$RightSide$FindAnAppraiser$txbxLastName': '',
                'ctl00$RightSide$FindAnAppraiser$txbxFirstName': '',
                'ctl00$RightSide$FindAnAppraiser$rdLocation': 'rdState',
                'ctl00$RightSide$FindAnAppraiser$ddlState': '50',
                'ctl00$RightSide$FindAnAppraiser$ddlCountry': '222',
                'ctl00$RightSide$FindAnAppraiser$ddlDiscipline': '',
                'ctl00$RightSide$FindAnAppraiser$ddlSpecialty': '',
                'ctl00$RightSide$FindAnAppraiser$ddlIndustry': '',
                'ctl00$RightSide$FindAnAppraiser$txtKeyword': '',
                'ctl00$RightSide$FindAnAppraiser$btnSearch': 'Search',
            },
            callback=self.chk,
        )

    def chk(self, response):
        """Replay the postback behind every "Profile" link on the result page.

        Yields one ``FormRequest`` per usable link; each response is handled
        by ``get_data``. Links that are not ``__doPostBack`` calls are skipped.
        """
        viewstategenerator = response.xpath(
            "//input[@name='__VIEWSTATEGENERATOR']/@value"
        ).extract_first()
        eventvalidation = response.xpath(
            "//input[@name='__EVENTVALIDATION']/@value"
        ).extract_first()
        viewstate = response.xpath(
            "//input[@name='__VIEWSTATE']/@value"
        ).extract_first()

        hrefs = response.xpath(
            "//a/font[contains(text(),'Profile')]/parent::a/@href"
        ).extract()
        for href in hrefs:
            postback = self._parse_postback(href)
            if postback is None:
                continue
            event_target, event_argument = postback
            yield scrapy.FormRequest.from_response(
                response=response,
                formid="form1",
                formdata={
                    'ctl07_TSSM': self._TSSM,
                    '__EVENTTARGET': event_target,
                    '__EVENTARGUMENT': event_argument,
                    '__VIEWSTATE': viewstate,
                    '__VIEWSTATEGENERATOR': viewstategenerator,
                    '__VIEWSTATEENCRYPTED': '',
                    '__EVENTVALIDATION': eventvalidation,
                    'ctl00$Header$T8F41DF66014$ctl00$ctl00$searchTextBox': '',
                    'ctl00_RightSide_T11E1703E001_ctl00_ctl00_Breadcrumb_ClientState': '',
                },
                # from_response() simulates a click on the first clickable
                # form control by default, which adds that submit button to
                # the POST data. This request emulates a JavaScript postback
                # instead, so no button must be sent along with
                # __EVENTTARGET / __EVENTARGUMENT.
                # NOTE(review): presumably the extra button is what made the
                # server ignore the postback - confirm against the live site.
                dont_click=True,
                callback=self.get_data,
            )

    def get_data(self, response):
        """Print the text of every ``<span>`` whose id contains 'Addresses'."""
        address = response.xpath(
            "//span[contains(@id,'Addresses')]/text()"
        ).extract()
        print(address)

Первый POST-запрос, который отправляется на адрес из start_urls, выполняется успешно. Но отправить POST-запрос из функции chk() не удаётся. Вроде бы всё сделано правильно, но результата всё равно нет. Может ли кто-нибудь помочь?

...