Я пытаюсь выполнить вход на сайт с помощью Scrapy (Python), но авторизация не работает.
Для справки привожу код:
import quotes as q
import loginspidernew as login
import scrapy
from scrapy.crawler import CrawlerProcess
class ValidateURL:
    """Launch the login spider for every configured URL that has credentials."""

    def checkURL(self, urls):
        """Schedule a login crawl for each entry of ``urls`` and run them all.

        ``urls`` is a mapping whose values are dicts containing at least a
        ``'login_details'`` key; entries with a falsy ``'login_details'`` are
        skipped. The remaining entries are passed to :meth:`runScrap`.

        All crawls are scheduled on ONE ``CrawlerProcess`` which is started
        once at the end: the Twisted reactor behind ``CrawlerProcess.start()``
        cannot be restarted, so starting a new process per URL would only ever
        work for the first URL.

        Returns ``False`` when anything goes wrong (malformed entry, crawl
        start-up failure), ``None`` otherwise.
        """
        if not urls:
            return None
        try:
            process = None
            for entry in urls.values():
                if not entry['login_details']:
                    continue
                # Create the shared process lazily so that input without any
                # credentials never touches the crawler machinery.
                if process is None:
                    process = CrawlerProcess()
                self.runScrap(entry, process=process)
            if process is not None:
                # Blocks until every scheduled crawl has finished.
                process.start()
        except Exception:
            # Keep the original "return False on failure" contract, but leave
            # a trace instead of silently discarding the error.
            import logging
            logging.getLogger(__name__).exception("Login crawl failed")
            return False
        return None

    def runScrap(self, data, process=None):
        """Schedule a ``LoginSpider`` crawl for ``data``.

        ``data`` must contain a ``'url'`` key; the whole dict is handed to the
        spider as ``credentials``. Nothing happens when ``data`` is falsy.

        When ``process`` is omitted, a new ``CrawlerProcess`` is created and
        started here (blocking until the crawl ends), which matches the
        original behaviour. When a ``process`` is supplied, the crawl is only
        scheduled on it and the caller is responsible for calling
        ``process.start()``.
        """
        if not data:
            return
        owns_process = process is None
        if owns_process:
            process = CrawlerProcess()
        process.crawl(
            login.LoginSpider,
            login_url='http://quotes.toscrape.com/login',
            start_urls=[data['url']],
            credentials=data,
        )
        if owns_process:
            process.start()
from scrapy.http import Request, FormRequest
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
# from scrapy.selector import HtmlXPathSelector
from scrapy.http import FormRequest
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.response import open_in_browser
from bs4 import BeautifulSoup
class LoginSpider(CrawlSpider):
    """Spider that submits a login form and reports whether the login worked.

    The methods below read two attributes that are expected to be supplied as
    spider keyword arguments when the crawl is scheduled:

    * ``login_url`` -- URL of the page containing the login form.
    * ``credentials`` -- dict with the keys ``'fields_in_response'`` (strings
      that must appear in the login page), ``'login_details'`` (dict with
      ``'name'`` and ``'pwd'``) and ``'fields_in_main_page'`` (strings that
      must appear in the page returned after a successful login).
    """

    name = 'loginspider'

    def init_request(self):
        """Return the request for the login page, handled by :meth:`login`."""
        # The callback has to accept a response; ``start_requests`` does not,
        # so the login handler is the right target here.
        return Request(url=self.login_url, callback=self.login)

    def start_requests(self):
        """Start the crawl by fetching the login page."""
        print ("\n start_request is here \n")
        yield Request(
            url=self.login_url,
            callback=self.login,
            # The login page must be fetched even if the URL was seen before.
            dont_filter=True,
        )

    def fetch_form_data(self, response):
        """Build the form values to submit from the inputs found in ``response``.

        Returns an empty dict when the page does not contain every marker
        listed in ``credentials['fields_in_response']``. Otherwise every named
        ``<input>`` inside a ``<form>`` is mapped by its ``type``:

        * ``text``     -> the configured login name
        * ``password`` -> the configured password
        * ``hidden``   -> the value already present in the page (e.g. a CSRF
          token)

        Inputs without a ``name`` or with any other ``type`` are ignored.
        """
        required = self.credentials['fields_in_response']
        if not all(field in response.text for field in required):
            return {}

        login_cred = self.credentials['login_details']
        form_data = {}
        for field in response.xpath('//form//input'):
            field_name = field.xpath('@name').extract_first()
            if field_name is None:
                continue
            field_type = field.xpath('@type').extract_first()
            if field_type == 'text':
                form_data[field_name] = login_cred['name']
            elif field_type == 'password':
                form_data[field_name] = login_cred['pwd']
            elif field_type == 'hidden':
                field_value = field.xpath('@value').extract_first()
                # A ``None`` value tells ``FormRequest.from_response`` to drop
                # the field entirely, so only override hidden inputs that
                # actually carry a value.
                if field_value is not None:
                    form_data[field_name] = field_value
        return form_data

    def login(self, response):
        """Submit the login form found in ``response`` with the credentials."""
        print ("\n Login is here! \n")
        formdata = self.fetch_form_data(response)
        # ``formdata`` MUST be passed by keyword: the second positional
        # parameter of ``from_response`` is ``formname``, so a positional dict
        # is never submitted and the form goes out without the credentials.
        return FormRequest.from_response(
            response,
            formdata=formdata,
            callback=self.check_login_response,
        )

    def check_login_response(self, response):
        """Print whether the post-login page contains the expected markers."""
        # Debugging aid: opens the received page in the local web browser.
        open_in_browser(response)
        print(type(self.credentials['fields_in_main_page']))
        print ("\n Check_login_response \n")
        expected = self.credentials['fields_in_main_page']
        if all(field in response.text for field in expected):
            print("Worked, logged in")
        else:
            print("Not logged in")
        return
Код выполняется без ошибок, и паук не выдаёт исключений. Я думаю, что проблема связана с сайтом. Подскажите, пожалуйста, чего не хватает и как можно решить эту проблему? Даже когда я пытаюсь войти через браузер, отображается сообщение о неверном CSRF-токене. Если нужно добавить какую-либо информацию, дайте мне знать.