Я пытаюсь войти, используя следующий код:
# -*- coding: utf-8 -*-
from datetime import datetime
from requests_toolbelt import MultipartEncoder
import traceback
import scrapy
import Utils.GeneralUtils as utils
import Utils.DbUtils as du
import dateparser as dp
from datetime import datetime
import math
# Debug switch: when True, WbcSpider.after_login stops after scheduling the
# first forum URL instead of iterating over every configured one.
DEBUG = False
class InnerException(Exception):
    """Marker exception used to propagate failures out of nested parsing.

    Workaround for the inner exception raised by ``parse_insert_comment``:
    it is meant to be used only by ``parse_post_page``, which keeps
    re-raising it with the original exception carried as ``args[0]``.

    Background: some unregistered users have no user name, so
    ``user_name.strip()`` fails with a ``NoneType`` error. Those users and
    their comments / posts are meant to be ignored.
    """
class WbcSpider(scrapy.Spider):
    """Spider that logs in to hotcopper.com.au and then requests forum pages.

    Original note on the dicts yielded by this spider -- mandatory keys:
        db_handler
        meta_dict
        error
        result
    """

    name = 'wbc'

    def __init__(self):
        """Initialise the start-URL table and the scraping-state attributes."""
        super().__init__()
        self.start_ric_url_dict = {
            'wbc': 'https://hotcopper.com.au/asx/wbc/discussion/?post_view=0'
        }
        # Flag dicts for determining whether to keep scraping.
        self.comment_cont_dict = dict()
        self.post_cont_dict = dict()
        self.max_old_num = 5
        self.stop_date_flag = dp.parse('2018-12-31')
        self.scrapy_meta_keys = [
            'depth', 'download_timeout', 'download_slot', 'download_latency',
            '_id'
        ]
        # self.exchange = ExchangeParser()
        # private
        # if self.exchange.is_multi_source_exchange:
        # self.latest_date = utils.create_date_time_tzinfo('30 DEC 2017',
        # self.exchange.tzinfo)

    def start_requests(self):
        """Yield the multipart/form-data login POST.

        The response is handed to ``after_login``.
        """
        # NOTE(review): '_xfToken' is sent empty; if the site expects a token
        # taken from the login page, the request may still be rejected --
        # verify against the real login form.
        form_data = {
            'login': '***',
            'password': '***',
            'remember': '1',
            'cookie_check': '1',
            'tos': '1',
            'redirect': 'https://hotcopper.com.au/',
            '_xfToken': '',
            '_xfResponseType': 'json',
            '_xfRequestUri': '/login/'
        }
        boundary = '----WebKitFormBoundaryKJHwhAZcqb1VwurG'
        encoder = MultipartEncoder(form_data, boundary)
        headers = {
            # Taken from the encoder so the boundary announced in the header
            # can never drift from the one actually used in the body.
            'Content-Type': encoder.content_type,
        }
        # No explicit Content-Length: the HTTP client derives it from the
        # body. NOTE(review): a hand-set copy may end up being sent in
        # addition to the computed one, and servers commonly answer a
        # duplicated Content-Length with 400 -- confirm this is what caused
        # the rejected login.
        yield scrapy.Request(
            url='https://hotcopper.com.au/login/login',
            method='POST',
            body=encoder.to_string(),
            headers=headers,
            callback=self.after_login,
            dont_filter=True)

    def after_login(self, response):
        """Inspect the login response, then request each configured forum URL.

        Opens an interactive Scrapy shell on ``response`` first (debugging
        aid), so the crawl pauses here until that shell is closed.
        """
        from scrapy.shell import inspect_response
        inspect_response(response, self)
        # Only the URLs are needed; the dict keys are not used here.
        for url in self.start_ric_url_dict.values():
            yield scrapy.Request(url, callback=self.parse_forum_page)
            if DEBUG:  # only one market
                break
Я следовал рекомендациям из вопроса «Как правильно сделать запрос к форме multipart/form-data с помощью Scrapy?», чтобы отправить POST-запрос в формате multipart/form-data. Тем не менее Scrapy возвращает мне такую ошибку:
2019-06-07 01:24:14 [scrapy.spidermiddlewares.httperror] INFO: Ignoring response <400 https://hotcopper.com.au/login/login>: HTTP status code is not handled or not allowed
Какую ошибку я совершил?