Scrapy downloader middleware setup - PullRequest
0 votes
/ 22 February 2020
from urllib.parse import urlencode

def get_url(url):
    # Wrap the target URL so the request is routed through the ScraperAPI proxy
    payload = {'api_key': 'MY_API', 'url': url}
    proxy_url = 'http://api.scraperapi.com/?' + urlencode(payload)
    return proxy_url
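
For illustration (with the placeholder key above, and 'https://example.com' standing in for a real target URL), a call produces a proxied URL like this:

get_url('https://example.com')
# -> 'http://api.scraperapi.com/?api_key=MY_API&url=https%3A%2F%2Fexample.com'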

How do I edit my request URL in my middleware so that it goes through the function above?

def process_request(self, request, spider):
    #request.url = get_url(request.url)
    url = get_url(request.url)
    request=request.replace(url=url)
    # Called for each request that goes through the downloader
    # middleware.

    # Must either:
    # - return None: continue processing this request
    # - or return a Response object
    # - or return a Request object
    # - or raise IgnoreRequest: process_exception() methods of
    #   installed downloader middleware will be called
    return request
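
For reference, the middleware is enabled in settings.py along these lines (the dotted path and class name below are assumptions, inferred only from the traceback pointing at /home/timmy/test/middlewares.py):

# settings.py (sketch; module path and class name are assumed)
DOWNLOADER_MIDDLEWARES = {
    'test.middlewares.CustomProxyMiddleware': 543,
}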

I am getting this error:

Traceback (most recent call last):
  File "/home/timmy/.local/lib/python3.7/site-packages/twisted/internet/defer.py", line 1418, in _inlineCallbacks
    result = g.send(result)
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/core/downloader/middleware.py", line 38, in process_request
    response = yield method(request=request, spider=spider)
  File "/home/timmy/test/middlewares.py", line 79, in process_request
    request=request.replace(url=url)
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/request/__init__.py", line 106, in replace
    return cls(*args, **kwargs)
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/request/__init__.py", line 40, in __init__
    self.headers = Headers(headers or {}, encoding=encoding)
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/headers.py", line 12, in __init__
    super(Headers, self).__init__(seq)
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/utils/datatypes.py", line 200, in __init__
    self.update(seq)
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/utils/datatypes.py", line 234, in update
    seq = seq.items() if isinstance(seq, Mapping) else seq
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/headers.py", line 71, in items
    return list(self.iteritems())
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/headers.py", line 74, in <genexpr>
    return ((k, self.getlist(k)) for k in self.keys())
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/headers.py", line 53, in getlist
    return super(Headers, self).__getitem__(key)
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/utils/datatypes.py", line 203, in __getitem__
    return dict.__getitem__(self, self.normkey(key))
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/headers.py", line 16, in normkey
    return self._tobytes(key.title())
  File "/home/timmy/.local/lib/python3.7/site-packages/scrapy/http/headers.py", line 30, in _tobytes
    if isinstance(x, bytes):
RecursionError: maximum recursion depth exceeded while calling a Python object

How do I change the request URL before the request is sent in Scrapy?
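
For what it's worth, a minimal sketch of one way to do this, assuming the get_url helper above: only replace the URL when it has not already been routed through api.scraperapi.com, since a Request returned from process_request is rescheduled and passes through this middleware again (the class name here is hypothetical):

class ScraperAPIProxyMiddleware:  # hypothetical name
    def process_request(self, request, spider):
        # Skip requests that already point at the proxy, so the
        # rescheduled request is not wrapped a second time.
        if 'api.scraperapi.com' in request.url:
            return None
        # Returning a Request tells Scrapy to stop processing this one
        # and reschedule the replaced copy instead.
        return request.replace(url=get_url(request.url))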
