Как написать DownloadHandler для scrapy, который делает запросы socks4 через txsocksx - PullRequest
1 голос
/ 18 марта 2019

Я работаю над учебным проектом для колледжа, и мне нужно, чтобы приведённый ниже код работал через socks4 вместо tor / socks5. Я попробовал заменить SOCKS5Agent на SOCKS4Agent, но теперь получаю ошибку (трассировка приведена ниже, после кода).

Оригинальный код: https://stackoverflow.com/a/33944924/11219616

Мой код:

import scrapy.core.downloader.handlers.http11 as handler
from twisted.internet import reactor
from txsocksx.http import SOCKS4Agent
from twisted.internet.endpoints import TCP4ClientEndpoint
from scrapy.core.downloader.webclient import _parse


class TorScrapyAgent(handler.ScrapyAgent):
    """Scrapy agent that routes a request through a SOCKS4 proxy on demand.

    A request is sent via SOCKS4 only when ``request.meta['proxy']`` is set
    and its scheme is ``socks4``; every other request is delegated to the
    parent ``ScrapyAgent`` unchanged.
    """

    # Deliberately NOT named ``_Agent``: the parent class uses ``self._Agent``
    # to build the agent for direct (non-proxied) connections, and it does not
    # pass the ``proxyEndpoint`` that SOCKS4Agent needs.  Overriding ``_Agent``
    # with SOCKS4Agent therefore breaks every request that falls through to
    # the parent implementation.
    _SocksAgent = SOCKS4Agent

    def _get_agent(self, request, timeout):
        """Return the Twisted agent that should perform ``request``.

        :param request: the Scrapy request; only ``request.meta['proxy']``
            is inspected here.
        :param timeout: forwarded untouched to the parent implementation
            when the request is not routed through SOCKS4.
        :returns: a SOCKS4 agent bound to the proxy endpoint when the proxy
            URL scheme is ``socks4``, otherwise whatever the parent returns.
        """
        proxy = request.meta.get('proxy')

        if proxy:
            proxy_scheme, _, proxy_host, proxy_port, _ = _parse(proxy)

            if proxy_scheme == 'socks4':
                # Plain TCP connection to the proxy itself; the SOCKS
                # handshake with the target host is done by the agent.
                endpoint = TCP4ClientEndpoint(reactor, proxy_host, proxy_port)

                return self._SocksAgent(reactor, proxyEndpoint=endpoint)

        # No proxy, or a non-SOCKS4 proxy: keep Scrapy's default behaviour.
        return super(TorScrapyAgent, self)._get_agent(request, timeout)


class TorHTTPDownloadHandler(handler.HTTP11DownloadHandler):
    """HTTP/1.1 download handler that performs downloads via TorScrapyAgent."""

    def download_request(self, request, spider):
        """Download ``request`` with a freshly built ``TorScrapyAgent``.

        Size limits come from the spider's ``download_maxsize`` and
        ``download_warnsize`` attributes when present, otherwise from this
        handler's ``_default_maxsize`` / ``_default_warnsize``.

        :returns: the result of ``TorScrapyAgent.download_request(request)``.
        """
        # Built in the same order as the original keyword arguments so the
        # attribute lookups happen in an identical sequence.
        agent_options = {
            'contextFactory': self._contextFactory,
            'pool': self._pool,
            'maxsize': getattr(spider, 'download_maxsize', self._default_maxsize),
            'warnsize': getattr(spider, 'download_warnsize', self._default_warnsize),
        }
        return TorScrapyAgent(**agent_options).download_request(request)

Я получаю ошибку:

Traceback (most recent call last):
File "C:\Python27\lib\site-packages\twisted\internet\defer.py", line 1416, in _inlineCallbacks
    result = result.throwExceptionIntoGenerator(g)
File "C:\Python27\lib\site-packages\twisted\python\failure.py", line 491, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
File "C:\Python27\lib\site-packages\scrapy\core\downloader\middleware.py", line 43, in process_request
    defer.returnValue((yield download_func(request=request,spider=spider)))
File "C:\Python27\lib\site-packages\ometa\protocol.py", line 53, in dataReceived
    self._parser.receive(data)
File "C:\Python27\lib\site-packages\ometa\tube.py", line 41, in receive
    status = self._interp.receive(data)
File "C:\Python27\lib\site-packages\ometa\interp.py", line 48, in receive
    for x in self.next:
File "C:\Python27\lib\site-packages\ometa\interp.py", line 177, in apply
    for x in self._apply(f, ruleName, argvals):
File "C:\Python27\lib\site-packages\ometa\interp.py", line 110, in _apply
    for x in rule():
File "C:\Python27\lib\site-packages\ometa\interp.py", line 256, in parse_Or
    for x in self._eval(subexpr):
File "C:\Python27\lib\site-packages\ometa\interp.py", line 241, in parse_And
    for x in self._eval(subexpr):
File "C:\Python27\lib\site-packages\ometa\interp.py", line 440, in parse_Action
    val = eval(expr.data, self.globals, self._localsStack[-1])
File "<string>", line 1, in <module>
File "C:\Python27\lib\site-packages\txsocksx\client.py", line 276, in serverResponse
    raise e.socks4ErrorMap.get(status)()
RequestRejectedOrFailed
...