Итак, я хотел проверить себя и попытаться сделать Scrapy больше похожим на Requests. главным образом для простоты использования, но я сталкиваюсь с ошибкой ниже, и я прочитал ее 10 раз и даже изменил некоторые вещи, так что теперь я обращаюсь к вам, ребята, чтобы выяснить, можете ли вы понять это. Я хочу, чтобы функция взяла входной URL и некоторые аргументы и вернула ответ сайта.
Код:
import fake_useragent
import time
import scrapy
from scrapy.crawler import CrawlerRunner
from twisted.internet import reactor
def Request(*args, **kwargs):
def RequestWrapper(*args, **kwargs):
class RequestControl(scrapy.Request):
settings_kwargs = {}
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
self.settings_kwargs = self.get_kwargs('FUA')
self.RequestSpider = self.subclass_container()
runner = CrawlerRunner()
d = runner.crawl(self.RequestSpider, *self.args, **self.kwargs)
d.addBoth(self.sleep)
d.addBoth(lambda _: reactor.stop())
# process = CrawlerProcess({})
# process.crawl(self._Request, *self.args, **self.kwargs)
# process.start()
def get_response(self):
return self.response
def get_kwargs(self, *args):
items = []
for arg in args:
items.append(True if self.kwargs.get(arg) == True else False)
if self.kwargs.get(arg) != None:
del self.kwargs[arg]
return dict(zip(args, items))
def get_settings_kwargs(self):
return self.settings_kwargs
def sleep(self, _, duration=5):
print(f'sleeping for: {duration}')
time.sleep(duration)
def subclass_container(self):
outer_class = self
class RequestSpider(scrapy.Spider):
name = '_Request'
def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
self.settings_kwargs = outer_class.get_settings_kwargs()
if self.settings_kwargs.get('FUA') == True:
if self.kwargs.get('headers') != None:
self.kwargs['headers']['User-Agent'] = fake_useragent.UserAgent().chrome
else:
self.kwargs['headers'] = {}
self.kwargs['headers']['User-Agent'] = fake_useragent.UserAgent().chrome
print(self.kwargs)
def start_requests(self):
yield scrapy.Request(*self.args, **self.kwargs)
def parse(self, response):
outer_class.response: scrapy.Request = response
return RequestSpider
rc = RequestControl(*args, **kwargs)
return rc.get_response()
result = RequestWrapper(*args, **kwargs)
reactor.run()
return result
if __name__ == '__main__':
response: scrapy.Request = Request('https://webscraper.io/test-sites', FUA=True)
print(response.body)
Ошибка:
Connected to pydev debugger (build 193.5662.61)
{'headers': {'User-Agent': 'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36'}}
Traceback (most recent call last):
File "C:\Users\<username>\AppData\Local\JetBrains\Toolbox\apps\PyCharm-C\ch-0\193.5662.61\plugins\python-ce\helpers\pydev\pydevd.py", line 1434, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
File "C:\Users\<username>\AppData\Local\JetBrains\Toolbox\apps\PyCharm-C\ch-0\193.5662.61\plugins\python-ce\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "F:/Python/Projects/SleepyCraft/CurseSpider.py", line 67, in <module>
cs.run()
File "F:/Python/Projects/SleepyCraft/CurseSpider.py", line 59, in run
response: scrapy.Request = Request('https://webscraper.io/test-sites', FUA=True)
File "F:\Python\Projects\SleepyCraft\ScrapyUtil.py", line 73, in Request
result = RequestWrapper(*args, **kwargs)
File "F:\Python\Projects\SleepyCraft\ScrapyUtil.py", line 72, in RequestWrapper
return rc.get_response()
File "F:\Python\Projects\SleepyCraft\ScrapyUtil.py", line 28, in get_response
return self.response
AttributeError: 'RequestControl' object has no attribute 'response'