def parse(self,response):
my_item={'test':123,'test2':321}
google_url = 'https://www.google.com/search?q=coffee+cans'
yield Request(url=google_url,callback=self.google,meta={'my_item':my_item})
def google(self,response):
my_item = response.meta['my_item']
rules = (
Rule(LinkExtractor(restrict_xpaths='//div[@class="r"]/a',allow='/dp',allow_domains='chewy.com'),
callback="chewy"),
Rule(LinkExtractor(restrict_xpaths='//div[@class="r"]/a',allow='/p/',allow_domains='homedepot.com'),
process_request=request.meta['my_item']=my_item,callback='homedepot')
)
def homedepot(self,response):
#my_item = response.meta['my_item']
сообщение об ошибке:
Traceback (most recent call last):
File "/home/timmy/.local/bin/scrapy", line 11, in <module>
sys.exit(execute())
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/cmdline.py", line 149, in execute
cmd.crawler_process = CrawlerProcess(settings)
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/crawler.py", line 251, in __init__
super(CrawlerProcess, self).__init__(settings)
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/crawler.py", line 137, in __init__
self.spider_loader = _get_spider_loader(settings)
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/crawler.py", line 338, in _get_spider_loader
return loader_cls.from_settings(settings.frozencopy())
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/spiderloader.py", line 61, in from_settings
return cls(settings)
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/spiderloader.py", line 25, in __init__
self._load_all_spiders()
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/spiderloader.py", line 47, in _load_all_spiders
for module in walk_modules(name):
File "/home/timmy/.local/lib/python3.6/site-packages/scrapy/utils/misc.py", line 71, in walk_modules
submod = import_module(fullpath)
File "/usr/lib/python3.6/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
File "<frozen importlib._bootstrap>", line 994, in _gcd_import
File "<frozen importlib._bootstrap>", line 971, in _find_and_load
File "<frozen importlib._bootstrap>", line 955, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 674, in exec_module
File "<frozen importlib._bootstrap_external>", line 781, in get_code
File "<frozen importlib._bootstrap_external>", line 741, in source_to_code
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/home/timmy/scrapy_tut/myproject/spiders/amazon.py", line 62
process_request=request.meta['my_item']=my_item,callback='homedepot')
^
SyntaxError: invalid syntax
Я отредактировал вопрос, чтобы сделать его более тестируемым. Как я могу передать my_item
ссылкам, извлеченным из Rule(LinkExtractor...)
(я перенес правила из инициализациипаука, чтобы мне было легче делать (используйте мета), но я все еще не могу это сделать.
Любая помощь очень ценится
Я пытался использовать
rules = (
Rule(LinkExtractor(restrict_xpaths='//div[@class="r"]/a',allow='/dp',allow_domains='chewy.com'),
process_request=lambda request:request.meta.update({'my_item':my_item}),callback='chewy'),
Rule(LinkExtractor(restrict_xpaths='//div[@class="r"]/a',allow='/p/',allow_domains='homedepot.com')
,process_request=lambda request:request.meta.update({'my_item':my_item}),callback='homedepot')
)
Это не дает ошибки, но страница не запрашивается