Я довольно плохо знаком с Python и следил за видеоуроком на YouTube о том, как собирать информацию на Amazon с помощью Scrapy.
Вот мой код:
# -*- coding: utf-8 -*-
import scrapy
from ..items import AmazonItem
class AmazonproductSpider(scrapy.Spider):
name = 'amazonproduct'
page_number = 2
start_urls = ['https://www.amazon.com/air-purifier/s?k=air+purifier&rh=p_n_condition-type%3A6461716011&qid=1570164390&ref=sr_pg_1']
def parse(self, response):
items = AmazonItem()
product_name = response.css('#search .a-size-medium').css('::text').extract()
product_price = response.css('.sg-col-6-of-24 .a-price-whole').css('::text').extract()
product_reviews = response.css('.sg-col-12-of-28 .a-size-base').css('::text').extract()
items['product_name'] = product_name
items['product_price'] = product_price
items['product_reviews'] = product_reviews
yield items
next_page = ['https://www.amazon.com/air-purifier/s?k=air+purifier&rh=p_n_condition-type%3A6461716011&page=' + str(AmazonproductSpider.page_number) + '&qid=1570164578&ref=sr_pg_2']
if AmazonproductSpider.page_number <= 20:
AmazonproductSpider.page_number += 1
yield response.follow(next_page, callback = self.parse)
pass
Я прошел учебник, запустил свой код и получил информацию на первой странице, которая удалялась успешно, но когда Scrapy чистил другие страницы, ивозникла ошибка:
2019-10-04 13:16:26 [scrapy.core.scraper] ERROR: Spider error processing <GET https://www.amazon.com/air-purifier/s?k=air+purifier&rh=p_n_condition-type%3A6461716011> (referer: None)
Traceback (most recent call last):
File "/anaconda3/lib/python3.6/site-packages/scrapy/utils/defer.py", line 102, in iter_errback
yield next(it)
File "/anaconda3/lib/python3.6/site-packages/scrapy/core/spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "/anaconda3/lib/python3.6/site-packages/scrapy/spidermiddlewares/offsite.py", line 29, in process_spider_output
for x in result:
File "/anaconda3/lib/python3.6/site-packages/scrapy/core/spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "/anaconda3/lib/python3.6/site-packages/scrapy/spidermiddlewares/referer.py", line 339, in <genexpr>
return (_set_referer(r) for r in result or ())
File "/anaconda3/lib/python3.6/site-packages/scrapy/core/spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "/anaconda3/lib/python3.6/site-packages/scrapy/spidermiddlewares/urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "/anaconda3/lib/python3.6/site-packages/scrapy/core/spidermw.py", line 84, in evaluate_iterable
for r in iterable:
File "/anaconda3/lib/python3.6/site-packages/scrapy/spidermiddlewares/depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "/Users/andrealien/Desktop/Work/Amazon_airpurifier/amazon/amazon/spiders/amazonproduct.py", line 27, in parse
yield response.follow(next_page, callback = self.parse)
File "/anaconda3/lib/python3.6/site-packages/scrapy/http/response/text.py", line 158, in follow
cb_kwargs=cb_kwargs,
File "/anaconda3/lib/python3.6/site-packages/scrapy/http/response/__init__.py", line 125, in follow
url = self.urljoin(url)
File "/anaconda3/lib/python3.6/site-packages/scrapy/http/response/text.py", line 82, in urljoin
return urljoin(get_base_url(self), url)
File "/anaconda3/lib/python3.6/urllib/parse.py", line 488, in urljoin
base, url, _coerce_result = _coerce_args(base, url)
File "/anaconda3/lib/python3.6/urllib/parse.py", line 120, in _coerce_args
raise TypeError("Cannot mix str and non-str arguments")
TypeError: Cannot mix str and non-str arguments
Кто-нибудь знает, как решить эту проблему? Заранее всем спасибо за чтение моих вопросов!