Таким образом, моя цель состоит в том, чтобы вычистить список из файла CSV (я эту часть выяснил), но когда я пытаюсь запустить мою программу по тестовому URL-адресу, этот URL-адрес очищается несколько раз и возвращает результаты, которые я хочу один раз. Я поясню свой код и несколько снимков экрана.
# -*- coding: utf-8 -*-
import scrapy
from ..items import LowesspiderItem
from scrapy.http import Request
import requests
class LowesSpider(scrapy.Spider):
name = 'lowes'
def start_requests(self):
start_urls = ['https://www.lowes.com/search?searchTerm=8654RM-42',
'https://www.lowes.com/search?searchTerm=RA36']
for url in start_urls:
yield Request(url,
headers={'Cookie': 'sn=2333;'}, #Preset a location
meta={'dont_merge_cookies': True, #Allows location cookie to get through
'url':url}) #Using to get the product SKU
def parse(self, response):
items = response.css('.grid-container')
for product in items:
item = LowesspiderItem()
#get product price
productPrice = product.css('.art-pd-price::text').get()
#get SKU
productSKU = response.meta['url']
productSKU = productSKU.split('=')[-1]
item["productSKU"] = productSKU
item["productPrice"] = productPrice
yield item
2020-04-21 14:09:48 [scrapy.core.engine] INFO: Spider opened
2020-04-21 14:09:48 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min)
2020-04-21 14:09:48 [scrapy.extensions.telnet] INFO: Telnet console listening on 127.0.0.1:6023
2020-04-21 14:09:48 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.lowes.com/robots.txt> (referer: None)
2020-04-21 14:09:48 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (301) to <GET https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644> from <GET https://www.lowes.com/search?searchTerm=8654RM-42>
2020-04-21 14:09:49 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (301) to <GET https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095> from <GET https://www.lowes.com/search?searchTerm=RA36>
2020-04-21 14:09:49 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644> (referer: None)
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644>
{'productPrice': None, 'productSKU': '8654RM-42'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644>
{'productPrice': None, 'productSKU': '8654RM-42'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644>
{'productPrice': None, 'productSKU': '8654RM-42'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644>
{'productPrice': '1,449.95', 'productSKU': '8654RM-42'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644>
{'productPrice': None, 'productSKU': '8654RM-42'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Ducted-Red-Matte-Wall-Mounted-Range-Hood-Common-42-Inch-Actual-42-in/1001440644>
{'productPrice': None, 'productSKU': '8654RM-42'}
2020-04-21 14:09:49 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095> (referer: None)
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095>
{'productPrice': None, 'productSKU': 'RA36'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095>
{'productPrice': None, 'productSKU': 'RA36'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095>
{'productPrice': None, 'productSKU': 'RA36'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095>
{'productPrice': '2,549.99', 'productSKU': 'RA36'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095>
{'productPrice': None, 'productSKU': 'RA36'}
2020-04-21 14:09:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.lowes.com/pd/ZLINE-KITCHEN-BATH-Professional-Deep-Recessed-6-Burners-Convection-Stainless-Steel-Common-36-in-Actual-36-in/1000525095>
{'productPrice': None, 'productSKU': 'RA36'}
Так что результаты, которые я хотел бы получить, были бы примерно такими: {'productPrice': '1449.95', ' productSKU ':' 8654RM-42 '}
Однако в моей программе я получаю многократные результаты, которые, как я полагаю, вызваны моим элементом верхнего уровня, где он выполняет итерацию для l oop
items = response.css('.grid-container')
for product in items:
item = LowesspiderItem()
Также вот скриншот вывода Excel: