Для отслеживания позиции можно просто использовать атрибут класса, например:
import scrapy
class ToscrapeSpider(scrapy.Spider):
    """Scrape book titles and prices, numbering every item in crawl order.

    Each yielded item carries a 1-based ``Position`` that keeps increasing
    across pages, because the counter lives on the spider rather than in
    ``parse``.
    """

    name = 'toscrape'
    allowed_domains = ['books.toscrape.com']
    start_urls = ['http://books.toscrape.com/']
    # Running count of items yielded so far. Declared on the class as the
    # initial value; the first ``self.position += 1`` rebinds it on the
    # spider instance.
    position = 0

    def parse(self, response):
        """Yield one item per book on the page, then follow the "next" link.

        Args:
            response: The downloaded listing page.

        Yields:
            A dict with ``Title``, ``Price`` and ``Position`` keys for every
            book entry, followed by a request for the next page when the
            page has a "next" link.
        """
        for book in response.css('li.col-xs-6'):
            title = book.xpath('.//h3//@title').extract_first()
            price = book.xpath('.//p[@class="price_color"]//text()').extract_first()
            self.position += 1
            yield {
                'Title': title,
                'Price': price,
                'Position': self.position,
            }

        next_href = response.xpath('//li[@class="next"]/a/@href').extract_first()
        # Check the raw href *before* joining: urljoin(None) returns the
        # current page URL (truthy), which would re-request the last page.
        if next_href:
            yield scrapy.Request(response.urljoin(next_href))
Затем запустите паука:
scrapy runspider myspider.py -o out.json
Файл out.json содержит:
[
{"Title": "A Light in the Attic", "Price": "\u00a351.77", "Position": 1},
{"Title": "Tipping the Velvet", "Price": "\u00a353.74", "Position": 2},
{"Title": "Soumission", "Price": "\u00a350.10", "Position": 3},
{"Title": "Sharp Objects", "Price": "\u00a347.82", "Position": 4},
{"Title": "Sapiens: A Brief History of Humankind", "Price": "\u00a354.23", "Position": 5},
{"Title": "The Requiem Red", "Price": "\u00a322.65", "Position": 6},
{"Title": "The Dirty Little Secrets of Getting Your Dream Job", "Price": "\u00a333.34", "Position": 7},
{"Title": "The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull", "Price": "\u00a317.93", "Position": 8},
{"Title": "The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics", "Price": "\u00a322.60", "Position": 9},
{"Title": "The Black Maria", "Price": "\u00a352.15", "Position": 10},
{"Title": "Starving Hearts (Triangular Trade Trilogy, #1)", "Price": "\u00a313.99", "Position": 11},
{"Title": "Shakespeare's Sonnets", "Price": "\u00a320.66", "Position": 12},
{"Title": "Set Me Free", "Price": "\u00a317.46", "Position": 13},
{"Title": "Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)", "Price": "\u00a352.29", "Position": 14},
{"Title": "Rip it Up and Start Again", "Price": "\u00a335.02", "Position": 15},
{"Title": "Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991", "Price": "\u00a357.25", "Position": 16},
{"Title": "Olio", "Price": "\u00a323.88", "Position": 17},
{"Title": "Mesaerion: The Best Science Fiction Stories 1800-1849", "Price": "\u00a337.59", "Position": 18},
{"Title": "Libertarianism for Beginners", "Price": "\u00a351.33", "Position": 19},
{"Title": "It's Only the Himalayas", "Price": "\u00a345.17", "Position": 20}
]