У меня проблема с получением ссылки на следующую страницу при парсинге на Python (Scrapy).
Код
import scrapy
from scrapy.http import Request
from gharbheti.items import GharbhetiItem
from scrapy.contrib.loader import ItemLoader
from scrapy.contrib.loader.processor import TakeFirst, Identity, MapCompose, Join, Compose
from urllib.parse import urljoin
class ListSpider(scrapy.Spider):
    """Scrape property pages linked from the gharbheti.com sale and rent listings.

    ``parse`` schedules one request per property link found on a listing
    page; ``parse_property`` turns each property page into a
    ``GharbhetiItem``. Pagination of the listing pages is not followed.
    """

    name = 'list'
    allowed_domains = ['gharbheti.com']
    start_urls = [
        'https://www.gharbheti.com/sale',
        'https://www.gharbheti.com/rent',
    ]

    # Base used to turn image paths into absolute URLs.
    IMAGE_BASE_URL = "http://www.gharbheti.com"

    def parse(self, response):
        """Yield a request for every property link on a listing page.

        Args:
            response: The listing page response.

        Yields:
            Request objects whose callback is ``parse_property``.
        """
        anchors = response.xpath(
            '//li[@class="col-md-6 Search_building"]/descendant::a'
        )
        for anchor in anchors:
            href = anchor.xpath('./@href').extract_first()
            if not href:
                # An anchor without href would resolve to the listing page
                # itself and be scraped as if it were a property page.
                continue
            url = response.urljoin(href)
            # meta['URL'] is still passed along for any consumer that reads
            # it; parse_property itself takes the URL from response.url.
            yield Request(url, callback=self.parse_property, meta={'URL': url})

    def parse_property(self, response):
        """Build a ``GharbhetiItem`` from a single property page.

        Args:
            response: The property page response.

        Returns:
            The item produced by the loader's ``load_item()``.
        """
        loader = ItemLoader(item=GharbhetiItem(), response=response)
        loader.add_value('URL', response.url)
        loader.add_xpath(
            'Title',
            '//div[@class="product-page-meta"]/h4/em/text()',
            MapCompose(str.strip, str.title),
        )
        loader.add_xpath(
            'Offering',
            '//figcaption[contains(text(), "For Sale")]/text()'
            '|//figcaption[contains(text(),"For Rent")]/text()',
            MapCompose(lambda i: i.replace('For', ''), str.strip),
        )
        loader.add_xpath(
            'Price',
            '//div[@class="deal-pricebox"]/descendant::h3/text()',
            MapCompose(str.strip),
        )
        loader.add_xpath(
            'Type',
            '//ul[@class="suitable-for"]/li/text()',
            MapCompose(str.strip),
        )

        # Both parking values are merged into one descriptive string; a
        # missing value is rendered as "None" by str.format.
        bike_parking = response.xpath(
            '//i[@class="fa fa-motorcycle"]/following-sibling::em/text()'
        ).extract_first()
        car_parking = response.xpath(
            '//i[@class="fa fa-car"]/following-sibling::em/text()'
        ).extract_first()
        parking = "Bike Parking: {} Car Parking: {}".format(bike_parking, car_parking)
        loader.add_value('Parking', parking)

        loader.add_xpath(
            'Description',
            '//div[@class="comment more"]/text()',
            MapCompose(str.strip),
        )

        # Room counts: strip the label text that precedes the number, then
        # convert what is left to int.
        loader.add_xpath(
            'Bedroom',
            '//i[@class="fa fa-bed"]/following-sibling::text()',
            MapCompose(lambda i: i.replace('Total Bed Room:', ''), str.strip, int),
        )
        loader.add_xpath(
            'Livingroom',
            '//i[@class="fa fa-inbox"]/following-sibling::text()',
            MapCompose(lambda i: i.replace('Total Living Room:', ''), str.strip, int),
        )
        loader.add_xpath(
            'Kitchen',
            '//i[@class="fa fa-cutlery"]/following-sibling::text()',
            MapCompose(lambda i: i.replace('Total kitchen Room:', ''), str.strip, int),
        )
        loader.add_xpath(
            'Bathroom',
            '//i[@class="fa fa-puzzle-piece"]/following-sibling::text()',
            MapCompose(lambda i: i.replace('Total Toilet/Bathroom:', ''), str.strip, int),
        )

        loader.add_xpath(
            'Address',
            '//b[contains(text(), "Map")]/text()',
            MapCompose(lambda i: i.replace('Map Loaction :-', ''), str.strip),
        )
        loader.add_xpath(
            'Features',
            '//div[@class="list main-list"]/ul/li/text()',
            MapCompose(str.strip),
        )

        # Read the src attribute directly instead of string-editing the
        # serialised <img> tag, drop the query string, and let urljoin build
        # the absolute URL so absolute or slash-less paths are handled too.
        image_sources = response.xpath(
            '//div[@class="carousel-inner dtl-carousel-inner text-center"]'
            '/descendant::img/@src'
        ).extract()
        image_urls = [
            urljoin(self.IMAGE_BASE_URL, src.split('?')[0])
            for src in image_sources
        ]
        loader.add_value('Images', image_urls)
        return loader.load_item()
Не удаётся получить следующую страницу с сайта. Для другого сайта я сделал вот так (простая нумерация страниц без JavaScript):
next_page=response.urljoin(response.xpath('//a[contains(text(), "Next")]/@href').extract_first()
yield Request(next_page, callback=self.parse)