Я практикуюсь в работе со Scrapy и Selenium на сайте Booking.com.
По какой-то причине следующий код на чистом Selenium работает нормально.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from shutil import which
# Standalone Selenium check: open the Booking.com search results page and
# print the name found in every result card.

# Headless mode is intentionally left disabled. NOTE: if re-enabled, the
# options object must also be passed to webdriver.Chrome(...) to take effect.
# chrome_options = Options()
# chrome_options.add_argument("--headless")
chrome_path = which("chromedriver")
if chrome_path is None:
    # Fail early with a clear message instead of handing None to the driver.
    raise FileNotFoundError("chromedriver was not found on PATH")

driver = webdriver.Chrome(executable_path=chrome_path)
try:
    driver.get("https://www.booking.com/searchresults.en-gb.html?label=gen173nr-1DCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AED6AEBiAIBqAIDuAKPtaX4BcACAdICJDE5OWNlMDNkLWJjZTgtNGQyNC04ZjYwLTZiOTZmOTllYTZlN9gCBOACAQ&lang=en-gb&sb=1&src=searchresults&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Fsearchresults.en-gb.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AED6AEBiAIBqAIDuAKPtaX4BcACAdICJDE5OWNlMDNkLWJjZTgtNGQyNC04ZjYwLTZiOTZmOTllYTZlN9gCBOACAQ%3Btmpl%3Dsearchresults%3Bac_click_type%3Db%3Bac_position%3D0%3Bcheckin_month%3D7%3Bcheckin_monthday%3D13%3Bcheckin_year%3D2020%3Bcheckout_month%3D7%3Bcheckout_monthday%3D14%3Bcheckout_year%3D2020%3Bcity%3D900039038%3Bclass_interval%3D1%3Bdest_id%3D1855490%3Bdest_type%3Dhotel%3Bdtdisc%3D0%3Bfrom_sf%3D1%3Bgroup_adults%3D1%3Bgroup_children%3D0%3Bhighlighted_hotels%3D1855490%3Binac%3D0%3Bindex_postcard%3D0%3Blabel_click%3Dundef%3Bno_rooms%3D1%3Boffset%3D0%3Bpostcard%3D0%3Braw_dest_type%3Dhotel%3Broom1%3DA%3Bsb_price_type%3Dtotal%3Bsearch_selected%3D1%3Bshw_aparth%3D1%3Bslp_r_match%3D0%3Bsrc%3Dsearchresults%3Bsrc_elem%3Dsb%3Bsrpvid%3Da53e49abc8b20230%3Bss%3DKiwi%2520As%2520Guest%2520House%252C%2520Rotorua%252C%2520Bay%2520of%2520Plenty%252C%2520New%2520Zealand%3Bss_all%3D0%3Bss_raw%3Dkiwias%3Bssb%3Dempty%3Bsshis%3D0%3Bssne%3DRotorua%3Bssne_untouched%3DRotorua%3Btop_ufis%3D1%26%3B&ss=Rainbow+Holiday+House%2C+Rotorua%2C+Bay+of+Plenty%2C+New+Zealand&is_ski_area=&ssne=Rotorua&ssne_untouched=Rotorua&city=900039038&checkin_year=2020&checkin_month=7&checkin_monthday=13&checkout_year=2020&checkout_month=7&checkout_monthday=14&group_adults=1&group_children=0&no_rooms=1&from_sf=1&ss_raw=rainbow+holiday+house&ac_position=0&ac_langcode=en&ac_click_type=b&dest_id=5288382&dest_type=hotel&place_id_lat=-38.105882&place_id_lon=176.239504&search_pageview_id=a53e49abc8b20230&search_selected=true&search_pageview_id=a53e49abc8b20230&ac_suggestion_list_length=3&ac_suggestion_theme_list_length=0")
    # Exact match on the full class attribute, including its trailing space.
    cards = driver.find_elements_by_xpath('//div[@class="sr_item sr_item_new sr_item_default sr_property_block sr_flex_layout "]')
    for c in cards:
        print('name', c.find_element_by_xpath('./div/div/div/div/h3/a/span[1]').text)
finally:
    # Always shut down the browser and the chromedriver process, even when
    # navigation or element lookup raises.
    driver.quit()
Приведённый выше код возвращает названия объектов размещения (properties).
Но когда я пытаюсь использовать Selenium вместе со Scrapy, это не работает.
import scrapy
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from scrapy_selenium import SeleniumRequest
class BookSpider(scrapy.Spider):
    """Spider that collects property names from a Booking.com search results page.

    The page is requested through ``SeleniumRequest`` and, for every result
    card found in the response, a ``{'name': ...}`` item is yielded.
    """

    name = 'book'
    allowed_domains = ['booking.com']

    # Exact match on the full class attribute, including its trailing space.
    # Shared by the explicit wait in start_requests() and by parse() so both
    # always look for the same elements.
    CARD_XPATH = '//div[@class="sr_item sr_item_new sr_item_default sr_property_block sr_flex_layout "]'

    def start_requests(self):
        """Yield the single Selenium-backed request for the search results page."""
        # Function-scope imports keep this block self-contained; selenium is
        # already a dependency of this file.
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC

        yield SeleniumRequest(
            url='https://www.booking.com/searchresults.en-gb.html?label=gen173nr-1DCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AED6AEBiAIBqAIDuAKPtaX4BcACAdICJDE5OWNlMDNkLWJjZTgtNGQyNC04ZjYwLTZiOTZmOTllYTZlN9gCBOACAQ&lang=en-gb&sb=1&src=searchresults&src_elem=sb&error_url=https%3A%2F%2Fwww.booking.com%2Fsearchresults.en-gb.html%3Flabel%3Dgen173nr-1DCAEoggI46AdIM1gEaK4BiAEBmAEJuAEXyAEM2AED6AEBiAIBqAIDuAKPtaX4BcACAdICJDE5OWNlMDNkLWJjZTgtNGQyNC04ZjYwLTZiOTZmOTllYTZlN9gCBOACAQ%3Btmpl%3Dsearchresults%3Bac_click_type%3Db%3Bac_position%3D0%3Bcheckin_month%3D7%3Bcheckin_monthday%3D13%3Bcheckin_year%3D2020%3Bcheckout_month%3D7%3Bcheckout_monthday%3D14%3Bcheckout_year%3D2020%3Bcity%3D900039038%3Bclass_interval%3D1%3Bdest_id%3D1855490%3Bdest_type%3Dhotel%3Bdtdisc%3D0%3Bfrom_sf%3D1%3Bgroup_adults%3D1%3Bgroup_children%3D0%3Bhighlighted_hotels%3D1855490%3Binac%3D0%3Bindex_postcard%3D0%3Blabel_click%3Dundef%3Bno_rooms%3D1%3Boffset%3D0%3Bpostcard%3D0%3Braw_dest_type%3Dhotel%3Broom1%3DA%3Bsb_price_type%3Dtotal%3Bsearch_selected%3D1%3Bshw_aparth%3D1%3Bslp_r_match%3D0%3Bsrc%3Dsearchresults%3Bsrc_elem%3Dsb%3Bsrpvid%3Da53e49abc8b20230%3Bss%3DKiwi%2520As%2520Guest%2520House%252C%2520Rotorua%252C%2520Bay%2520of%2520Plenty%252C%2520New%2520Zealand%3Bss_all%3D0%3Bss_raw%3Dkiwias%3Bssb%3Dempty%3Bsshis%3D0%3Bssne%3DRotorua%3Bssne_untouched%3DRotorua%3Btop_ufis%3D1%26%3B&ss=Rainbow+Holiday+House%2C+Rotorua%2C+Bay+of+Plenty%2C+New+Zealand&is_ski_area=&ssne=Rotorua&ssne_untouched=Rotorua&city=900039038&checkin_year=2020&checkin_month=7&checkin_monthday=13&checkout_year=2020&checkout_month=7&checkout_monthday=14&group_adults=1&group_children=0&no_rooms=1&from_sf=1&ss_raw=rainbow+holiday+house&ac_position=0&ac_langcode=en&ac_click_type=b&dest_id=5288382&dest_type=hotel&place_id_lat=-38.105882&place_id_lon=176.239504&search_pageview_id=a53e49abc8b20230&search_selected=true&search_pageview_id=a53e49abc8b20230&ac_suggestion_list_length=3&ac_suggestion_theme_list_length=0',
            wait_time=5,
            # scrapy-selenium only performs its explicit wait when wait_until
            # is supplied; wait_time on its own does not delay the response.
            # Wait (up to wait_time seconds) for at least one result card.
            wait_until=EC.presence_of_element_located((By.XPATH, self.CARD_XPATH)),
            callback=self.parse,
        )

    def parse(self, response):
        """Yield one ``{'name': ...}`` item per result card in the response.

        ``name`` is ``None`` when the name node is missing from a card,
        because ``.get()`` returns ``None`` for an empty selection.
        """
        for c in response.xpath(self.CARD_XPATH):
            yield {'name': c.xpath('./div/div/div/div/h3/a/span[1]/text()').get()}
Я никак не могу понять, в чём дело, потому что единственное отличие — это использование SeleniumRequest. Если у кого-то есть идеи, буду очень благодарен.
Спасибо