Получение данных из Airbnb с использованием Python, Selenium и BeautifulSoup - PullRequest
0 голосов
/ 26 апреля 2020

Немного сложно привести полный пример кода, так как я пытаюсь получить данные о ценах из моего личного календаря хостинга. Поэтому я опущу личные данные, которые могут затруднить повторение проблем для зрителей, но, надеюсь, кто-то знает, как преодолеть эту проблему.

Итак, мой код, который используется для входа в Airbnb:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen

# Log in to Airbnb through the e-mail form.
# Credentials are placeholders: the author omitted the personal data from the post.
UserName = [PersonalData]
Password = [PersonalData]

Driver = webdriver.Firefox()  # Define webdriver to use
Driver.get('https://www.airbnb.co.uk/login')
wait = WebDriverWait(Driver, 20)

# Open the e-mail login form once the "Continue with email" button is in the DOM.
wait.until(EC.presence_of_element_located((By.XPATH, "//div[contains(text(),'Continue with email')]")))
time.sleep(6)
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
# Selenium releases no longer provide.
Element = Driver.find_element(By.XPATH, "//div[contains(text(),'Continue with email')]")
Element.click()

# Fill in the e-mail and password fields.
wait.until(EC.presence_of_element_located((By.XPATH, "//input[@id='email']")))
Element = Driver.find_element(By.XPATH, "//input[@id='email']")
Element.send_keys(UserName)
Element = Driver.find_element(By.XPATH, "//input[@id='password']")
Element.send_keys(Password)

# Locate the submit button, pause, then submit the form.
Element = Driver.find_element(By.XPATH, "//div[@class='_wfo3ii']/button[@type='submit']")
time.sleep(6)
Element.click()

# Navigate from the post-login page to the hosting calendar.
# The first wait uses a long (240 s) timeout before the toggle button appears.
wait = WebDriverWait(Driver, 240)
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='_167wsvl']/button[@id='field-guide-toggle']")))
time.sleep(2)
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
# Selenium releases no longer provide.
Element = Driver.find_element(By.XPATH, "//div[@class='_167wsvl']/button[@id='field-guide-toggle']")
Element.click()

# Remaining steps go back to the 20 s timeout.
wait = WebDriverWait(Driver, 20)
wait.until(EC.presence_of_element_located((By.XPATH, "//a[@class='_1b50maqh']/div[@class='_ojs7nk']")))
time.sleep(2)
Element = Driver.find_element(By.XPATH, "//a[@class='_1b50maqh']/div[@class='_ojs7nk']")
Element.click()

# Follow the link whose href is '/hosting/calendar'.
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@class='_121z06r2']/a[@href='/hosting/calendar']")))
time.sleep(2)
Element = Driver.find_element(By.XPATH, "//div[@class='_121z06r2']/a[@href='/hosting/calendar']")
Element.click()

# The preceding click starts a navigation; reading page_source immediately can
# capture the document before the calendar page has loaded. Wait until the
# browser is on a calendar URL and the document reports it has finished loading.
wait.until(EC.url_contains('calendar'))
wait.until(lambda drv: drv.execute_script('return document.readyState') == 'complete')
# NOTE(review): the calendar cells are presumably rendered by JavaScript after
# the initial load; waiting for a calendar-specific element here would be more
# robust, but no such selector is visible in this file. TODO confirm.

# Snapshot the current DOM as HTML text.
PageSourceURL = Driver.page_source

# Parse the snapshot with the built-in HTML parser and print the whole document.
Soup = BeautifulSoup(PageSourceURL, features='html.parser')
PageHTML = Soup
print(PageHTML)

На самом деле проблема у меня только с последними 4 строками. Вот распечатка HTML страницы, которую я получаю:

<html class="js-focus-visible" data-is-hyperloop="true" dir="ltr" lang="en-GB" xmlns:fb="http://ogp.me/ns/fb#"><head><script>window.sherlock_firstbyte = window.performance && window.performance.timing ? window.performance.timing.responseStart : Number(new Date());</script><script>!function(){"use strict";var 
e=730,n="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";var t=/(?:^| )bev=(.*?)(?:;|$)/,o=!1;function a(){return window.bev=window.bev||function(){if(o||"undefined"==typeof document)return null;o=!0;var e=(document.cookie||"").match(t);return e&&2===e.length?decodeURIComponent(e[1]):null}(),window.bev}!function(){try{if(!a()){var t=function(){for(var e=[],t=15;t>=0;t--)e.push(n[Math.floor(Math.random()*n.length)]);var o=Math.floor(Date.now()/1e3);return"".concat(o,"_").concat(e.join(""))}();o=t,r=document.location.hostname,c=".".concat(r.slice(r.indexOf("airbnb."))),(i=new Date).setDate(i.getDate()+e),document.cookie=["bev=".concat(encodeURIComponent(o)),"expires=".concat(i.toUTCString()),"path=/","domain=".concat(c),"secure"].join("; "),window.bev=t,function(e){var n=new XMLHttpRequest;n.open("POST","/tracking/events",!0),n.setRequestHeader("Content-Type","application/json; charset=utf-8");var t={event_name:"bev_created",event_data:{bev:e,page_uri:document.location.pathname,page_referrer:document.referrer}};n.send(JSON.stringify(t))}(t)}}catch(e){window.console&&console.error("Could not set bev cookie:",e)}var o,r,c,i}()}();
</script><script>(function() {
  var pgRequest = new XMLHttpRequest();
  var diffStamp = Date.now().toString() + Math.random().toString().substring(2);
  pgRequest.open('GET', '/pg_pixel?r=' + encodeURIComponent(document.referrer || '') + '&diff=' + diffStamp, true);
  pgRequest.send();
})()</script><script>
!function(n){var t,r;n(function e(){document.body&&null!==document.createNodeIterator(document.body,NodeFilter.SHOW_TEXT,function(e){return!!e&&/[^s]/.test(e.nodeValue)&&"SCRIPT"!==e.parentNode.tagName&&"STYLE"!==e.parentNode.tagName&&0<e.parentNode.offsetHeight},!1).nextNode()||null!==document.querySelector("input[placeholder]")?n(function(){var e=performance.now();t?t(e):r=e,performance.measure("TTFCP")}):n(e)}),self.perfMetrics=self.perfMetrics||{},self.perfMetrics.onFirstContentfulPaint=function(e){r?e(r):t=e}}(requestAnimationFrame),function(){var t,r,i,o,a="FMP-target";function c(){var e=document.getElementById(a);if(o=0,e)if(i===e)t=requestAnimationFrame(c);else if("IMG"!==e.tagName||e.complete){var n=performance.now();i=e,r?r(n):o=n,performance.measure("TTFMP")}else t=requestAnimationFrame(c);else t=requestAnimationFrame(c)}t=requestAnimationFrame(c),self.perfMetrics=self.perfMetrics||{},self.perfMetrics.onFirstMeaningfulPaint=function(e){o?e(o):r=e},self.perfMetrics.startSearchingForFirstMeaningfulPaint=function(){i=document.getElementById(a),c()},self.perfMetrics.stopSearchingForFirstMeaningfulPaint=function(){cancelAnimationFrame(t)}}(requestAnimationFrame),function(c,f){var t,r,i,n,o=[],s={passive:!0,capture:!0},e=new Date,u="pointerup",l="pointercancel";function m(e,n){t||(t=n,r=e,i=new Date,a())}function a(){0<=r&&r<i-e&&(o.forEach(function(e){e(r,t)}),o=[])}function p(e){if(e.cancelable){var n=(1e12<e.timeStamp?new Date:performance.now())-e.timeStamp;"pointerdown"==e.type?(o=n,a=e,c(u,t,s),c(l,r,s)):m(n,e)}function t(){m(o,a),i()}function r(){i()}function i(){f(u,t,s),f(l,r,s)}var o,a}n=c,["click","mousedown","keydown","touchstart","pointerdown"].forEach(function(e){n(e,p,s)}),self.perfMetrics=self.perfMetrics||{},self.perfMetrics.onFirstInputDelay=function(e){o.push(e),a()},self.perfMetrics.clearFirstInputDelay=function(){t&&(i=r=t=null)}}(addEventListener,removeEventListener);
</script><meta charset="utf-8"/><meta content="en-GB" name="locale"/><meta content="notranslate" name="google"/><meta content="138566025676" property="fb:app_id"/><meta content="Airbnb" property="og:site_name"/><meta content="en_GB" property="og:locale"/><meta content="https://www.airbnb.co.uk/multicalendar/27667289" property="og:url"/><meta content="" property="og:title"/><meta content="" property="og:description"/><meta content="website" property="og:type"/><link crossorigin="anonymous" href="https://a0.muscache.com/airbnb/static/packages/common-59f479fe1e596df7f1f7830bd5ea15bb.css" media="all" rel="stylesheet" type="text/css"/><link href="https://a0.muscache.com/airbnb/static/packages/dls/dls-lite_cereal-d9f6fdb2a0dd4a18c37f8ee01de8ec3d.css" media="all" rel="stylesheet" type="text/css"/><link href="https://a0.muscache.com/airbnb/static/packages/dls/dls-lite_o2-leftover-3644a5fa97a2e311cd1cd1dab8abaf5f.css" media="all" rel="stylesheet" type="text/css"/><link href="https://a0.muscache.com/airbnb/static/packages/dls/common_o2.1_cereal-4e0adc51966c85fefe84f5454591ffe2.css" media="all" rel="stylesheet" type="text/css"/><meta content="https://a0.muscache.com/airbnb/static/logos/trips-og-1280x630-9de9c338cc3fd9b5663fb80be0cbe8c2.jpg" property="og:image"/><meta content="width=device-width, initial-scale=1" name="viewport"/><link href="https://a0.muscache.com/airbnb/static/logos/trips-og-200x200-a3be4fbbb3b6c5e758804438dea35adc.jpg" rel="image_src"/><meta content="authenticity_token" id="csrf-param-meta-tag" name="csrf-param"/><meta content="V4$.airbnb.co.uk$J4KawdPCgyI$rju-wB8sPA3cJtPA9711sPr2V3WmRPbHhdpLGH7j2UQ=" id="csrf-token-meta-tag" name="csrf-token"/><title>Edit calendar for 'Luxury Grade ll Listed Apartment 3' - Airbnb</title><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta content="" id="english-canonical-url"/><meta content="on" name="twitter:widgets:csp"/><link href="https://www.airbnb.co.uk/multicalendar/27667289" rel="canonical"/><link 
href="https://www.airbnb.com/multicalendar/27667289" hreflang="en" rel="alternate"/><link href="https://www.airbnb.de/multicalendar/27667289" hreflang="de" rel="alternate"/><link href="https://www.airbnb.it/multicalendar/27667289" hreflang="it" rel="alternate"/><link href="https://www.airbnb.es/multicalendar/27667289" hreflang="es-ES" rel="alternate"/><link href="https://www.airbnb.fr/multicalendar/27667289" hreflang="fr" rel="alternate"/><link href="https://www.airbnb.com.br/multicalendar/27667289" hreflang="pt" rel="alternate"/><link href="https://www.airbnb.dk/multicalendar/27667289" hreflang="da" rel="alternate"/><link href="https://www.airbnb.co.uk/multicalendar/27667289" hreflang="en-GB" rel="alternate"/><link href="https://www.airbnb.ru/multicalendar/27667289" hreflang="ru" 
rel="alternate"/><link href="https://www.airbnb.pl/multicalendar/27667289" hreflang="pl" rel="alternate"/><link href="https://www.airbnb.co.kr/multicalendar/27667289" hreflang="ko" rel="alternate"/><link href="https://www.airbnb.cz/multicalendar/27667289" hreflang="cs" rel="alternate"/><link href="https://www.airbnb.hu/multicalendar/27667289" hreflang="hu" rel="alternate"/><link href="https://www.airbnb.at/multicalendar/27667289" hreflang="de-AT" rel="alternate"/><link href="https://www.airbnb.pt/multicalendar/27667289" hreflang="pt-PT" rel="alternate"/><link href="https://www.airbnb.gr/multicalendar/27667289" hreflang="el" rel="alternate"/><link href="https://www.airbnb.com.tr/multicalendar/27667289" hreflang="tr" rel="alternate"/><link href="https://www.airbnb.nl/multicalendar/27667289" hreflang="nl" rel="alternate"/><link href="https://www.airbnb.se/multicalendar/27667289" hreflang="sv" rel="alternate"/><link href="https://www.airbnb.com.tw/multicalendar/27667289" hreflang="zh-TW" rel="alternate"/><link href="https://www.airbnb.com.hk/multicalendar/27667289" hreflang="zh-HK" rel="alternate"/><link href="https://www.airbnb.com.sg/multicalendar/27667289" hreflang="en-SG" rel="alternate"/><link href="https://www.airbnb.co.id/multicalendar/27667289" hreflang="id" rel="alternate"/><link href="https://www.airbnb.com.my/multicalendar/27667289" hreflang="ms" rel="alternate"/><link href="https://www.airbnb.com.au/multicalendar/27667289" hreflang="en-AU" rel="alternate"/><link href="https://www.airbnb.jp/multicalendar/27667289" hreflang="ja" rel="alternate"/><link href="https://www.airbnb.is/multicalendar/27667289" hreflang="is" rel="alternate"/><link href="https://www.airbnb.no/multicalendar/27667289" hreflang="no" rel="alternate"/><link href="https://www.airbnb.ch/multicalendar/27667289" hreflang="de-CH" rel="alternate"/><link href="https://fr.airbnb.ch/multicalendar/27667289" hreflang="fr-CH" rel="alternate"/><link href="https://it.airbnb.ch/multicalendar/27667289" 
hreflang="it-CH" rel="alternate"/><link href="https://www.airbnb.co.nz/multicalendar/27667289" hreflang="en-NZ" rel="alternate"/><link href="https://www.airbnb.ca/multicalendar/27667289" hreflang="en-CA" rel="alternate"/><link href="https://fr.airbnb.ca/multicalendar/27667289" hreflang="fr-CA" rel="alternate"/><link href="https://www.airbnb.be/multicalendar/27667289" hreflang="nl-BE" rel="alternate"/><link href="https://fr.airbnb.be/multicalendar/27667289" hreflang="fr-BE" rel="alternate"/><link href="https://www.airbnb.fi/multicalendar/27667289" hreflang="fi" rel="alternate"/><link href="https://www.airbnb.ie/multicalendar/27667289" hreflang="en-IE" rel="alternate"/><link href="https://ga.airbnb.ie/multicalendar/27667289" hreflang="ga-IE" rel="alternate"/><link href="https://www.airbnb.cat/multicalendar/27667289" hreflang="ca" rel="alternate"/><link href="https://www.airbnb.co.in/multicalendar/27667289" hreflang="en-IN" rel="alternate"/><link href="https://hi.airbnb.co.in/multicalendar/27667289" hreflang="hi-IN" rel="alternate"/><link href="https://www.airbnb.mx/multicalendar/27667289" 
hreflang="es-MX" rel="alternate"/><link href="https://www.airbnb.cl/multicalendar/27667289" hreflang="es-CL" rel="alternate"/><link href="https://www.airbnb.co.cr/multicalendar/27667289" hreflang="es-CR" rel="alternate"/><link href="https://www.airbnb.co.ve/multicalendar/27667289" hreflang="es-VE" rel="alternate"/><link href="https://www.airbnb.com.ar/multicalendar/27667289" hreflang="es-AR" rel="alternate"/><link href="https://www.airbnb.com.bo/multicalendar/27667289" hreflang="es-BO" rel="alternate"/><link href="https://www.airbnb.com.bz/multicalendar/27667289" hreflang="es-BZ" rel="alternate"/><link href="https://www.airbnb.com.co/multicalendar/27667289" hreflang="es-CO" rel="alternate"/><link href="https://www.airbnb.com.ec/multicalendar/27667289" hreflang="es-EC" rel="alternate"/><link href="https://www.airbnb.com.gt/multicalendar/27667289" hreflang="es-GT" rel="alternate"/><link href="https://www.airbnb.com.hn/multicalendar/27667289" hreflang="es-HN" rel="alternate"/><link href="https://www.airbnb.com.ni/multicalendar/27667289" hreflang="es-NI" rel="alternate"/><link href="https://www.airbnb.com.pa/multicalendar/27667289" hreflang="es-PA" rel="alternate"/><link href="https://www.airbnb.com.pe/multicalendar/27667289" hreflang="es-PE" rel="alternate"/><link href="https://www.airbnb.com.py/multicalendar/27667289" hreflang="es-PY" rel="alternate"/><link href="https://www.airbnb.com.sv/multicalendar/27667289" hreflang="es-SV" rel="alternate"/><link href="https://www.airbnb.com.mt/multicalendar/27667289" hreflang="en-MT" rel="alternate"/><link href="https://mt.airbnb.com.mt/multicalenda

Сейчас то, что я хотел бы найти на фактической странице, - это фактическая дата: enter image description here

Очевидно, что это не показано. Итак, я понимаю, что неправильно разбираю данные в BeautifulSoup - но не уверен, как получить полностраничные элементы в BS.

Надеюсь, я понятно объяснил проблему - любая помощь очень ценится.

Спасибо, Роб

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...