Я пытаюсь получить некоторые данные из TripAdvisor, используя селен.Мне удалось получить отзывы, рейтинг и некоторые даты, но когда я пытался получить комментарии, местоположение рецензента и помощь в голосовании, я всегда получал первый результат со страницы 5 раз.Это не происходит с другими данными.
Вы можете видеть мой код ниже
import csv
import time
from selenium import webdriver
container = driver.find_elements_by_xpath("//div[@class='hotels-review-list-parts-SingleReview__reviewContainer--d54T4']")
num_page_items = len(container)
for j in range(num_page_items):
# to save the data
string = container[j].find_element_by_xpath(".//span[contains(@class, 'ui_bubble_rating bubble_')]").get_attribute("class")
rating = string.split("_")
review=container[j].find_element_by_xpath(".//q[@class='hotels-review-list-parts-ExpandableReview__reviewText--3oMkH']").text.replace("\n","")
check_in=container[j].find_element_by_xpath(".//div[@class='hotels-review-list-parts-EventDate__event_date--CRXs4']").text.replace("\n","").replace("Date of stay: ","")
name_remove=container[j].find_element_by_xpath(".//a[@class='ui_header_link social-member-event-MemberEventOnObjectBlock__member--35-jC']").text.replace("\n","")
review_date=container[j].find_element_by_xpath(".//div[@class='social-member-event-MemberEventOnObjectBlock__event_type--3njyv']").text.replace(name_remove,"").replace(" wrote a review ","")
#data location,contributions,helpful_vote may not be always available and we use if to check and give values
if (check_exists_by_xpath('//div[@class="social-member-MemberHeaderStats__event_info--30wFs"]')):
location=container[j].find_element_by_xpath('//span[@class="social-member-MemberHeaderStats__hometown_stat_item--231iN"]').text.replace("\n","")
time.sleep(5)
else:
location=""
if (check_exists_by_xpath('//div[@class="social-member-MemberHeaderStats__event_info--30wFs"]/span[3]')):
contributions=container[j].find_element_by_xpath('//div[@class="social-member-MemberHeaderStats__event_info--30wFs"]/span[2]').text.replace("\n","").replace(" contributions","").replace(" helpful votes","").replace(" helpful vote","")
helpfull_votes=container[j].find_element_by_xpath('//div[@class="social-member-MemberHeaderStats__event_info--30wFs"]/span[3]').text.replace("\n","").replace(" helpful votes","").replace(" helpful vote","")
elif(check_exists_by_xpath('//div[@class="social-member-MemberHeaderStats__event_info--30wFs"]/span[2]')):
contributions=container[j].find_element_by_xpath('//div[@class="social-member-MemberHeaderStats__event_info--30wFs"]/span[2]').text.replace("\n","").replace(" contributions","").replace(" helpful votes","").replace(" helpful vote","")
helpfull_votes=""
rating review_date stay location contributions helpful_votes
50 9-Jun Jun-19 Bucharest, Romania 1 23
50 8-Jun Jun-19 Bucharest, Romania 1 23
50 6-Jun Jun-19 Bucharest, Romania 1 23
50 4-Jun May-19 Bucharest, Romania 1 23
50 May-19 May-19 Bucharest, Romania 1 23
50 May-19 May-19 Monaco 10 1
50 May-19 May-19 Monaco 10 1
50 May-19 May-19 Monaco 10 1
50 May-19 May-19 Monaco 10 1
50 May-19 May-19 Monaco 10 1
50 May-19 May-19 Limassol, Cyprus 4 2
50 May-19 May-19 Limassol, Cyprus 4 2
Вы можете видеть, что stay
, location
, contributions
,и helpful_votes
одинаковы 5 раз.