Я рекомендую вам использовать Scrapy для извлечения данных по конкретному XPath. Я проверил ваш код: XPath в нём немного неверен. Я переработал ваш код: присвоил имена классов переменным, а затем объединил их, чтобы получить точный XPath. В следующем примере кода я создал 7 списков, которые содержат соответственно цену, имя, тип сайта, доход, трафик, прибыль и стоимость товарных запасов.
import requests
from scrapy.selector import Selector
# Listing page(s) to scrape.
start_urls = ["https://exchange.shopify.com/shops?sale_price=1000&shopify_shop_created_at=6m%2C0m"]

# One result list per extracted field; entry k of every list comes from the
# same shop tile.
price_list = []
name_list = []
website_type_list = []
revenue_list = []
traffic_list = []
profit_list = []
inventory_value_list = []

# Class-attribute values, already wrapped in single quotes so they can be
# spliced directly into an XPath predicate.
first_class_name = "'grid grid--equal-height'"  # grid holding the shop tiles
second_class_name = "'grid__item grid__item--tablet-up-third grid__item--desktop-up-quarter grid__item--wide-up-quarter gutter-bottom category-tile-grid-item layout-flex'"  # one shop tile; indexed 1..24 below
third_class_name = "'shop-tile__price'"  # price
fourth_class_name = "'shop-tile__url heading--truncated'"  # shop name
fifth_class_name = "'shop-tile__content shop-tile__metrics-container'"  # metrics table (Revenue (USD) .. Inventory Value)
sixth_class_name = "'shop-tile__metric'"  # one row of the metrics table; rows 1..4 are read below
seventh_class_name = "'shop-tile__category heading--truncated'"  # website type (automotive, sports, ...)

for link in start_urls:
    # Fetch the page. The timeout keeps the script from hanging forever, and
    # raise_for_status() stops an HTTP error page from being scraped into
    # lists of empty strings.
    response = requests.get(link, proxies=None, timeout=30)
    response.raise_for_status()

    # Page source, parsed once and reused for every query below.
    data = response.text
    page = Selector(text=data)

    # The page is expected to show 24 tiles; XPath positions are 1-based.
    for i in range(1, 25):
        # Common prefixes: the i-th tile, and a metrics row inside that tile.
        tile_xpath = (
            "//div[@class=" + first_class_name + "]"
            "//div[@class=" + second_class_name + "][" + str(i) + "]"
        )
        metric_row_xpath = (
            tile_xpath
            + "//div[@class=" + fifth_class_name + "]"
            + "//div[@class=" + sixth_class_name + "]"
        )
        metric_value_xpath = "//span[@class='shop-tile__metric__value text-bold'][1]"

        price_xpath = "normalize-space(" + tile_xpath + "//div[@class=" + third_class_name + "])"
        name_xpath = "normalize-space(" + tile_xpath + "//p[@class=" + fourth_class_name + "])"
        website_type_xpath = "normalize-space(" + tile_xpath + "//p[@class=" + seventh_class_name + "])"
        revenue_xpath = "normalize-space(" + metric_row_xpath + "[1]" + metric_value_xpath + ")"
        traffic_xpath = "normalize-space(" + metric_row_xpath + "[2]" + metric_value_xpath + ")"
        profit_xpath = "normalize-space(" + metric_row_xpath + "[3]" + metric_value_xpath + ")"
        inventory_value_xpath = "normalize-space(" + metric_row_xpath + "[4]" + metric_value_xpath + ")"

        # normalize-space() always evaluates to exactly one string (empty when
        # the node is missing), so taking element [0] cannot raise IndexError.
        price_list.append(page.xpath(price_xpath).extract()[0])
        name_list.append(page.xpath(name_xpath).extract()[0])
        website_type_list.append(page.xpath(website_type_xpath).extract()[0])
        revenue_list.append(page.xpath(revenue_xpath).extract()[0])
        traffic_list.append(page.xpath(traffic_xpath).extract()[0])
        profit_list.append(page.xpath(profit_xpath).extract()[0])
        inventory_value_list.append(page.xpath(inventory_value_xpath).extract()[0])

# Every list should report the same length (one entry per tile).
print(len(price_list))
print(len(name_list))
print(len(website_type_list))
print(len(revenue_list))
print(len(traffic_list))
print(len(profit_list))
print(len(inventory_value_list))
Вывод:
24
24
24
24
24
24
24
Проверка списков:
# Show the first five entries of each result list as a quick sanity check.
for _preview in (
    price_list,
    name_list,
    website_type_list,
    revenue_list,
    traffic_list,
    profit_list,
    inventory_value_list,
):
    print(_preview[:5])
Вывод:
['$1,150USD', '$3,000USD', '$2,500USD', '$1,000USD', '$2,300USD']
['www.cosmicdetail.co.uk', 'prestige-timepiece.com', 'gomommyboutique.com', 'sunnysx.com', 'squishywishy.com']
['Automotive', 'Fashion and apparel', 'Toys and games', 'Fashion and apparel', 'Gifts and collectibles']
['$56', '$961', '$70', '$1.3K', '$403']
['111', '7.5K', '454', '2.8K', '2.6K']
['$50', '$1.0K', '$700', '$500', '$100']
['$1.8K', '', '', '', '']
Проверка XPath дохода (если вы хотите попробовать его в консоли):
# Evaluated as a bare expression in an interactive console, this echoes the
# last revenue XPath built by the scraping loop (tile index 24).
revenue_xpath
Вывод:
"normalize-space(//div[@class='grid grid--equal-height']//div[@class='grid__item grid__item--tablet-up-third grid__item--desktop-up-quarter grid__item--wide-up-quarter gutter-bottom category-tile-grid-item layout-flex'][24]//div[@class='shop-tile__content shop-tile__metrics-container']//div[@class='shop-tile__metric'][1]//span[@class='shop-tile__metric__value text-bold'][1])"