Imports required to run
'''
# -*- coding: utf-8 -*-
import scrapy
from scrapy.selector import Selector
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from shutil import which
import logging
import time
from ..items import B2BItem
'''
Start of the spider
'''
class ProductsSpider(scrapy.Spider):
    """Collect pre-order / tentative pre-order listings from b2b.mile-stone.jp.

    On construction the spider drives a headless Chrome through the login
    form and the status filters, then keeps the rendered page source
    (``self.html``) and the browser cookies (``self.session_cookies``).
    ``parse`` extracts the listing fields from that snapshot for the start
    URL and from the downloaded response for every following page.

    Each yielded ``B2BItem`` holds, per field, the list of all values found
    on one listing page (one item per page, not one item per product).
    """

    name = 'products'
    # Number of the first follow-up page; the start URL itself is page 1.
    page_number = 2
    # Last listing page that is requested (limit taken from the original code).
    last_page = 318
    # Host name only. The original entry carried a trailing slash, which is
    # not a domain name; such an entry does not match the request host, so
    # the offsite filter discards the follow-up page requests.
    allowed_domains = ['b2b.mile-stone.jp']
    search_url = 'https://b2b.mile-stone.jp/ja/search/0/status=preOrder,tentativePreOrder/'
    start_urls = [search_url]
    # userName = '******'
    # password = '******'

    # Item field name -> XPath evaluated against a listing page.
    # NOTE(review): the ``font/font`` steps look like wrapper elements added
    # by in-browser page translation; if the fetched HTML does not contain
    # them these expressions return nothing. Presumably this is why some
    # fields came back empty for the original author - confirm against the
    # raw page source.
    field_xpaths = (
        ('product_url', "//h3[@class='h5 text-primary h-fix-32']/a/@href"),
        ('product_name', "//h3[@class='h5 text-primary h-fix-32']/a/font/font/text()"),
        ('stockStatus_name', "//h4[@class='small border-gray-light']/p/span/font/font/text()"),
        ('company_name', "//h4[@class='small border-gray-light']/p/span[2]/font/font/text()"),
        ('retail_price', "//div[@class='row small']/dl/dd/span/font/font/text()"),
        ('wholesale_price', "//div[@class='row small']/dl/dd[2]/span/span[2]/font/font/text()"),
        ('stock_date', "//div[@class='caption']/div/dl/dd/font/font/text()"),
        ('scheduledRelease_date', "//div[@class='caption']/div/dl/dd[2]/font/font/text()"),
        ('reservationDeadline_date', "//div[@class='caption']/div/dl/dd[3]/font/font/text()"),
        ('image1_url', "//div[@class='sash-inline']/a/img/@src"),
        ('sales_restriction', "//form/p[@class='moq m-8 bg-danger text-danger']/font/font/text()"),
    )

    # Original author's note: start of Selenium to log in, then navigate to
    # the start page with the search option keyword.
    def __init__(self, *args, **kwargs):
        """Log in with headless Chrome and capture the filtered listing page.

        Positional and keyword arguments are forwarded unchanged to
        ``scrapy.Spider.__init__``.

        Raises:
            RuntimeError: if no ``chromedriver`` executable is found on PATH.
        """
        super(ProductsSpider, self).__init__(*args, **kwargs)

        chrome_options = Options()
        chrome_options.add_argument("--headless")

        chrome_path = which("chromedriver")
        if chrome_path is None:
            raise RuntimeError("chromedriver executable not found on PATH")

        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
        try:
            self.html, self.session_cookies = self._login_and_snapshot(driver)
        finally:
            # quit() (rather than close()) ends the whole browser session,
            # and the finally clause runs it even when a lookup above fails.
            driver.quit()
    # Original author's note: end of the headless run that starts the
    # Selenium "crawler".

    def _login_and_snapshot(self, driver):
        """Log in, apply the status filters and return ``(html, cookies)``.

        ``html`` is the page source after the last filter click; ``cookies``
        is the browser's cookie list reduced to name/value/domain/path.
        """
        # to set screen-size resolution
        driver.set_window_size(1920, 1080)
        driver.get("https://b2b.mile-stone.jp/ja/v1/login/")

        login_input = driver.find_element_by_xpath("//input[@name='account']")
        login_input.send_keys('******')
        password_input = driver.find_element_by_xpath("//input[@name='passphrase']")
        password_input.send_keys('******')
        password_input.send_keys(Keys.ENTER)
        time.sleep(5)

        # search_input = driver.find_element_by_xpath("//input[@id='param-free-form']")
        # search_input.send_keys('KADOKAWA')
        # search_input.send_keys(Keys.ENTER)

        site_map = driver.find_element_by_xpath(
            "//div[@class='footer footer-links']/div/div/div/div[3]/ul/li/a")
        site_map.click()
        time.sleep(10)

        product_path = driver.find_element_by_xpath(
            "//div[@class='sitemap']/ul/li[4]/ul/li[1]/a")
        product_path.click()
        time.sleep(5)

        # Pre-order
        pre_order = driver.find_element_by_xpath("//ul[@id='param-status-form']/li[1]/label")
        pre_order.click()
        time.sleep(5)

        # In-stock
        # in_stock = driver.find_element_by_xpath("//ul[@id='param-status-form']/li[2]/input")
        # in_stock.click()
        # time.sleep(5)

        # Back-order
        # back_order = driver.find_element_by_xpath("//ul[@id='param-status-form']/li[3]/input")
        # back_order.click()
        # time.sleep(5)

        # Waiting for Stock
        # waiting_for_stock = driver.find_element_by_xpath("//ul[@id='param-status-form']/li[4]/input")
        # waiting_for_stock.click()
        # time.sleep(5)

        # Tentative Pre-order
        tentative_pre_order = driver.find_element_by_xpath(
            "//ul[@id='param-status-form']/li[5]/label")
        tentative_pre_order.click()
        time.sleep(5)

        # print(driver.page_source)
        html = driver.page_source
        cookies = [
            {key: cookie[key] for key in ('name', 'value', 'domain', 'path') if key in cookie}
            for cookie in driver.get_cookies()
        ]
        return html, cookies

    def start_requests(self):
        """Request the start URLs with the cookies captured from the browser.

        Presumably the listing pages require the logged-in session; sending
        the browser cookies on the first request lets the follow-up page
        requests reuse them - TODO confirm against the site.
        """
        for url in self.start_urls:
            yield scrapy.Request(
                url,
                cookies=self.session_cookies,
                callback=self.parse,
                dont_filter=True,
            )

    def parse(self, response):
        """Yield the item for one listing page, then request the next page.

        The start URL is parsed from the Selenium snapshot (``self.html``);
        follow-up pages, marked by ``page_number`` in the request meta, are
        parsed from ``response`` so that each page yields its own data
        instead of repeating the snapshot.
        """
        current_page = response.meta.get('page_number')
        if current_page is None:
            resp = Selector(text=self.html)
        else:
            resp = Selector(text=response.text)

        # items inside product page
        yield self._build_item(resp)

        # Original author's note: still having a problem when trying to get
        # information from here; it seems some of it is not displayed
        # because of the login change / request change on the site? Or, most
        # likely, I just do not know how to capture it.
        # Start of the next-page request; here it fails to go to page 2, etc.
        #
        # The page number travels in the request meta instead of being
        # incremented on the class, so the counter is not shared state.
        next_number = self.page_number if current_page is None else current_page + 1
        if next_number <= self.last_page:
            next_page = '{}?page={}'.format(self.search_url, next_number)
            yield response.follow(
                next_page,
                callback=self.parse,
                meta={'page_number': next_number},
            )

    def _build_item(self, resp):
        """Return a ``B2BItem`` whose fields are the XPath matches in ``resp``."""
        items = B2BItem()
        for field, xpath in self.field_xpaths:
            items[field] = resp.xpath(xpath).extract()
        return items
'''