Скрапинг пользовательского поиска eBay с помощью BeautifulSoup. Как справиться с нумерацией страниц? - PullRequest
0 голосов
/ 01 июня 2018

Я пытаюсь спарсить результаты пользовательского поиска на eBay, который показывает 200 элементов на одной странице. Мне нужно получить название товара, цену и ссылку на этот товар. Пока всё идёт нормально. Но я также хотел бы, чтобы код переходил по ссылке на следующую страницу с 200 или менее элементами и извлекал их тоже.

Это код, который я использую:

import csv
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as souce

start_url='https://www.ebay.de/sch/i.html?_fosrp=1&_from=R40&_nkw=iphone&_in_kw=1&_ex_kw=&_sacat=0&_mPrRngCbx=1&_udlo=600&_udhi=4.800&LH_BIN=1&LH_ItemCondition=4&_ftrt=901&_ftrv=1&_sabdlo=&_sabdhi=&_samilow=&_samihi=&_sadis=10&_fpos=&LH_SubLocation=1&_sargn=-1%26saslc%3D0&_fsradio2=%26LH_LocatedIn%3D1&_salic=77&_saact=77&LH_SALE_CURRENCY=0&_sop=2&_dmd=1&_ipg=200'

# Download the first results page; the context manager closes the connection
# even if reading fails.
with Req(start_url) as Client:
    page_html = Client.read()

page_soup = souce(page_html, "html.parser")

# One <li> element per listing on the results page.
containers_listings = page_soup.findAll("li", {"class": "sresult lvresult clearfix li"})

# Link to the next results page. find() returns None when the page has no
# "next" cell, so guard against that instead of crashing on `.a`.
container_next = page_soup.find("td", {"class": "pagn-next"})
if container_next is not None and container_next.a is not None:
    next_url = container_next.a["href"]
else:
    next_url = None

filename = "scrape_ebay.csv"
# newline="" lets the csv module control line endings; csv.writer quotes
# titles that contain commas so the columns stay aligned.
with open(filename, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f, lineterminator="\n")
    writer.writerow(["item_title", "item_link", "item_price"])

    for container in containers_listings:
        item_title = container.h3.text.strip()
        item_link = container.h3.a["href"].strip()
        item_price = container.span.text.strip()

        # Prices use a decimal comma on this site; store them with a dot.
        writer.writerow([item_title, item_link, item_price.replace(",", ".")])

Я столкнулся с проблемой с нумерацией страниц eBay. Мне удалось найти и извлечь ссылку на следующую страницу, но я понятия не имею, как использовать её в цикле, который будет посещать следующие страницы и извлекать информацию. Любая помощь будет принята с благодарностью! Заранее спасибо.

Ответы [ 2 ]

0 голосов
/ 01 июня 2018

По вашей текущей ссылке находится меньше 200 результатов, поэтому нумерация страниц не выводится; однако более популярный запрос, например "macbooks", даёт результаты на нескольких страницах. Ссылка, используемая для демонстрации, может быть найдена здесь. Чтобы найти страницы, можно извлечь текст всех элементов блока нумерации, а затем, перебирая их в цикле, подставлять номер страницы текущей итерации в конец ссылки:

from bs4 import BeautifulSoup as soup
import requests, re
from collections import namedtuple
def check_under_val(val = 200):
    """Build a decorator that refuses to run a method when ``page`` is set.

    The decorated method runs only when the instance's ``page`` attribute is
    ``None``; otherwise ``StopIteration`` is raised.

    Args:
        val: Number shown in the error message. It is only used for the
            message text and is not compared against anything.

    Returns:
        A decorator for methods that take the instance as their only argument.

    Raises:
        StopIteration: from the decorated method, when ``page`` is not ``None``.
    """
    from functools import wraps

    def outer(f):
        @wraps(f)  # keep the decorated method's name and docstring
        def wrapper(cls):
            if cls.page is not None:
                raise StopIteration("Search results for given link under {}".format(val))
            return f(cls)
        return wrapper
    return outer

class Results:
    """Collect eBay search results for a link, page by page.

    On construction the search page is downloaded once and the texts of its
    pagination entries are stored in ``pagination``. If there are none, the
    items of that single page are stored in ``page``; otherwise ``page`` is
    ``None`` and iterating the instance yields one list of ``product`` tuples
    per pagination entry.
    """

    product = namedtuple('product', ['title', 'description', 'price', 'rating', 'link'])

    def __init__(self, link):
        """Download ``link`` and record its pagination entries.

        Args:
            link: URL of the first results page. ``concatenate`` replaces the
                digits at the very end of this URL with a page number, so the
                page parameter has to come last (e.g. ``..._pgn=1``).
        """
        self.link = link
        # Parse the first page once and reuse it for the pagination entries
        # and, when there are none, for the items themselves.
        first_page = soup(requests.get(link).text, 'html.parser')
        self.pagination = [i.text for i in first_page.find_all('li', {'class': 'x-pagination__li'})]
        if self.pagination:
            self.page = None
        else:
            self.page = [Results.product(*i) for i in Results._parse_page(first_page)]

    @staticmethod
    def concatenate(link, num:str):
        """Return ``link`` with its trailing run of digits replaced by ``num``."""
        # Raw string: a plain '\d' literal is an invalid escape sequence.
        return re.sub(r'\d+$', num, link)

    @staticmethod
    def _parse_page(page_soup):
        """Return one list of field texts per item of an already parsed page."""
        # (tag name, CSS class) pairs in the same order as the ``product`` fields.
        # NOTE: for the 'a' tag the anchor *text* is taken, not its href, so the
        # ``link`` field holds text rather than a URL.
        tags = [['h3', 's-item__title'], ['span', 'SECONDARY_INFO'], ['span', 's-item__price'], ['span', 'b-starrating__star'], ['a', 's-item__link']]
        items = page_soup.find_all('li', {'class': 's-item'})
        # A field whose element is missing falls back to 'N/A'.
        return [[getattr(i.find(tag, {'class': c}), 'text', 'N/A') for tag, c in tags] for i in items]

    @staticmethod
    def scrape_page(url):
        """Download ``url`` and return one list of field texts per listed item."""
        current_page = soup(requests.get(url).text, 'html.parser')
        return Results._parse_page(current_page)

    @check_under_val(val = 200)
    def __iter__(self):
        """Yield a list of ``product`` tuples for every pagination entry.

        Because of the ``check_under_val`` decorator, ``StopIteration`` is
        raised instead when ``page`` is not ``None`` (no pagination found).
        """
        for page in self.pagination:
            yield [Results.product(*i) for i in Results.scrape_page(Results.concatenate(self.link, page))]

# Demo: a popular search whose results span several pages. The page number is
# the last parameter so that it can be swapped for each pagination entry.
start = (
    'https://www.ebay.de/sch/i.html'
    '?_sacat=0&_sop=2&_nkw=macbook&_frs=1&_pgn=1'
)
r = Results(start)
# Each iteration yields the products of one results page.
for page_results in r:
    print(page_results)

Выходные данные (первый напечатанный результат):

[product(title='SSD 1TB 1Terabyte passend für Apple Macbook Pro und Air Modelle', description='Gebraucht', price='EUR 1,00', rating='N/A', link='SSD 1TB 1Terabyte passend für Apple Macbook Pro und Air Modelle'), product(title='Apple MacBook Pro A1398 39,1 cm (15,4 Zoll) Nur Display', description='Gebraucht', price='EUR 1,00', rating='5.0 von 5 Sternen - Apple MacBook Pro A1398 39,1 cm (15,4 Zoll) Nur Display', link='Apple MacBook Pro A1398 39,1 cm (15,4 Zoll) Nur Display'), product(title='Neues AngebotMacbook Late 2008', description='Gebraucht', price='EUR 1,00', rating='N/A', link='Neues AngebotMacbook Late 2008'), product(title='Apple MacBook Pro 39,1 cm (15,4 Zoll) Laptop - MB134D/A (Februar, 2008)', description='Nur Ersatzteile', price='EUR 1,00', rating='3.5 von 5 Sternen - Apple MacBook Pro 39,1 cm (15,4 Zoll) Laptop - MB134D/A (Februar, 2008)', link='Apple MacBook Pro 39,1 cm (15,4 Zoll) Laptop - MB134D/A (Februar, 2008)'), product(title='Neues AngebotApple MacBook Pro 2,53 Ghz (13,3 Zoll) Laptop (Juni, 2009), viel Leistung!', description='Gebraucht', price='EUR 1,00', rating='4.5 von 5 Sternen - Apple MacBook Pro 2,53 Ghz (13,3 Zoll) Laptop (Juni, 2009), viel Leistung!', link='Neues AngebotApple MacBook Pro 2,53 Ghz (13,3 Zoll) Laptop (Juni, 2009), viel Leistung!'), product(title='Neues AngebotMacBook Pro 15.4" Matt - Gebraucht, Mitte 2009', description='Gebraucht', price='EUR 1,00', rating='N/A', link='Neues AngebotMacBook Pro 15.4" Matt - Gebraucht, Mitte 2009'), product(title='Neues AngebotApple Macbook Air 11 inch late 2010 128 gb', description='Gebraucht', price='EUR 1,00', rating='N/A', link='Neues AngebotApple Macbook Air 11 inch late 2010 128 gb'), product(title='Neues AngebotApple MacBook A1342 33,8 cm (13,3 Zoll) Laptop (Mai, 2010) - Individuelle Konfig', description='Gebraucht', price='EUR 1,00', rating='4.5 von 5 Sternen - Apple MacBook A1342 33,8 cm (13,3 Zoll) Laptop (Mai, 2010) - Individuelle Konfig', link='Neues AngebotApple MacBook A1342 
33,8 cm (13,3 Zoll) Laptop (Mai, 2010) - Individuelle Konfig'), product(title='Neues AngebotApple MacBook Air A1370 29,5 cm (11,6 Zoll) Laptop - MC506D/A (Oktober, 2010)', description='Gebraucht', price='EUR 1,00', rating='5.0 von 5 Sternen - Apple MacBook Air A1370 29,5 cm (11,6 Zoll) Laptop - MC506D/A (Oktober, 2010)', link='Neues AngebotApple MacBook Air A1370 29,5 cm (11,6 Zoll) Laptop - MC506D/A (Oktober, 2010)'), product(title='Neues AngebotApple MacBook A1342 13,3 Zoll mid 2010, 2,4GHz, 320GB HDD', description='Gebraucht', price='EUR 1,00', rating='4.5 von 5 Sternen - Apple MacBook A1342 13,3 Zoll mid 2010, 2,4GHz, 320GB HDD', link='Neues AngebotApple MacBook A1342 13,3 Zoll mid 2010, 2,4GHz, 320GB HDD'), product(title='Neues AngebotApple MacBook Pro 15" Mitte 2009 2,66 Ghz 8GB RAM 64 BIT 500GB HDD Laptop Matt', description='Gebraucht', price='EUR 1,00', rating='N/A', link='Neues AngebotApple MacBook Pro 15" Mitte 2009 2,66 Ghz 8GB RAM 64 BIT 500GB HDD Laptop Matt'), product(title='Neues AngebotApple MacBook Air - 8 GB! - 13,3 Zoll Laptop, MJVE2D/A, kaum benutzt / quasi neu', description='Gebraucht', price='EUR 1,00', rating='N/A', link='Neues AngebotApple MacBook Air - 8 GB! 
- 13,3 Zoll Laptop, MJVE2D/A, kaum benutzt / quasi neu'), product(title='Neues AngebotApple MacBook Pro A1278 33,8 cm (13,3 Zoll) Laptop (Juni, 2009)', description='Gebraucht', price='EUR 1,00', rating='N/A', link='Neues AngebotApple MacBook Pro A1278 33,8 cm (13,3 Zoll) Laptop (Juni, 2009)'), product(title='Neues AngebotApple Macbook Pro Modell (15 Zoll) Top Zustand  DVD Brenner, MacOS X', description='Gebraucht', price='EUR 1,00', rating='N/A', link='Neues AngebotApple Macbook Pro Modell (15 Zoll) Top Zustand  DVD Brenner, MacOS X'), product(title='Apple Macbook Pro 13 Zoll silber   ', description='Gebraucht', price='EUR 1,50', rating='N/A', link='Apple Macbook Pro 13 Zoll silber   '), product(title='Neues AngebotApple MacBook Air A1369 33,8 cm (13,3 Zoll) Laptop - MC965D/A (Juli, 2011)', description='Gebraucht', price='EUR 1,50', rating='5.0 von 5 Sternen - Apple MacBook Air A1369 33,8 cm (13,3 Zoll) Laptop - MC965D/A (Juli, 2011)', link='Neues AngebotApple MacBook Air A1369 33,8 cm (13,3 Zoll) Laptop - MC965D/A (Juli, 2011)'), product(title='Neues AngebotApple MacBook Pro A1278 13,3 Zoll Laptop Anfang 2011 i5 2,3Ghz 8GB DDR3 in OVP !', description='Gebraucht', price='EUR 2,00', rating='5.0 von 5 Sternen - Apple MacBook Pro A1278 13,3 Zoll Laptop Anfang 2011 i5 2,3Ghz 8GB DDR3 in OVP !', link='Neues AngebotApple MacBook Pro A1278 13,3 Zoll Laptop Anfang 2011 i5 2,3Ghz 8GB DDR3 in OVP !'), product(title='Apple MacBook Pro (13,3 Zoll, 2009) - 120GB SSD - 2GB RAM', description='Gebraucht', price='EUR 2,50', rating='N/A', link='Apple MacBook Pro (13,3 Zoll, 2009) - 120GB SSD - 2GB RAM'), product(title='Neues AngebotApple MacBook 12“ Space Grau/ 1.1 GHz/ 8GB/ 256 GB inkl. Zubehör', description='Gebraucht', price='EUR 2,72', rating='N/A', link='Neues AngebotApple MacBook 12“ Space Grau/ 1.1 GHz/ 8GB/ 256 GB inkl. 
Zubehör'), product(title='Apple MacBook Pro A1286 39,1 cm, 2011, 16gb, SSD + HDD, HI-RES', description='Gebraucht', price='EUR 4,00', rating='5.0 von 5 Sternen - Apple MacBook Pro A1286 39,1 cm, 2011, 16gb, SSD + HDD, HI-RES', link='Apple MacBook Pro A1286 39,1 cm, 2011, 16gb, SSD + HDD, HI-RES'), product(title='Original Karton - Apple MacBook Air (2014) - Intel i5 1,4 GHz, 4GB - NUR KARTON', description='Gebraucht', price='EUR 4,95', rating='N/A', link='Original Karton - Apple MacBook Air (2014) - Intel i5 1,4 GHz, 4GB - NUR KARTON'), product(title='Macbook Air 13 Zoll Baujahr 2012 OHNE Festplatte Silber Space', description='Gebraucht', price='EUR 5,00', rating='N/A', link='Macbook Air 13 Zoll Baujahr 2012 OHNE Festplatte Silber Space'), product(title='Apple MacBook Pro A1286 39,1 cm (15,4 Zoll) Laptop -  (Juni, 2009)', description='Gebraucht', price='EUR 6,00', rating='5.0 von 5 Sternen - Apple MacBook Pro A1286 39,1 cm (15,4 Zoll) Laptop -  (Juni, 2009)', link='Apple MacBook Pro A1286 39,1 cm (15,4 Zoll) Laptop -  (Juni, 2009)'), product(title='Neues AngebotApple MacBook 12" Laptop , 256GB - MNYF2D/A - August 2017, Space Grau neuwertig!', description='Gebraucht', price='EUR 6,50', rating='1.0 von 5 Sternen - Apple MacBook 12" Laptop , 256GB - MNYF2D/A - August 2017, Space Grau neuwertig!', link='Neues AngebotApple MacBook 12" Laptop , 256GB - MNYF2D/A - August 2017, Space Grau neuwertig!'), product(title='MacBook Pro 15 Zoll A1211 Defekt', description='Gebraucht', price='EUR 8,00', rating='N/A', link='MacBook Pro 15 Zoll A1211 Defekt'), product(title='Apple Macbook Pro 13', description='Brandneu', price='EUR 8,26', rating='N/A', link='Apple Macbook Pro 13'), product(title='Apple MacBook A1342 33,8 cm (13,3 Zoll) Laptop - MC207D/A (Oktober, 2009)', description='Gebraucht', price='EUR 8,50', rating='4.5 von 5 Sternen - Apple MacBook A1342 33,8 cm (13,3 Zoll) Laptop - MC207D/A (Oktober, 2009)', link='Apple MacBook A1342 33,8 cm (13,3 Zoll) Laptop - MC207D/A 
(Oktober, 2009)'), product(title='Apple Macbook Pro 13', description='Brandneu', price='EUR 8,63', rating='N/A', link='Apple Macbook Pro 13'), product(title='Apple MacBook 33,8 cm (13,3 Zoll) Laptop -(2008)', description='Gebraucht', price='EUR 10,00', rating='4.5 von 5 Sternen - Apple MacBook 33,8 cm (13,3 Zoll) Laptop -(2008)', link='Apple MacBook 33,8 cm (13,3 Zoll) Laptop -(2008)'), product(title='Neues AngebotApple MacBook Pro A1502 33,8 cm (13,3 Zoll) Laptop - ME865D/A (Oktober, 2013)', description='Gebraucht', price='EUR 10,00', rating='5.0 von 5 Sternen - Apple MacBook Pro A1502 33,8 cm (13,3 Zoll) Laptop - ME865D/A (Oktober, 2013)', link='Neues AngebotApple MacBook Pro A1502 33,8 cm (13,3 Zoll) Laptop - ME865D/A (Oktober, 2013)'), product(title='Apple MacBook A1181 33,8 cm (13,3 Zoll) Laptop - MB061D/A ', description='Gebraucht', price='EUR 10,50', rating='5.0 von 5 Sternen - Apple MacBook A1181 33,8 cm (13,3 Zoll) Laptop - MB061D/A ', link='Apple MacBook A1181 33,8 cm (13,3 Zoll) Laptop - MB061D/A '), product(title="Neues AngebotApple MacBook Pro  13.3'' A1706 256GB Laptop mit Touchbar - DEFEKT", description='Nur Ersatzteile', price='EUR 10,50', rating="5.0 von 5 Sternen - Apple MacBook Pro  13.3'' A1706 256GB Laptop mit Touchbar - DEFEKT", link="Neues AngebotApple MacBook Pro  13.3'' A1706 256GB Laptop mit Touchbar - DEFEKT"), product(title='apple macbook pro 13', description='Gebraucht', price='EUR 10,50', rating='N/A', link='apple macbook pro 13'), product(title='Neues AngebotMacbook Pro 15 Zoll von 2007 A1211, 2 GB ram, Core 2 Duo, ATI Grafikkarte, 350GB', description='Gebraucht', price='EUR 11,50', rating='N/A', link='Neues AngebotMacbook Pro 15 Zoll von 2007 A1211, 2 GB ram, Core 2 Duo, ATI Grafikkarte, 350GB'), product(title='Apple MacBook A1181 33,8 cm (13,3 Zoll) Laptop\xa0', description='Gebraucht', price='EUR 11,50', rating='5.0 von 5 Sternen - Apple MacBook A1181 33,8 cm (13,3 Zoll) Laptop\xa0', link='Apple MacBook A1181 33,8 cm (13,3 Zoll) 
Laptop\xa0'), product(title='Neues AngebotApple MacBook Air Mitte 2011 13,3 Zoll i5 121 GB SSD 4 GB RAM', description='Gebraucht', price='EUR 11,50', rating='N/A', link='Neues AngebotApple MacBook Air Mitte 2011 13,3 Zoll i5 121 GB SSD 4 GB RAM'), product(title='Apple MacBook Pro 43,2 cm (17 Zoll) Laptop - MA611D/A (Oktober, 2006)', description='Gebraucht', price='EUR 11,50', rating='5.0 von 5 Sternen - Apple MacBook Pro 43,2 cm (17 Zoll) Laptop - MA611D/A (Oktober, 2006)', link='Apple MacBook Pro 43,2 cm (17 Zoll) Laptop - MA611D/A (Oktober, 2006)'), product(title='Apple MacBook Pro 15“ 2,33 GHz 500GB HDD 2GB RAM', description='Gebraucht', price='EUR 12,50', rating='N/A', link='Apple MacBook Pro 15“ 2,33 GHz 500GB HDD 2GB RAM'), product(title='Apple MacBook A1181 13,3 Zoll, 2,2 GHz 256GB HDD, 4GB Ram, OS X Lion & Bootcamp', description='Gebraucht', price='EUR 12,50', rating='N/A', link='Apple MacBook A1181 13,3 Zoll, 2,2 GHz 256GB HDD, 4GB Ram, OS X Lion & Bootcamp'), product(title='MacBook 13‘‘ Ende 2009', description='Gebraucht', price='EUR 13,50', rating='N/A', link='MacBook 13‘‘ Ende 2009'), product(title='Apple MacBook Pro 17 Zoll Core 2 Duo 2.33 GHz 3Gb Ram 100GB 7200rpm HDD', description='Gebraucht', price='EUR 15,50', rating='N/A', link='Apple MacBook Pro 17 Zoll Core 2 Duo 2.33 GHz 3Gb Ram 100GB 7200rpm HDD'), product(title='Macbook pro A1278 Originalverpackung TOP', description='Gebraucht', price='EUR 19,00', rating='N/A', link='Macbook pro A1278 Originalverpackung TOP'), product(title='Macbook Pro 13" 2017', description='Brandneu', price='EUR 21,41', rating='N/A', link='Macbook Pro 13" 2017'), product(title='2 x Original Apple Macbook Pro Retina 15“ Verpackung OVP Karton Box A1398', description='Gebraucht', price='EUR 22,00', rating='N/A', link='2 x Original Apple Macbook Pro Retina 15“ Verpackung OVP Karton Box A1398'), product(title='Apple MacBook Air A1466 33,8 cm (13,3 Zoll) Laptop - MD760B/A (Juni, 2013)', description='Gebraucht', price='EUR 
22,50', rating='5.0 von 5 Sternen - Apple MacBook Air A1466 33,8 cm (13,3 Zoll) Laptop - MD760B/A (Juni, 2013)', link='Apple MacBook Air A1466 33,8 cm (13,3 Zoll) Laptop - MD760B/A (Juni, 2013)'), product(title='MacBook Air 13, 2013', description='Brandneu', price='EUR 23,50', rating='N/A', link='MacBook Air 13, 2013'), product(title='Apple MacBook Pro A1278 33,8 cm (13,3 Zoll) Laptop (Juni, 2009) -...', description='Gebraucht', price='EUR 25,00', rating='4.5 von 5 Sternen - Apple MacBook Pro A1278 33,8 cm (13,3 Zoll) Laptop (Juni, 2009) -...', link='Apple MacBook Pro A1278 33,8 cm (13,3 Zoll) Laptop (Juni, 2009) -...'), product(title='macbook pro 13', description='Gebraucht', price='EUR 25,00', rating='N/A', link='macbook pro 13'), product(title='Neues AngebotMacBook Pro 15 Zoll Late 2011 Batterie', description='Gebraucht', price='EUR 25,00', rating='N/A', link='Neues AngebotMacBook Pro 15 Zoll Late 2011 Batterie'), product(title='MacBook Air SuperDrive', description='Gebraucht', price='EUR 29,99', rating='N/A', link='MacBook Air SuperDrive')]

Однако, если входные данные не содержат нумерации страниц, будет доступна только первая страница:

# Demo: the search link from the question.
start = (
    'https://www.ebay.de/sch/i.html?_fosrp=1&_from=R40&_nkw=iphone&_in_kw=1&_ex_kw='
    '&_sacat=0&_mPrRngCbx=1&_udlo=600&_udhi=4.800&LH_BIN=1&LH_ItemCondition=4'
    '&_ftrt=901&_ftrv=1&_sabdlo=&_sabdhi=&_samilow=&_samihi=&_sadis=10&_fpos='
    '&LH_SubLocation=1&_sargn=-1%26saslc%3D0&_fsradio2=%26LH_LocatedIn%3D1'
    '&_salic=77&_saact=77&LH_SALE_CURRENCY=0&_sop=2&_dmd=1&_ipg=200'
)
r1 = Results(start)
# Starting the iteration is what triggers the check on the instance.
for page in r1:
    print(page)

StopIteration: Search results for given link under 200

0 голосов
/ 01 июня 2018

Вам нужно добавлять новые URL-адреса в список по мере их нахождения и постоянно перебирать список URL-адресов, извлекая искомый контент.

import csv
from urllib.parse import urljoin
from urllib.request import urlopen as Req
from bs4 import BeautifulSoup as souce

start_url='https://www.ebay.de/sch/i.html?_fosrp=1&_from=R40&_nkw=iphone&_in_kw=1&_ex_kw=&_sacat=0&_mPrRngCbx=1&_udlo=600&_udhi=4.800&LH_BIN=1&LH_ItemCondition=4&_ftrt=901&_ftrv=1&_sabdlo=&_sabdhi=&_samilow=&_samihi=&_sadis=10&_fpos=&LH_SubLocation=1&_sargn=-1%26saslc%3D0&_fsradio2=%26LH_LocatedIn%3D1&_salic=77&_saact=77&LH_SALE_CURRENCY=0&_sop=2&_dmd=1&_ipg=200'

# Result pages to visit. Newly discovered "next" links are appended while the
# loop below is running, so it keeps going until no new page turns up.
page_urls =[start_url]

filename="scrape_ebay.csv"

# Open the CSV once, before the loop: reopening it in "w" mode for every page
# would truncate the rows collected from the previous pages.
with open(filename, "w", newline="", encoding="utf-8") as f:
    # csv.writer quotes titles that contain commas so the columns stay aligned.
    writer = csv.writer(f, lineterminator="\n")
    writer.writerow(["item_title", "item_link", "item_price"])

    for purl in page_urls:

        # The context manager closes the connection even if reading fails.
        with Req(purl) as Client:
            page_html = Client.read()

        page_soup = souce(page_html, "html.parser")

        containers_listings = page_soup.findAll("li", {"class": "sresult lvresult clearfix li"})
        container_next = page_soup.find("td", {"class": "pagn-next"})

        #this is where you would add your new link
        # find() returns None when there is no "next" cell (e.g. on the last
        # page), so only follow the link when it really exists.
        next_anchor = container_next.a if container_next is not None else None
        if next_anchor is not None and next_anchor.get("href"):
            # Resolve relative links against the current page URL.
            next_url = urljoin(purl, next_anchor["href"])
            # Skip non-HTTP targets such as "javascript:" placeholders.
            if next_url.startswith(("http://", "https://")) and next_url not in page_urls:
                page_urls.append(next_url)

        for container in containers_listings:
            item_title = container.h3.text.strip()
            item_link = container.h3.a["href"].strip()
            item_price = container.span.text.strip()

            # Write inside the loop so every listing gets its own row.
            writer.writerow([item_title, item_link, item_price.replace(",", ".")])
...