Я не могу найти способ вставить в таблицу значения из 4 списков вместо того, чтобы вписывать их вручную: список товаров (текст), список цен (float), список продавцов (текст) и дата (дата). Списки формируются путём скрапинга страниц по URL-адресам, которые хранятся в файле Excel. Код нужен только для учебных целей, а не для бизнес-проектов.
Я работаю в PyCharm и Jupyter.
import pandas as pd
from openpyxl import load_workbook
# Open the Excel workbook that lists the product page URLs.
xl = pd.ExcelFile("C:/Users/Aleksander Delev/Desktop/product.xlsx") # workbook handle
# Parse the worksheet named "Tabelle1" into a DataFrame; its 'Link' column
# holds the URLs that are scraped further down.
df = xl.parse("Tabelle1") # sheet -> DataFrame
# Start index: 0 selects the URL in the first data row of the sheet.
i=0 # row counter
import requests
from bs4 import BeautifulSoup
from datetime import date
import re  # hoisted: the original re-imported it inside the loop on every pass

# Result accumulators. Entry k of each list describes the same product, so the
# four lists can be zipped into database rows afterwards.
prodname = []    # product names (str)
prodprice = []   # offer prices (float)
scrapetime = []  # scrape date formatted as dd/mm/YYYY (str)
sellername = []  # seller names (str)

# requests.get() waits indefinitely unless a timeout is given.
REQUEST_TIMEOUT = 30  # seconds

# First number in a text: digits, optional '.'-separated thousands groups and
# optional ',' decimals (e.g. "449,00" or "1.299,00").
_PRICE_RE = re.compile(r'\d+(?:\.\d{3})*(?:,\d+)?')


def _parse_price(text):
    """Return the first price found in *text* as a float.

    The original code stored the raw ``re.findall`` result (a list of
    strings), which cannot be written to a REAL database column.

    NOTE(review): assumes comma-decimal formatting ("1.299,00"), as the
    original pattern did -- confirm against the scraped pages.

    Raises:
        ValueError: if *text* contains no digits.
    """
    match = _PRICE_RE.search(text)
    if match is None:
        raise ValueError("no price found in {!r}".format(text))
    return float(match.group().replace('.', '').replace(',', '.'))


def _first_tag(soup, tag_name, css_class, link):
    """Return the first *tag_name* element with *css_class* in *soup*.

    Raises:
        ValueError: if the page has no such element (the original failed with
            a bare IndexError that did not name the page).
    """
    found = soup.find_all(tag_name, {'class': css_class})
    if not found:
        raise ValueError(
            "no <{} class={!r}> element on {}".format(tag_name, css_class, link)
        )
    return found[0]


# Visit every URL in the 'Link' column. The original looped over a hard-coded
# range of ten rows and raised when the sheet held fewer.
for link in df['Link'].dropna():
    print(link)
    source = requests.get(link, timeout=REQUEST_TIMEOUT)
    source.raise_for_status()  # stop on HTTP errors instead of parsing an error page
    bsObj = BeautifulSoup(source.text, 'html.parser')

    # Only the first listed offer of each page is used.
    name_tag = _first_tag(bsObj, 'span', 'productOffers-listItemTitleInner', link)
    price_tag = _first_tag(
        bsObj,
        'div',
        'table-cell productOffers-listItemOfferShippingDetailsLeft',
        link,
    )
    seller_tag = _first_tag(bsObj, 'a', 'productOffers-listItemOfferLink', link)

    name = name_tag.get_text().strip()  # strip() also drops trailing whitespace
    price = _parse_price(price_tag.get_text())
    seller = seller_tag['data-shop-name']  # seller name is stored in this attribute
    scraped_on = date.today().strftime("%d/%m/%Y")

    # Append only after every field was extracted, so a failure on one page
    # cannot leave the four lists with different lengths.
    prodname.append(name)
    prodprice.append(price)
    scrapetime.append(scraped_on)
    sellername.append(seller)
    print(name, price, scraped_on, seller)

print(prodname)
print(prodprice)
print(scrapetime)
print(sellername)
print(type(prodname))
print(type(prodprice))
print(type(scrapetime))
import sqlite3
import pandas as pd
# Open the SQLite database file 'Price_Table.db' (relative path).
conn = sqlite3.connect('Price_Table.db')
# Module-level cursor used by price_table() and price_loading().
c = conn.cursor()
def price_table():
    """Create the Price_Data table unless it already exists.

    Executes the statement on the module-level cursor ``c``; nothing is
    committed or returned here.
    """
    create_stmt = 'Create Table If NOT Exists Price_Data(ID Real, Product Text, Date Text, Seller Text, Price Real)'
    c.execute(create_stmt)
def price_loading(rows=None):
    """Insert rows into Price_Data and commit.

    Uses the module-level cursor ``c`` and connection ``conn``. Values are
    bound through ``?`` placeholders instead of being written into the SQL
    text, so any data (including scraped text containing quotes) can be
    inserted with a single statement.

    Args:
        rows: optional iterable of ``(ID, Product, Date, Seller, Price)``
            tuples. When omitted, the ten sample rows that used to be
            hard-coded are inserted, so ``price_loading()`` behaves as before.
            To load the scraped lists instead::

                price_loading(zip(range(1, len(prodname) + 1),
                                  prodname, scrapetime, sellername, prodprice))

            Every value must be a type sqlite3 can bind (int, float, str,
            bytes or None) -- in particular each price must be a number, not
            a list.

    Note:
        Each call appends rows; the table has no uniqueness constraint.
    """
    if rows is None:
        rows = (
            (1, 's', '10/07/2019', 'laufbursche.shop', 449.00),
            (2, 'Apple Watch Series 3 GPS', '10/07/2019', 'talk-point-gmbh2', 355.99),
            (3, 'Amazfit BIP', '10/07/2019', 'flodi_7604', 199.00),
            (4, 'Apple Watch Series 4 GPS', '10/07/2019', 'cyberport', 458.90),
            (5, 'Garmin fenix 5', '10/07/2019', 'hondatestfahrer', 333.00),
            (6, 'Garmin fenix 5S', '10/07/2019', 'comtech-shop', 396.83),
            (7, 'Suunto 9', '10/07/2019', 'bergzeit_outlet', 401.95),
            (8, 'Garmin Vívoactive 3 Music', '10/07/2019', 'saturn', 249.00),
            (9, 'Garmin fenix 5x', '10/07/2019', 'n_456', 489.00),
            (10, 'Garmin Forerunner 735XT', '10/07/2019', 'yeppon-shop', 287.99),
        )
    c.executemany("Insert Into Price_Data Values(?, ?, ?, ?, ?)", rows)
    conn.commit()
# Create the table if needed and load the rows through the connection that is
# already open.
price_table()
price_loading()
# Read the table back on that same connection. The original opened a second
# connection here and left the first one unclosed.
df = pd.read_sql_query('Select * from Price_Data', conn)
print(df)
#%%
#Our Company price data
# Rebind conn and c to a new connection/cursor on 'Price_Table.db'; the
# our_price() and our_price_loading() functions below use these globals.
# NOTE(review): the connection previously bound to conn is not closed first.
conn = sqlite3.connect('Price_Table.db')
c = conn.cursor()
def our_price():
    """Create the Our_Price_Data table unless it already exists.

    Executes the statement on the module-level cursor ``c``; nothing is
    committed or returned here.
    """
    create_stmt = 'Create Table If NOT Exists Our_Price_Data(ID Real, Product Text, Date Text, Seller Text, Our_Price Real)'
    c.execute(create_stmt)
def our_price_loading(rows=None):
    """Insert rows into Our_Price_Data and commit.

    Uses the module-level cursor ``c`` and connection ``conn``. Values are
    bound through ``?`` placeholders instead of being written into the SQL
    text.

    Args:
        rows: optional iterable of ``(ID, Product, Date, Seller, Our_Price)``
            tuples. When omitted, the ten sample rows that used to be
            hard-coded are inserted, so ``our_price_loading()`` behaves as
            before. Every value must be a type sqlite3 can bind (int, float,
            str, bytes or None).

    Note:
        Each call appends rows; the table has no uniqueness constraint.
    """
    if rows is None:
        rows = (
            (1, 's', '10/07/2019', 'Our Company', 350),
            (2, 'Apple Watch Series 3 GPS', '10/07/2019', 'Our Company', 160),
            (3, 'Amazfit BIP', '10/07/2019', 'Our Company', 220),
            (4, 'Apple Watch Series 4 GPS', '10/07/2019', 'Our Company', 450),
            (5, 'Garmin fenix 5', '10/07/2019', 'Our Company', 350),
            (6, 'Garmin fenix 5S', '10/07/2019', 'Our Company', 400),
            (7, 'Suunto 9', '10/07/2019', 'Our Company', 360),
            (8, 'Garmin Vívoactive 3 Music', '10/07/2019', 'Our Company', 240),
            (9, 'Garmin fenix 5x', '10/07/2019', 'Our Company', 490),
            (10, 'Garmin Forerunner 735XT', '10/07/2019', 'Our Company', 240),
        )
    c.executemany("Insert Into Our_Price_Data Values(?, ?, ?, ?, ?)", rows)
    conn.commit()
# Create the table if needed and load the rows through the connection that is
# already open.
our_price()
our_price_loading()
# Read the table back on that same connection. The original opened another
# connection here and left the previous one unclosed.
df = pd.read_sql_query('Select * from Our_Price_Data', conn)
print(df)
Извините за отсутствие правильной структуры и описания. Это мой первый пост.
В результате я хотел бы иметь 10 строк с данными, вставленными из prodname, prodprice, scrapetime, sellername.
Заранее большое спасибо.