Вы можете использовать CSS-псевдокласс :nth-of-type (он поддерживается в bs4 начиная с версии 4.7.1), чтобы взять ячейки первого столбца как заголовки, а ячейки второго столбца — как значения под этими заголовками:
import pandas as pd
from bs4 import BeautifulSoup as bs
# Build a DataFrame with one row per <table> found in the HTML file.
#
# Read the file as bytes so BeautifulSoup detects the document's encoding
# itself instead of depending on the platform's default text encoding.
with open('filepath/sample.html', 'rb') as f:
    soup = bs(f.read(), 'html.parser')

# Column names: every <td> that is the first <td> among its siblings,
# taken from the first table only.
# NOTE(review): assumes all tables share the first table's layout - confirm.
headers = [td.text for td in soup.select_one('table').select('td:nth-of-type(1)')]

# One record per table: every <td> that is the second <td> among its siblings.
results = [
    [td.text for td in table.select('td:nth-of-type(2)')]
    for table in soup.select('table')
]

df = pd.DataFrame(results, columns=headers)
print(df)
Вы также можете записать результат в CSV-файл с помощью модуля csv:
import csv
from bs4 import BeautifulSoup as bs
# Write one CSV row per <table> found in the HTML file, preceded by a header row.
#
# Read the file as bytes so BeautifulSoup detects the document's encoding
# itself instead of depending on the platform's default text encoding.
with open('filepath/sample.html', 'rb') as f:
    soup = bs(f.read(), 'html.parser')

# Header row: every <td> that is the first <td> among its siblings,
# taken from the first table only.
# NOTE(review): assumes all tables share the first table's layout - confirm.
headers = [td.text for td in soup.select_one('table').select('td:nth-of-type(1)')]

# newline='' is what the csv module expects for files it writes to.
with open("data.csv", "w", encoding="utf-8-sig", newline='') as csv_file:
    w = csv.writer(csv_file, delimiter=",", quoting=csv.QUOTE_MINIMAL)
    w.writerow(headers)
    # One data row per table: every <td> that is the second <td> among its siblings.
    w.writerows(
        [td.text for td in table.select('td:nth-of-type(2)')]
        for table in soup.select('table')
    )