Не уверен, почему вы получаете эту ошибку, поскольку код, который вы показали, не использует pandas. Но это не значит, что его не стоит использовать. Я использовал здесь функцию pandas .read_html(),
так как она использует BeautifulSoup под капотом для разбора тегов <table>.
Я не знаю, нужны ли вам вложенные таблицы, но этот код вернёт основную таблицу; я также добавил часть, которая выводит вложенные таблицы.
import io
import ssl
import urllib.error
import urllib.parse
import urllib.request

import pandas as pd
from bs4 import BeautifulSoup
# Unique column names collected from the page's <th class="tableheader"> cells.
head = []
# Browser-like User-Agent sent along with the request below.
headers = ({'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'})
# NOTE(security): hostname checking and certificate verification are switched
# off for this request, so the connection is not protected against
# man-in-the-middle attacks. Re-enable verification if the site's
# certificate validates in your environment.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
url = "https://www.bkt-tires.com/es/pattern/fs-216"
# Build an explicit Request so the User-Agent header is actually transmitted,
# and use a context manager so the HTTP response is always closed.
request = urllib.request.Request(url, headers=headers)
with urllib.request.urlopen(request, context=ctx) as response:
    html = response.read()
soup = BeautifulSoup(html, 'html.parser')
# I'm keeping this because it does a better job of pulling the headers than pandas did
tags = soup.find_all('th', class_="tableheader")
for tag in tags:
    # Keep only the first occurrence of each header text, in page order.
    if tag.text not in head:
        head.append(tag.text)
# Takes the html and parses the <table> tags.
# This will return a list of dataframes.
# The raw bytes are wrapped in BytesIO because passing literal HTML directly
# to read_html is deprecated in recent pandas versions.
dfs = pd.read_html(io.BytesIO(html))
# Grabs the dataframe from that list, in index position 0
# The other index positions are the sub tables
df = dfs[0]
# Here we will clean the data
# Drop the "messy" rows: those whose first column contains "mph".
# na=False treats missing values as "no match" so the boolean mask can be
# inverted even when the first column has NaN entries.
df = df[~df.iloc[:, 0].str.contains("mph", na=False)]
# Drop columns that are entirely empty.
df = df.dropna(how='all', axis=1)
# Apply the scraped header names; the extra '' labels one trailing column
# beyond the scraped headers.
df.columns = head + ['']
# Keep only rows that have a TRA Code, then renumber the index from 0.
df = df.dropna(subset=['TRA Code']).reset_index(drop=True)
print(df.to_string())
df.to_csv("file.csv", index=False)
# Print every sub table next to the cleaned main-table row at the same
# position. dfs[0] is the main table itself, so iteration starts at dfs[1];
# this avoids pairing the main table with the last row through a -1 index.
separator = '*' * 50
for row_pos, temp_df in enumerate(dfs[1:]):
    if row_pos < len(df):
        row_text = df.iloc[row_pos, :].to_string()
    else:
        # More sub tables than cleaned rows: print the table without a row.
        row_text = ''
    print(row_text + '\n\n' + temp_df.to_string() + '\n' + separator + '\n' + separator)
Вывод:
print (df.to_string())
Size Version US Code LI/SS PR RIM REC SW OD SLR RC Type TMPH ECE TRA Code
0 18.4 - 26 ARAMID BELTED 94027552 NaN 10 DW 16 A 18.4 58.1 26.9 174.5 TT 0 E11-106R-003404 LS-2
1 18.4 - 26 STEEL BELTED 94027996 NaN 10 DW 16 A 18.4 58.1 26.9 174.5 TT 0 E11-106R-003637 LS-2
2 23.1 - 26 ARAMID BELTED 94026333 NaN 10 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003649 LS-2
3 23.1 - 26 STEEL BELTED 94028108 NaN 10 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003650 LS-2
4 23.1 - 26 ARAMID BELTED 94026562 NaN 12 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003405 LS-2
5 23.1 - 26 STEEL BELTED 94028603 NaN 12 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003651 LS-2
6 23.1 - 26 ARAMID BELTED 94031825 NaN 14 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003658 LS-2
7 23.1 - 26 STEEL BELTED 94029457 NaN 14 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003411 LS-2
8 23.1 - 26 ARAMID BELTED 94026685 NaN 16 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003660 LS-2
9 23.1 - 26 STEEL BELTED 94031795 NaN 16 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT 0 E11-106R-003661 LS-2
10 23.1 - 26 STEEL BELTED 94040391 NaN 16 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TL 0 E11-106R-003662 LS-2
11 23.1 - 26 NaN 94054848 174 A2 / 165 A6 20 DW 20 A ; DW 20 B 23.1 63.6 28.6 188.2 TT NaN E11-106R-004635 LS-2
12 28L - 26 STEEL BELTED 94029334 NaN 14 DW 25 A ; DW 25 B 28.3 65.4 29.2 193.4 TL 0 E11-106R-003413 LS-2
13 28L - 26 STEEL BELTED 94031887 NaN 18 DW 25 A ; DW 25 B 28.3 65.4 29.2 193.4 TL NaN E11-106R-003425 LS-2
14 28L - 26 STEEL BELTED 94049608 NaN 20 DW 25 A ; DW 25 B 28.3 65.4 29.2 193.4 TL NaN NaN LS-2
15 28L - 26 STEEL BELTED 94057856 NaN 26 DW 25 A ; DW 25 B 28.3 65.4 29.2 193.4 TL NaN NaN LS-2
16 18.4 - 30 ARAMID BELTED 94027712 NaN 10 DW 16 A 18.4 62.1 28.5 186.5 TT 0 E11-106R-003652 LS-2
17 18.4 - 30 STEEL BELTED 94028115 NaN 10 DW 16 A 18.4 62.1 28.5 186.5 TT 0 E11-106R-003638 LS-2
18 18.4 - 30 STEEL BELTED 94029969 NaN 14 DW 16 A 18.4 62.1 28.5 186.5 TT NaN E11-106R-004644 LS-2
19 24. 5 - 32 STEEL BELTED 94033669 NaN 16 DH 21 24.5 72.1 32.9 214.8 TL 0 E11-106R-003421 LS-2
20 24.5 - 32 ARAMID BELTED 94054855 182 A2 / 172 A6 24 DH 21 24.5 72.1 32.9 214.8 TL NaN E11-106R-004636 LS-2
21 30.5L - 32 ARAMID BELTED 94027736 NaN 16 DH 27 30.5 72.7 32.8 215.2 TL 0 E11-106R-003424 LS-2
22 30.5L - 32 STEEL BELTED 94027620 NaN 16 DH 27 30.5 72.7 32.8 215.2 TL NaN E11-106R-003426 LS-2
23 30.5L - 32 STEEL BELTED 94028900 NaN 20 DH 27 30.5 72.7 32.8 215.2 TL NaN E11-106R-003439 LS-2
24 30.5L - 32 STEEL BELTED 94044313 NaN 26 DH 27 30.5 72.7 32.8 215.2 TL NaN NaN LS-2
25 30.5L - 32 STEEL BELTED 94048397 NaN 32 DH 27 30.5 72.7 32.8 215.2 TL NaN NaN LS-2
26 DH 35.5L - 32 STEEL BELTED 94043729 NaN 20 DH 31 35.7 79.2 35.9 235.9 TL NaN E11-106R-003349 LS-2
27 DH 35.5L - 32 STEEL BELTED 94062713 NaN 24 DH 31 35.7 79.2 35.9 235.9 TL NaN NaN LS-2
28 DH 35.5L - 32 STEEL BELTED 94048380 NaN 30 DH 31 35.7 79.2 35.9 235.9 TL NaN NaN LS-2
29 18.4 - 34 ARAMID BELTED 94027743 NaN 10 DW 16 A 18.4 66.1 30.2 198.5 TT 0 E11-106R-003653 LS-2
30 18.4 - 34 STEEL BELTED 94028122 NaN 10 DW 16 A 18.4 66.1 30.2 198.5 TT 0 E11-106R-004560 LS-2
31 18.4 - 34 ARAMID BELTED 94029532 NaN 14 DW 16 A 18.4 66.1 30.2 198.5 TL NaN NaN LS-2
32 18.4 - 34 STEEL BELTED 94029297 NaN 14 DW 16 A 18.4 66.1 30.2 198.5 TL 0 E11-106R-003401 LS-2