мой заголовок не имеет никакого смысла, поэтому я собираюсь кратко рассказать о ситуации
Я собираю данные с сайта, который в основном является таблицей, но в данном случае каждая строка является элементом таблицы, а также каждым нечетным элементом таблицыэто бесполезно, поэтому я исключаю
, поэтому мне нужно объединить каждый отдельный фрейм данных, состоящий из каждого элемента таблицы, используя read_html ()
, ниже мой код
import pandas as pd
all_table = ["""<table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr height="10px">
<td align="right" colspan="9">
<font color="#D5D5D5">.</font>
</td>
</tr>
<tr height="30px" valign="middle" width="100%">
<td class="size-12" colspan="8" width="100%">
<strong>Shipment Status</strong>
</td>
</tr>
<tr valign="bottom" width="100%">
<td align="center" class="size-10" width="10%">
<strong>Station</strong>
</td>
<td align="center" class="size-10" width="10%">
<strong>Flight No.</strong>
</td>
<td align="center" class="size-10" width="25%">
<strong>Status</strong>
</td>
<td align="center" class="size-10" width="15%">
<strong>Date</strong>
</td>
<td align="center" class="size-10" width="9%">
<strong>Time</strong>
</td>
<td align="center" class="size-10" width="8%">
<strong>Pcs</strong>
</td>
<td align="center" class="size-10" width="8%">
<strong>Wgt</strong>
</td>
<td align="center" class="size-10" width="15%">
<strong>ULD - Battery - Temp</strong>
</td>
</tr>
<tr bgcolor="#F0F0F0" class="result-row">
<td align="center" class="size-10" width="10%">KIX</td>
<td align="center" class="size-10" width="10%">
<center>-</center>
</td>
<td align="center" class="size-10" width="25%">Shipment Received</td>
<td align="center" class="size-10" width="15%">11 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 22:45</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#FFFFFF" class="result-row">
<td align="center" class="size-10" width="10%">KIX</td>
<td align="center" class="size-10" width="10%">
<center>-</center>
</td>
<td align="center" class="size-10" width="25%">Freight On Hand</td>
<td align="center" class="size-10" width="15%">11 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 22:45</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#F0F0F0" class="result-row">
<td align="center" class="size-10" width="10%">KIX</td>
<td align="center" class="size-10" width="10%">SQ0621</td>
<td align="center" class="size-10" width="25%">Flight Departed</td>
<td align="center" class="size-10" width="15%">13 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 17:18</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#FFFFFF" class="result-row">
<td align="center" class="size-10" width="10%">SIN</td>
<td align="center" class="size-10" width="10%">SQ0621</td>
<td align="center" class="size-10" width="25%">Flight Arrived</td>
<td align="center" class="size-10" width="15%">13 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 23:02</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#F0F0F0" class="result-row">
<td align="center" class="size-10" width="10%">SIN</td>
<td align="center" class="size-10" width="10%">SQ0621</td>
<td align="center" class="size-10" width="25%">Flight Arrived</td>
<td align="center" class="size-10" width="15%">13 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 23:02</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#FFFFFF" class="result-row">
<td align="center" class="size-10" width="10%">SIN</td>
<td align="center" class="size-10" width="10%">SQ0621</td>
<td align="center" class="size-10" width="25%">Shipment Checked Into Warehouse</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 02:57</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#F0F0F0" class="result-row">
<td align="center" class="size-10" width="10%">SIN</td>
<td align="center" class="size-10" width="10%">SQ0422</td>
<td align="center" class="size-10" width="25%">Flight Departed</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 07:39</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#FFFFFF" class="result-row">
<td align="center" class="size-10" width="10%">BOM</td>
<td align="center" class="size-10" width="10%">SQ0422</td>
<td align="center" class="size-10" width="25%">Flight Arrived</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 10:12</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#F0F0F0" class="result-row">
<td align="center" class="size-10" width="10%">BOM</td>
<td align="center" class="size-10" width="10%">SQ0422</td>
<td align="center" class="size-10" width="25%">Flight Arrived</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 10:30</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#FFFFFF" class="result-row">
<td align="center" class="size-10" width="10%">BOM</td>
<td align="center" class="size-10" width="10%">SQ0422</td>
<td align="center" class="size-10" width="25%">Shipment Checked Into Warehouse</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 14:10</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#F0F0F0" class="result-row">
<td align="center" class="size-10" width="10%">BOM</td>
<td align="center" class="size-10" width="10%">
<center>-</center>
</td>
<td align="center" class="size-10" width="25%">Shipment Ready for Pick-up</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 14:21</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#FFFFFF" class="result-row">
<td align="center" class="size-10" width="10%">BOM</td>
<td align="center" class="size-10" width="10%">
<center>-</center>
</td>
<td align="center" class="size-10" width="25%">Document Delivered</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 17:15</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
</table>, <table cellpadding="0" cellspacing="0" cols="8" width="100%">
<tbody><tr bgcolor="#F0F0F0" class="result-row">
<td align="center" class="size-10" width="10%">BOM</td>
<td align="center" class="size-10" width="10%">
<center>-</center>
</td>
<td align="center" class="size-10" width="25%">Shipment Delivered</td>
<td align="center" class="size-10" width="15%">14 Oct 2019</td>
<td align="center" class="size-10" width="9%"> 17:15</td>
<td align="center" class="size-10" width="8%">34</td>
<td align="center" class="size-10" width="8%">411.3</td>
<td align="center" class="size-10" width="15%"></td>
</tr>
</tbody></table>"""]
final_delivary = pd.DataFrame()
a = 0
for i in range(len(all_table)):
print("-"*150)
if a % 2 == 0:
print(a)
# print(all_table[a])
tmp_table = all_table[a]
tmp_df = pd.read_html(str(tmp_table))
print("tmp_df = \n", tmp_df)
print("type of tmp_df = ", type(tmp_df))
print("#"*75)
tmp_df2 = pd.DataFrame(tmp_df[0])
print("tmp_df2 = \n", tmp_df2)
print("type of tmp_df2 = ", type(tmp_df2))
print("@"*75)
print("final_delivary = \n", final_delivary)
print("type of final_delivary = ", type(final_delivary))
pd.concat([final_delivary, tmp_df2], axis=0)
else:
print("nope")
a+=1
print("final_delivary = ", final_delivary)
, поэтому я столкнулся с проблемой при объединении отдельного фрейма данных в основной фрейм, и результат, который я получаю, является пустым фреймом, поэтому, пожалуйста, помогите мне с этим