Приведённый ниже код преобразует XML в список плоских словарей (dict), по одному на каждый RecordContainer:
import xml.etree.ElementTree as ET
import pandas as pd
# Sample document: each <RecordContainer> wraps one <catalog>/<book> record.
xml = '''<r><RecordContainer RecordNumber = "1">
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
</catalog>
</RecordContainer>
<RecordContainer RecordNumber = "2">
<catalog>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
</catalog>
</RecordContainer></r>'''


def _flatten_container(container):
    """Return one flat dict for a single ``RecordContainer`` element.

    The result merges, in this order (later keys win on a name clash):
    the container's attributes, the nested ``catalog/book`` attributes,
    and one ``tag -> text`` entry per direct child of the book.

    If the container has no ``catalog/book`` descendant, only the
    container's own attributes are returned.
    """
    # Copy the attributes: ``container.attrib`` is the element's own dict,
    # so updating it directly would write the book fields back into the tree.
    entry = dict(container.attrib)
    book = container.find('.//catalog/book')
    if book is None:
        # find() returns None when nothing matches; keep the row but sparse.
        return entry
    entry.update(book.attrib)
    # Iterating an Element yields its direct children.
    for child in book:
        entry[child.tag] = child.text
    return entry


root = ET.fromstring(xml)
containers = root.findall('.//RecordContainer')
records = [_flatten_container(container) for container in containers]
for rec in records:
    print(rec)
# One row per record; columns are the union of the dict keys.
df = pd.DataFrame(records)
print(df)
Output:
{'RecordNumber': '1', 'id': 'bk101', 'author': 'Gambardella, Matthew', 'title': "XML Developer's Guide", 'genre': 'Computer', 'price': '44.95', 'publish_date': '2000-10-01', 'description': 'An in-depth look at creating applications \n with XML.'}
{'RecordNumber': '2', 'id': 'bk102', 'author': 'Ralls, Kim', 'title': 'Midnight Rain', 'genre': 'Fantasy', 'price': '5.95', 'publish_date': '2000-12-16', 'description': 'A former architect battles corporate zombies, \n an evil sorceress, and her own childhood to become queen \n of the world.'}
RecordNumber author ... publish_date title
0 1 Gambardella, Matthew ... 2000-10-01 XML Developer's Guide
1 2 Ralls, Kim ... 2000-12-16 Midnight Rain
[2 rows x 8 columns]