Другой способ — с помощью библиотеки simplified_scrapy.
from simplified_scrapy import SimplifiedDoc, utils, req
# Sample XML document that the script below parses and exports to CSV.
# The xmlns attribute uses plain double quotes: inside a triple-single-quoted
# string they need no escaping, and writing "\>" would be an invalid escape
# sequence that leaves a literal backslash in the markup.
html = '''<?xml version="1.0" encoding="UTF-8"?>
<filebooks xmlns="http://www.something.com/xml/xxx/filebook/2006-10-31">
<filebook>
<header filebook-id="Some Title">
<currency>GBP</currency>
<display-name xml:lang="x-default">Some Name</display-name>
</header>
<file-tables>
<file_table product-id="111">
<amount quantity="1">21.5000</amount>
<file-info>xxx 01/06/2020:Test</file-info>
<date-from>2020-06-01</date-from>
<date-to>2020-06-02</date-to>
</file_table>
<file_table product-id="222">
<amount quantity="1">18.3000</amount>
<file-info>xxx 01/07/2020: Txt</file-info>
<date-from>2020-07-02</date-from>
<date-to>2020-07-02</date-to>
</file_table>
</file-tables>
</filebook>
</filebooks>'''
# Parse the sample document and flatten every <file_table> element into one
# CSV row; the first row is the column header.
doc = SimplifiedDoc(html)
header = ['product_id', 'currency', 'amount', 'quantity', 'file_info', 'date_from', 'date_to']
rows = [header]
for filebook in doc.selects('filebook'):
    # The currency is read once per filebook and repeated on each of its rows.
    currency = filebook.currency.text
    for file_table in filebook.selects('file_table'):
        amount = file_table.amount
        rows.append([
            file_table['product-id'],
            currency,
            amount.text,
            amount['quantity'],
            file_table.select('file-info>text()'),
            file_table.select('date-from>text()'),
            file_table.select('date-to>text()'),
        ])
# Write the header and all collected rows to data.csv.
utils.save2csv('data.csv', rows)
Результат (содержимое файла data.csv):
product_id,currency,amount,quantity,file_info,date_from,date_to
111,GBP,21.5000,1,xxx 01/06/2020:Test,2020-06-01,2020-06-02
222,GBP,18.3000,1,xxx 01/07/2020: Txt,2020-07-02,2020-07-02
Вот еще примеры: https://github.com/yiyedata/simplified-scrapy-demo/tree/master/doc_examples