Поскольку в документе всегда будет хотя бы один элемент TaxTotal, я бы создавал новую строку CSV для каждого такого элемента и поднимался бы вверх по дереву за значениями родительских элементов.
Вот пример использования lxml. Я добавил функцию, чтобы упростить обработку пустых значений, но любое дополнительное форматирование значений я оставлю на ваше усмотрение.
Python 3.6
from lxml import etree
import csv
def get_value(target_tree, xpath, namespaces):
    """Return the text of the first node matched by *xpath*, or "" if there is none.

    Args:
        target_tree: Object exposing ``xpath(expr, namespaces=...)``; the
            expression is evaluated relative to it.
        xpath: XPath expression to evaluate.
        namespaces: Prefix-to-URI mapping used to resolve prefixes in *xpath*.

    Returns:
        The ``text`` of the first match. An empty string is returned both when
        nothing matches and when the first match has no text, so callers
        always receive a string.
    """
    matches = target_tree.xpath(xpath, namespaces=namespaces)
    # Keep the try body minimal: only the indexing is expected to fail.
    try:
        first = matches[0]
    except IndexError:
        return ""
    # A matched node without text content reports ``text`` as None;
    # normalise that to "" so "missing" and "empty" look the same.
    return first.text or ""
# Parse the invoice document once; every XPath query below runs against it.
tree = etree.parse("input.xml")

# Prefix -> namespace URI map used to resolve the prefixes in the XPath
# expressions below.
ns = {
    "cac": "urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2",
    "cbc": "urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2",
    "i2": "urn:oasis:names:specification:ubl:schema:xsd:Invoice-2",
}

# newline="" hands line-ending control to the csv module, so the explicit
# lineterminator="\n" is written as-is instead of being translated by the
# platform's text layer. The explicit encoding keeps the output independent
# of the locale default.
with open("output.csv", "w", newline="", encoding="utf-8") as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=";", lineterminator="\n", quoting=csv.QUOTE_MINIMAL)

    # Header
    csvwriter.writerow([
        "ID",
        "/InvoiceLine/ID",
        "/InvoiceLine/InvoicedQuantity",
        "/InvoiceLine/LineExtensionAmount",
        "/InvoiceLine/TaxTotal/TaxAmount",
        "/InvoiceLine/TaxTotal/TaxSubtotal/TaxableAmount",
        "/InvoiceLine/TaxTotal/TaxSubtotal/TaxAmount",
        "/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/ID",
        "/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/Percent",
        "/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/TaxScheme/ID",
        "/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/TaxScheme/Name",
    ])

    # The invoice ID is addressed by an absolute path, so it is the same for
    # every row; look it up once instead of once per TaxTotal.
    invoice_id = get_value(tree, "/i2:Invoice/cbc:ID", ns)

    # One CSV row per TaxTotal element. "../" steps up to the TaxTotal's
    # parent to pick up the values shared by all of its TaxTotal children.
    # NOTE(review): "//cac:TaxTotal" matches TaxTotal elements anywhere in
    # the document, not only those directly under an InvoiceLine — confirm
    # that is intended for the input at hand.
    for tax_total in tree.xpath("//cac:TaxTotal", namespaces=ns):
        csvwriter.writerow([
            invoice_id,
            get_value(tax_total, "../cbc:ID", ns),
            get_value(tax_total, "../cbc:InvoicedQuantity", ns),
            get_value(tax_total, "../cbc:LineExtensionAmount", ns),
            get_value(tax_total, "cbc:TaxAmount", ns),
            get_value(tax_total, "cac:TaxSubtotal/cbc:TaxableAmount", ns),
            get_value(tax_total, "cac:TaxSubtotal/cbc:TaxAmount", ns),
            get_value(tax_total, "cac:TaxSubtotal/cac:TaxCategory/cbc:ID", ns),
            get_value(tax_total, "cac:TaxSubtotal/cac:TaxCategory/cbc:Percent", ns),
            get_value(tax_total, "cac:TaxSubtotal/cac:TaxCategory/cac:TaxScheme/cbc:ID", ns),
            get_value(tax_total, "cac:TaxSubtotal/cac:TaxCategory/cac:TaxScheme/cbc:Name", ns),
        ])
Вывод (output.csv)
ID;/InvoiceLine/ID;/InvoiceLine/InvoicedQuantity;/InvoiceLine/LineExtensionAmount;/InvoiceLine/TaxTotal/TaxAmount;/InvoiceLine/TaxTotal/TaxSubtotal/TaxableAmount;/InvoiceLine/TaxTotal/TaxSubtotal/TaxAmount;/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/ID;/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/Percent;/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/TaxScheme/ID;/InvoiceLine/TaxTotal/TaxSubtotal/TaxCategory/TaxScheme/Name
102165444;1.0000;1.0000;142.3900;138.24;142.39;7.20;3645;;140;Afgift
102165444;2.0000;1.0000;142.3900;138.24;142.39;7.20;3645;;140;Afgift
102165444;2.0000;1.0000;142.3900;35.60;142.39;35.60;StandardRated;25;63;Moms