Как скопировать некоторые конкретные узлы из одного XML в другой, на определенном уровне в Python - PullRequest
0 голосов
/ 17 мая 2018

Я пытаюсь объединить один XML-файл с другим: нужно скопировать определённые узлы из исходного файла (Source.xml) в целевой файл (Destination.xml).

Вот исходный файл (ExampleSource.xml). Обратите внимание, что данные в нём сгруппированы по атрибуту «safetyreportid» (он будет служить ключом при переносе этих узлов в целевой файл):

<?xml version="1.0" encoding="UTF-8"?>
<drugs>
    <concmed safetyreportid="FR-10300423">
        <drug>
            <drugcharacterization>.</drugcharacterization>
            <medicinalproduct>Blinded study</medicinalproduct>
            <activesubstance>
                <activesubstancename> </activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
        <drug>
            <drugcharacterization>2</drugcharacterization>
            <medicinalproduct>METOTREXATE</medicinalproduct>
            <activesubstance>
                <activesubstancename>METHOTREXATE SODIUM</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
    </concmed>
    <concmed safetyreportid="BG-1010011">
        <drug>
            <drugcharacterization>1</drugcharacterization>
            <medicinalproduct>Medical Product</medicinalproduct>
            <activesubstance>
                <activesubstancename>ActiveSub</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
        <drug>
            <drugcharacterization>2</drugcharacterization>
            <medicinalproduct>Azerty</medicinalproduct>
            <activesubstance>
                <activesubstancename>POTATIUM</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
        <drug>
            <drugcharacterization>3</drugcharacterization>
            <medicinalproduct>Querty</medicinalproduct>
            <activesubstance>
                <activesubstancename>Plutonium</activesubstancename>
            </activesubstance>
            <drugreactionrelatedness>
                <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                <drugreactionasses></drugreactionasses>
            </drugreactionrelatedness>
        </drug>
    </concmed>
</drugs>

А вот целевой файл (ExampleDestination.xml) — здесь узлы также сгруппированы по safetyreportid:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ichicsr SYSTEM "http://eudravigilance.ema.europa.eu/dtd/icsr21xml.dtd">
<ichicsr lang="en">
    <ichicsrmessageheader>
        <messagetype>ichicsr</messagetype>
        <messageformatversion>2.1</messageformatversion>
    </ichicsrmessageheader>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>BG-1010011</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>GivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>FAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>CYSTITIS</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="BG-1010011">
            </concmed>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>FR-10300423</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>OtherGivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>DIARRHEA</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="FR-10300423">
            </concmed>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
</ichicsr>

Я написал следующий скрипт, чтобы попытаться скопировать каждый узел (вместе с подэлементами) в целевой файл — в отчёт с соответствующим safetyreportid:

import xml.etree.ElementTree as ET
from lxml import etree

def find_child(node, with_ref):
    """Return the first descendant of ``node`` whose tag equals ``with_ref``.

    Descendants are examined depth-first in document order; ``node`` itself
    is never a candidate. ``None`` is returned when no descendant matches.
    """
    # Explicit stack instead of recursion. Children are pushed in reverse so
    # the leftmost child is popped, and therefore examined, first.
    pending = list(node)[::-1]
    while pending:
        candidate = pending.pop()
        if candidate.tag == with_ref:
            return candidate
        pending.extend(reversed(list(candidate)))
    return None

def replace_node(from_tree, to_tree, node_ref):
    """Replace the first ``node_ref`` element of ``to_tree`` with the one from ``from_tree``.

    The first element with tag ``node_ref`` is looked up in each tree with
    ``find_child``. The element found in ``to_tree`` is removed from its
    parent and the element found in ``from_tree`` is inserted at the same
    index. The inserted element is the very same object that still lives in
    ``from_tree`` (it is not copied).

    Raises:
        KeyError: if either tree has no descendant with tag ``node_ref``.
            The check happens before ``to_tree`` is touched.
    """
    from_node = find_child(from_tree.getroot(), node_ref)
    to_node = find_child(to_tree.getroot(), node_ref)

    # Validate both lookups up front so that a failed lookup can never leave
    # to_tree with the old node removed and nothing inserted in its place.
    if from_node is None:
        raise KeyError("no {!r} element found in the source tree".format(node_ref))
    if to_node is None:
        raise KeyError("no {!r} element found in the destination tree".format(node_ref))

    # Find where to substitute the from_node into the to_tree
    to_parent, to_index = get_node_parent_info(to_tree, to_node)

    # Replace to_node with from_node
    to_parent.remove(to_node)
    to_parent.insert(to_index, from_node)

def get_node_parent_info(tree, node):
    """Locate ``node`` inside ``tree``.

    Returns a ``(parent, index)`` tuple where ``parent`` is the element that
    directly contains ``node`` and ``index`` is the position of ``node``
    among that parent's children. Raises ``KeyError`` when ``node`` is not a
    child of any element reachable through ``tree.iter()``.
    """
    # Elements hold no reference to their parent, so build a child -> parent
    # lookup by walking every element of the tree.
    child_to_parent = {}
    for candidate_parent in tree.iter():
        for child in candidate_parent:
            child_to_parent[child] = candidate_parent

    owner = child_to_parent[node]
    position = list(owner).index(node)
    return owner, position


# lxml view of the destination file; used only to enumerate <safetyreport>
# elements through XPath.
destinationFile = etree.parse("ExampleDestination.xml")

# Standard-library trees: the ones replace_node() reads from and modifies.
from_tree = ET.parse('ExampleSource.xml')
to_tree = ET.parse('ExampleDestination.xml')

safety_ref = destinationFile.xpath("//safetyreport")

for safetyreportid in safety_ref:
    # Print the id of the current report.
    # NOTE(review): the id is only printed; it is not passed to replace_node(),
    # which therefore works on the first 'concmed' of each tree on every pass.
    local_safetyreportid = safetyreportid.xpath("./safetyreportid")[0].text
    print(local_safetyreportid)
    replace_node(from_tree, to_tree, 'concmed')

to_tree.write('ouput.xml')

Однако в файле ouput.xml узлы с лекарствами (drug) не попадают в отчёты с соответствующим safetyreportid. Вот что получается:

<ichicsr lang="en">
    <ichicsrmessageheader>
        <messagetype>ichicsr</messagetype>
        <messageformatversion>2.1</messageformatversion>
    </ichicsrmessageheader>
    <safetyreport>
        <safetyreportversion />
        <safetyreportid>BG-1010011</safetyreportid>
        <primarysource>
            <reportertitle />
            <reportergivename>GivenName</reportergivename>
            <reportermiddlename />
            <reporterfamilyname>FAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype />
            <senderorganization />
        </sender>
        <receiver>
            <receivertype />
            <receiverorganization />
        </receiver>
        <patient>
            <patientinitial />
            <patientgpmedicalrecordnumb />
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>CYSTITIS</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="FR-10300423">
                <drug>
                    <drugcharacterization>.</drugcharacterization>
                    <medicinalproduct>Blinded study</medicinalproduct>
                    <activesubstance>
                        <activesubstancename> </activesubstancename>
                    </activesubstance>
                    <drugreactionrelatedness>
                        <drugreactionassesmeddraversion />
                        <drugreactionasses />
                    </drugreactionrelatedness>
                </drug>
                <drug>
                    <drugcharacterization>2</drugcharacterization>
                    <medicinalproduct>METOTREXATE</medicinalproduct>
                    <activesubstance>
                        <activesubstancename>METHOTREXATE SODIUM</activesubstancename>
                    </activesubstance>
                    <drugreactionrelatedness>
                        <drugreactionassesmeddraversion />
                        <drugreactionasses />
                    </drugreactionrelatedness>
                </drug>
            </concmed>
            <summary>
                <narrativeincludeclinical />
                <reportercomment />
            </summary>
        </patient>
    </safetyreport>
    <safetyreport>
        <safetyreportversion />
        <safetyreportid>FR-10300423</safetyreportid>
        <primarysource>
            <reportertitle />
            <reportergivename>OtherGivenName</reportergivename>
            <reportermiddlename />
            <reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype />
            <senderorganization />
        </sender>
        <receiver>
            <receivertype />
            <receiverorganization />
        </receiver>
        <patient>
            <patientinitial />
            <patientgpmedicalrecordnumb />
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>DIARRHEA</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <concmed safetyreportid="FR-10300423">
            </concmed>
            <summary>
                <narrativeincludeclinical />
                <reportercomment />
            </summary>
        </patient>
    </safetyreport>
</ichicsr>

И вот мой ожидаемый результат:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE ichicsr SYSTEM "http://eudravigilance.ema.europa.eu/dtd/icsr21xml.dtd">
<ichicsr lang="en">
    <ichicsrmessageheader>
        <messagetype>ichicsr</messagetype>
        <messageformatversion>2.1</messageformatversion>
    </ichicsrmessageheader>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>BG-1010011</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>GivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>FAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>CYSTITIS</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <drug>
                <drugcharacterization>1</drugcharacterization>
                <medicinalproduct>Medical Product</medicinalproduct>
                <activesubstance>
                    <activesubstancename>ActiveSub</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <drug>
                <drugcharacterization>2</drugcharacterization>
                <medicinalproduct>Azerty</medicinalproduct>
                <activesubstance>
                    <activesubstancename>POTATIUM</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <drug>
                <drugcharacterization>3</drugcharacterization>
                <medicinalproduct>Querty</medicinalproduct>
                <activesubstance>
                    <activesubstancename>Plutonium</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
    <safetyreport>
        <safetyreportversion/>
        <safetyreportid>FR-10300423</safetyreportid>
        <primarysource>
            <reportertitle/>
            <reportergivename>OtherGivenName</reportergivename>
            <reportermiddlename></reportermiddlename>
            <reporterfamilyname>OTHERFAMILYNAME</reporterfamilyname>
        </primarysource>
        <sender>
            <sendertype></sendertype>
            <senderorganization></senderorganization>
        </sender>
        <receiver>
            <receivertype></receivertype>
            <receiverorganization></receiverorganization>
        </receiver>
        <patient>
            <patientinitial></patientinitial>
            <patientgpmedicalrecordnumb></patientgpmedicalrecordnumb>
            <medicalhistoryepisode>
            </medicalhistoryepisode>
            <reaction>
                <primarysourcereaction>DIARRHEA</primarysourcereaction>
                <reactionmeddraversionllt>20.1</reactionmeddraversionllt>
            </reaction>
            <drug>
                <drugcharacterization>.</drugcharacterization>
                <medicinalproduct>Blinded study</medicinalproduct>
                <activesubstance>
                    <activesubstancename> </activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <drug>
                <drugcharacterization>2</drugcharacterization>
                <medicinalproduct>METOTREXATE</medicinalproduct>
                <activesubstance>
                    <activesubstancename>METHOTREXATE SODIUM</activesubstancename>
                </activesubstance>
                <drugreactionrelatedness>
                    <drugreactionassesmeddraversion></drugreactionassesmeddraversion>
                    <drugreactionasses></drugreactionasses>
                </drugreactionrelatedness>
            </drug>
            <summary>
                <narrativeincludeclinical></narrativeincludeclinical>
                <reportercomment></reportercomment>
            </summary>
        </patient>
    </safetyreport>
</ichicsr>

Буду благодарен за помощь с решением этой задачи слияния... Заранее спасибо за внимание и помощь. С наилучшими пожеланиями из Франции

1 Ответ

0 голосов
/ 18 мая 2018

Итак, вот решение, которое у меня сейчас работает, — возможно, оно пригодится кому-то ещё...

import xml.etree.ElementTree as ET
from lxml import etree

def find_child(node, with_ref, with_safetyreportid):
    """Recursively find a descendant by tag and ``safetyreportid`` attribute.

    Descendants of ``node`` are searched depth-first in document order;
    ``node`` itself is never a candidate.

    Args:
        node: element whose descendants are searched.
        with_ref: tag name the wanted element must have.
        with_safetyreportid: value its ``safetyreportid`` attribute must have.

    Returns:
        The first matching element, or ``None`` when there is none.
    """
    for element in node:
        # .get() rather than attrib[...]: an element with the right tag but
        # without a 'safetyreportid' attribute is simply not a match and must
        # not abort the whole search with a KeyError.
        if element.tag == with_ref and element.get('safetyreportid') == with_safetyreportid:
            return element
        # A non-matching element may still contain the wanted one deeper down.
        sub_result = find_child(element, with_ref, with_safetyreportid)
        if sub_result is not None:
            return sub_result

    return None

def replace_node(from_tree, to_tree, node_ref, with_safetyreportid):
    """Replace a ``node_ref`` element of ``to_tree`` with its counterpart from ``from_tree``.

    In each tree the element is looked up with ``find_child`` by tag
    ``node_ref`` and ``safetyreportid`` attribute ``with_safetyreportid``.
    The element found in ``to_tree`` is removed from its parent and the one
    found in ``from_tree`` is inserted at the same index. The inserted
    element is the very same object that still lives in ``from_tree`` (it is
    not copied).

    Raises:
        KeyError: if either tree has no such element. The check happens
            before ``to_tree`` is touched.
    """
    from_node = find_child(from_tree.getroot(), node_ref, with_safetyreportid)
    to_node = find_child(to_tree.getroot(), node_ref, with_safetyreportid)

    # Validate both lookups up front so that a failed lookup can never leave
    # to_tree with the old node removed and nothing inserted in its place.
    if from_node is None:
        raise KeyError("no {!r} element with safetyreportid {!r} in the source tree".format(
            node_ref, with_safetyreportid))
    if to_node is None:
        raise KeyError("no {!r} element with safetyreportid {!r} in the destination tree".format(
            node_ref, with_safetyreportid))

    # Find where to substitute the from_node into the to_tree
    to_parent, to_index = get_node_parent_info(to_tree, to_node)

    # Replace to_node with from_node
    to_parent.remove(to_node)
    to_parent.insert(to_index, from_node)

def get_node_parent_info(tree, node):
    """Find the parent of ``node`` in ``tree`` and the node's position under it.

    Returns:
        ``(parent, index)``: ``parent`` is the element directly containing
        ``node``; ``index`` is the position of ``node`` in its child list.

    Raises:
        KeyError: if ``node`` is not a child of any element yielded by
            ``tree.iter()``.
    """
    # Elements do not know their parent, so record, for every element in the
    # tree, which element each of its children belongs to.
    parent_of = {}
    for ancestor in tree.iter():
        parent_of.update((child, ancestor) for child in ancestor)

    parent = parent_of[node]
    siblings = list(parent)
    return parent, siblings.index(node)


# lxml is used only to enumerate the <concmed> elements of both files via
# XPath; the trees that are actually merged and written are the
# standard-library ElementTree instances below.
destinationFile = etree.parse("ExampleDestination.xml")
sourceFile = etree.parse("ExampleSource.xml")

from_tree = ET.ElementTree(file='ExampleSource.xml')
to_tree = ET.ElementTree(file='ExampleDestination.xml')

safety_dest_ref = destinationFile.xpath("//concmed")
safety_sour_ref = sourceFile.xpath("//concmed")

# Ids for which the destination has a <concmed> placeholder. A source id
# without one cannot be merged; skipping it keeps one stray id from aborting
# the whole run before the output file is written.
dest_report_ids = {concmed.get('safetyreportid') for concmed in safety_dest_ref}

for each_source_concmed in safety_sour_ref:
    pat_ref_source = each_source_concmed.get('safetyreportid')
    if pat_ref_source is None or pat_ref_source not in dest_report_ids:
        print("skipping {!r}: no matching concmed in destination".format(pat_ref_source))
        continue
    print(pat_ref_source)
    replace_node(from_tree, to_tree, 'concmed', pat_ref_source)

# Write an explicit UTF-8 XML declaration, as both input files have one.
# NOTE(review): the DOCTYPE line of the destination does not appear in the
# output produced by ElementTree - confirm whether it has to be re-added.
to_tree.write('ouput.xml', encoding='UTF-8', xml_declaration=True)
...