Импорт файла XML, связывающего количество родительских узлов с их атрибутами - PullRequest
0 голосов
/ 31 мая 2019

У меня есть следующий код для импорта XML-файла и преобразования его в табличную форму в Excel 2016 с помощью Power Query.

// Imports the XML file and expands the nested "row" -> "column" -> "text" tables
// of the first top-level entry into one flat table.
let
    XmlTables = Xml.Tables(File.Contents("D:\XML\file.xml")),
    // Nested table of the first entry returned by Xml.Tables.
    Pages = XmlTables{0}[Table],
    PagesTyped = Table.TransformColumnTypes(Pages, {{"Attribute:index", Int64.Type}}),
    Rows = Table.ExpandTableColumn(PagesTyped, "row", {"column"}, {"row.column"}),
    Columns = Table.ExpandTableColumn(Rows, "row.column", {"text"}, {"row.column.text"}),
    // Fields pulled out of each "text" table; every output column gets the
    // "row.column.text." prefix in front of the field name.
    TextFields = {
        "Element:Text",
        "Attribute:fontName",
        "Attribute:fontSize",
        "Attribute:x",
        "Attribute:y",
        "Attribute:width",
        "Attribute:height"
    },
    Texts = Table.ExpandTableColumn(
        Columns,
        "row.column.text",
        TextFields,
        List.Transform(TextFields, each "row.column.text." & _)
    )
in
    Texts

Я получаю почти нужный вывод (индекс и остальные столбцы атрибутов), но я не знаю, как добавить столбец для страницы, строки и столбца, соответствующих каждому атрибуту.

Может кто-нибудь помочь мне с этим, пожалуйста.

Это вывод, который я получаю с моим кодом.

+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| index | Text                                     | fontName | fontSize | x      | y      | width  | height |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 0     |                                          |          |          |        |        |        |        |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 1     | achene –(e.g., strawberry)               | Arial    | 12.0     | 121.10 | 83.42  | 71.04  | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 1     | capsule – (e.g., Brazil nut)             | Arial    | 12.0     | 121.10 | 124.82 | 101.07 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 1     | caryopsis – (e.g., wheat)                | Arial    | 12.0     | 121.10 | 207.65 | 140.31 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 2     | cypsela – (e.g., dandelion)              | Arial    | 12.0     | 85.10  | 69.62  | 24.36  | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 3     |                                          |          |          |        |        |        |        |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 4     | fibrous drupe – (e.g., coconut, walnut)  | Arial    | 12.0     | 276.29 | 239.45 | 95.42  | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 4     | follicle – (e.g., magnolia)              | Arial    | 12.0     | 121.10 | 266.81 | 229.57 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 4     | legume – (e.g., bean, pea, peanut)       | Arial    | 12.0     | 353.94 | 266.81 | 155.71 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 4     | loment – a type of indehiscent legume    | Arial    | 12.0     | 85.10  | 294.41 | 165.10 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 4     | nut – (e.g., beech, hazelnut, oak acorn) | Arial    | 12.0     | 253.43 | 294.41 | 14.39  | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 4     | samara – (e.g., ash, elm, maple key)     | Arial    | 12.0     | 271.04 | 294.41 | 255.64 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 4     | schizocarp – (e.g., carrot seed)         | Arial    | 12.0     | 85.10  | 501.43 | 432.97 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 5     | silicle – (e.g., shepherd's purse)       | Arial    | 12.0     | 85.10  | 69.62  | 363.44 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 5     | utricle – (e.g., strawberry)             | Arial    | 12.0     | 85.10  | 83.42  | 382.36 | 12.00  |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+
| 6     |                                          |          |          |        |        |        |        |
+-------+------------------------------------------+----------+----------+--------+--------+--------+--------+

И это желаемый вывод:

+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| page | index | row | column | text                                   | fontName | fontSize | x      | y      | width  | height | fontStyle |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 0    | 0     |     |        |                                        |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 1    | 1     | 0   | 0      | achene                                 | Arial    | 12       | 121.1  | 83.42  | 71.04  | 12     |           |
|      |       |     |        |   –(e.g., strawberry)                  |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 1    | 1     | 1   | 0      | capsule                                | Arial    | 12       | 121.1  | 124.82 | 101.07 | 12     |           |
|      |       |     |        |   – (e.g., Brazil nut)                 |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 1    | 1     | 2   | 0      | caryopsis                              | Arial    | 12       | 121.1  | 207.65 | 140.31 | 12     |           |
|      |       |     |        |   – (e.g., wheat)                      |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 2    | 2     | 0   | 0      | cypsela                                | Arial    | 12       | 85.1   | 69.62  | 24.36  | 12     |           |
|      |       |     |        |   – (e.g., dandelion)                  |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 3    | 3     |     |        |                                        |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 4    | 4     | 0   | 0      | fibrous                                | Arial    | 12       | 276.29 | 239.45 | 95.42  | 12     | Bold      |
|      |       |     |        |   drupe – (e.g., coconut, walnut)      |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 4    | 4     | 1   | 1      | follicle                               | Arial    | 12       | 121.1  | 266.81 | 229.57 | 12     |           |
|      |       |     |        |   – (e.g., magnolia)                   |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 4    | 4     | 1   | 1      | legume                                 | Arial    | 12       | 353.94 | 266.81 | 155.71 | 12     |           |
|      |       |     |        |   – (e.g., bean, pea, peanut)          |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 4    | 4     | 2   | 2      | loment                                 | Arial    | 12       | 85.1   | 294.41 | 165.1  | 12     |           |
|      |       |     |        |   – a type of indehiscent legume       |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 4    | 4     | 2   | 2      | nut                                    | Arial    | 12       | 253.43 | 294.41 | 14.39  | 12     |           |
|      |       |     |        |   – (e.g., beech, hazelnut, oak acorn) |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 4    | 4     | 2   | 2      | samara                                 | Arial    | 12       | 271.04 | 294.41 | 255.64 | 12     |           |
|      |       |     |        |   – (e.g., ash, elm, maple key)        |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 4    | 4     | 3   | 0      | schizocarp                             | Arial    | 12       | 85.1   | 501.43 | 432.97 | 12     |           |
|      |       |     |        |   – (e.g., carrot seed)                |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 5    | 5     | 0   | 0      | silicle                                | Arial    | 12       | 85.1   | 69.62  | 363.44 | 12     |           |
|      |       |     |        |   – (e.g., shepherd's purse)           |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 5    | 5     | 1   | 1      | utricle                                | Arial    | 12       | 85.1   | 83.42  | 382.36 | 12     |           |
|      |       |     |        |   – (e.g., strawberry)                 |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+
| 6    | 6     |     |        |                                        |          |          |        |        |        |        |           |
+------+-------+-----+--------+----------------------------------------+----------+----------+--------+--------+--------+--------+-----------+

Это входной XML-файл:

<?xml version="1.0" encoding="UTF-8"?>
<document>
    <page index="0"/>
    <page index="1">
        <row><column><text fontName="Arial" fontSize="12.0" x="121.10" y="83.42" width="71.04" height="12.00">achene –(e.g., strawberry)</text></column></row>
        <row><column><text fontName="Arial" fontSize="12.0" x="121.10" y="124.82" width="101.07" height="12.00">capsule – (e.g., Brazil nut)</text></column></row>
        <row><column><text fontName="Arial" fontSize="12.0" x="121.10" y="207.65" width="140.31" height="12.00">caryopsis – (e.g., wheat)</text></column></row>
    </page>
    <page index="2">
        <row><column><text fontName="Arial" fontSize="12.0" x="85.10" y="69.62" width="24.36" height="12.00">cypsela – (e.g., dandelion)</text></column></row>
    </page>
    <page index="3"/>
    <page index="4">
        <row><column><text fontName="Arial" fontSize="12.0" fontStyle="Bold" x="276.29" y="239.45" width="95.42" height="12.00">fibrous drupe – (e.g., coconut, walnut)</text></column></row>
        <row><column><text fontName="Arial" fontSize="12.0" x="121.10" y="266.81" width="229.57" height="12.00">follicle – (e.g., magnolia)</text></column>
             <column><text fontName="Arial" fontSize="12.0" x="353.94" y="266.81" width="155.71" height="12.00">legume – (e.g., bean, pea, peanut)</text></column></row>
        <row><column><text fontName="Arial" fontSize="12.0" x="85.10" y="294.41" width="165.10" height="12.00">loment – a type of indehiscent legume</text></column>
             <column><text fontName="Arial" fontSize="12.0" x="253.43" y="294.41" width="14.39" height="12.00">nut – (e.g., beech, hazelnut, oak acorn)</text></column>
             <column><text fontName="Arial" fontSize="12.0" x="271.04" y="294.41" width="255.64" height="12.00">samara – (e.g., ash, elm, maple key)</text></column></row>
        <row><column><text fontName="Arial" fontSize="12.0" x="85.10" y="501.43" width="432.97" height="12.00">schizocarp – (e.g., carrot seed)</text></column></row>
    </page>
    <page index="5">
        <row><column><text fontName="Arial" fontSize="12.0" x="85.10" y="69.62" width="363.44" height="12.00">silicle – (e.g., shepherd's purse)</text></column></row>
        <row><column><text fontName="Arial" fontSize="12.0" x="85.10" y="83.42" width="382.36" height="12.00">utricle – (e.g., strawberry)</text></column></row>
    </page>
    <page index="6"/>
</document>

Спасибо за любую помощь

1 Ответ

1 голос
/ 31 мая 2019

Так как эти номера строк и столбцов не являются частью XML, вам придется вычислять их самостоятельно, пока вы углубляетесь в уровни.

// Flattens the nested "row" / "column" / "text" tables of the XML file into one table
// and derives zero-based "row" and "column" numbers, which are not stored in the XML.
let
    Source = Xml.Tables(File.Contents("<Path to XML file>")),
    #"Expanded Table" = Table.ExpandTableColumn(Source, "Table", {"Attribute:index", "row"}, {"index", "row"}),
    #"Changed Type" = Table.TransformColumnTypes(#"Expanded Table",{{"index", Int64.Type}}),
    #"Expanded row" = Table.ExpandTableColumn(#"Changed Type", "row", {"column"}, {"column"}),
    // Running index over all expanded rows; used below to rank rows that share an [index].
    #"Added Index" = Table.AddIndexColumn(#"Expanded row", "row_index", 0, 1),
    // Row number: null when [column] is null, otherwise the count of rows with the same
    // [index] and a [row_index] not greater than the current one, minus 1 (zero-based).
    #"Added Custom" = Table.AddColumn(#"Added Index", "row", (C) => if C[column] = null then null else Table.RowCount(Table.SelectRows(#"Added Index", each [index] = C[index] and [row_index] <= C[row_index]))-1, Int64.Type),
    #"Expanded column" = Table.ExpandTableColumn(#"Added Custom", "column", {"text"}, {"text"}),
    #"Expanded text" = Table.ExpandTableColumn(#"Expanded column", "text", {"Element:Text", "Attribute:fontName", "Attribute:fontSize", "Attribute:x", "Attribute:y", "Attribute:width", "Attribute:height", "Attribute:fontStyle"}, {"Text", "fontName", "fontSize", "x", "y", "width", "height", "fontStyle"}),
    // Second running index, taken after the "column" tables were expanded.
    #"Added Index1" = Table.AddIndexColumn(#"Expanded text", "col_index", 0, 1),
    // Column number: same counting technique, restricted to rows with the same [index] and [row].
    #"Added Custom1" = Table.AddColumn(#"Added Index1", "column", (C) => if C[row] = null then null else Table.RowCount(Table.SelectRows(#"Added Index1", each [index] = C[index] and [row] = C[row] and [col_index] <= C[col_index]))-1, Int64.Type),
    #"Removed Other Columns" = Table.SelectColumns(#"Added Custom1",{"index", "Text", "fontName", "fontSize", "x", "y", "width", "height", "row", "column", "fontStyle"}),
    #"Reordered Columns" = Table.ReorderColumns(#"Removed Other Columns",{"index", "row", "column", "Text", "fontName", "fontSize", "fontStyle", "x", "y", "width", "height"}),
    // The XML writes decimals with "." (e.g. "121.10"), so the text is parsed with the
    // "en-US" culture instead of the machine's locale; a comma-decimal locale would
    // otherwise fail or misread these values.
    #"Changed Type1" = Table.TransformColumnTypes(#"Reordered Columns",{{"fontName", type text}, {"fontSize", type number}, {"x", type number}, {"y", type number}, {"width", type number}, {"height", type number}, {"Text", type text}, {"fontStyle", type text}}, "en-US")
in
    #"Changed Type1"

Сложный момент — этот пользовательский столбец и другой, аналогичный ему:

// Custom-column function: C is the record of the row being evaluated.
// A row whose [column] is null gets null instead of a row number.
(C) => if C[column] = null then null else
    // Count the rows of #"Added Index" that have the same [index] as C and a
    // [row_index] not greater than C's, i.e. C's 1-based rank within that [index].
    Table.RowCount(
        Table.SelectRows(#"Added Index",
            each [index] = C[index] and [row_index] <= C[row_index]
        )
    ) - 1 // subtract 1 so the numbering starts at 0
...