XSLT для извлечения значений из HTML XML - PullRequest
0 голосов
/ 18 сентября 2018

У меня есть XML-документ.

Мне нужно извлечь значения из получающейся таблицы, в частности значения столбцов 2 и 3 для каждой строки.

HTML выглядит как

table

И xml выглядит так:

<DIV><DIV><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><TR></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"><COL width="122px"><COL width="122px"><COL width="122px"><COL width="122px"></COLGROUP><TR></TR><TR><TD align="LEFT" colspan="5" style="border: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Nutrition</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Typical Values</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">Per 100g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">One tart (125g)</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">%RI*</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">RI*</SPAN></TD></TR></TABLE></TD></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"><COL width="122px"><COL width="122px"><COL width="122px"><COL width="122px"></COLGROUP><TR></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Energy</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1373kJ / 329kcal</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1717kJ / 411kcal</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">20%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">8400kJ / 2000kcal</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid 
black;padding-left: 3px;"><SPAN style="font-size: inherit;">Fat</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">20.0g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">25.0g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">36%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">70g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Saturates</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">11.2g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">14.0g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">70%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">20g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Carbohydrate</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">32.9g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">41.1g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">16%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">260g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Sugars</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">16.2g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 
122px;"><SPAN style="font-size: inherit;">20.2g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">22%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">90g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Fibre</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1.3g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1.6g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">&nbsp;</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">&nbsp;</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Protein</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">3.9g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">4.9g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">10%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">50g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;border-bottom: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Salt</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-bottom: 1px solid black;"><SPAN style="font-size: inherit;">0.1g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-bottom: 1px solid black;"><SPAN style="font-size: inherit;">0.1g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-bottom: 1px solid 
black;"><SPAN style="font-size: inherit;">2%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;border-bottom: 1px solid black;"><SPAN style="font-size: inherit;">6g</SPAN></TD></TR></TABLE></TD></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"><COL width="122px"><COL width="122px"><COL width="122px"><COL width="122px"></COLGROUP><TR></TR><TR><TD colspan="5" style="border-left: 1px solid black;border-right: 1px solid black;"><SPAN style="font-size: inherit;">Contains 2 servings</SPAN></TD></TR></TABLE></TD></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"></COLGROUP><TR></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><TR></TR><TR><TD colspan="1" style="border-left: 1px solid black;border-right: 1px solid black;border-bottom: 1px solid black;padding-left: 3px;"><P><SPAN>* Reference intake of an average adult (8400 kJ / 2000 kcal)</SPAN></P></TD></TR></TABLE></TD></TR></TABLE></TD></TR></TABLE></DIV></DIV>

Что я пробовал: Мне нужно, чтобы значения хранились в переменных в xslt.

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Outputs the string value of every SPAN element found in the input document.
    XSLT instructions must be in the XSLT namespace (xsl: prefix) and must live
    inside a template; unprefixed for-each/value-of elements placed directly
    under xsl:stylesheet are not instructions and are never executed.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!-- Entry point: runs once for the document root. -->
    <xsl:template match="/">
        <xsl:for-each select="//SPAN">
            <xsl:value-of select="." />
        </xsl:for-each>
    </xsl:template>
</xsl:stylesheet>

Как я получу значения, в частности, я хотел бы знать:

EnergyCol2

EnergyCol3

значения. Я хотел бы сохранить их в переменных. Как мне определить, что конкретное значение относится к столбцу 2 (или 3) и к какому типу оно принадлежит (энергия, жир и т. д.)?

1 Ответ

0 голосов
/ 18 сентября 2018

Это не совсем отвечает на вопрос, так как для разбора HTML/XML я использовал регулярное выражение, но оно решает мою задачу. Итак, я вызываю функцию Java из XSLT.

И код Java:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts nutrient values from an HTML/XML nutrition table using a regular expression.
 *
 * <p>For each nutrient label (Energy, Fat, Saturates, Carbohydrate, Sugars, Fibre, Protein,
 * Salt, in that order) the contents of the first two {@code <SPAN>} elements that follow the
 * label are captured. The getters expose them as the "per 100" and "per serving" values.
 *
 * <p>Usage: call {@link #process(String)} first and, if it returns {@code true}, read the
 * values through the getters. The result of the last {@code process} call is kept in static
 * state, so this class is not safe for concurrent use.
 */
public class NutrientValues {

    /** Nutrient labels in the order they must appear in the document. */
    private static final String[] NUTRIENTS = {
        "Energy", "Fat", "Saturates", "Carbohydrate", "Sugars", "Fibre", "Protein", "Salt"
    };

    /** Skips to the next SPAN element and captures its text content. */
    private static final String SPAN_VALUE = ".*?<SPAN.*?>(.*?)<\\/SPAN>";

    /** Compiled once; two capturing groups per nutrient, i.e. groups 1..16. */
    private static final Pattern PATTERN = buildPattern();

    /** Captured groups of the last successful match (index = group - 1), or null if none. */
    private static String[] values;

    /**
     * Builds the expression from the label list instead of one hand-written literal, so that
     * every nutrient segment is guaranteed to have the same shape.
     */
    private static Pattern buildPattern() {
        StringBuilder regex = new StringBuilder();
        for (String nutrient : NUTRIENTS) {
            if (regex.length() > 0) {
                regex.append(".*?");
            }
            regex.append(nutrient).append(SPAN_VALUE).append(SPAN_VALUE);
        }
        // DOTALL lets ".*?" cross line terminators, so documents that contain line breaks
        // between the table rows still match.
        return Pattern.compile(regex.toString(), Pattern.DOTALL);
    }

    /**
     * Searches the document for the nutrition values and remembers them for the getters.
     *
     * @param htmldoc the HTML/XML text to scan; {@code null} is treated as "no match"
     * @return {@code true} if all nutrient values were found, {@code false} otherwise
     */
    public static boolean process(String htmldoc) {
        values = null; // never leave values of a previous document behind
        if (htmldoc == null) {
            return false;
        }
        Matcher found = PATTERN.matcher(htmldoc);
        if (!found.find()) {
            return false;
        }
        String[] captured = new String[found.groupCount()];
        for (int group = 1; group <= captured.length; group++) {
            captured[group - 1] = found.group(group);
        }
        values = captured;
        return true;
    }

    /**
     * Returns the captured text of the given group from the last successful match.
     *
     * @throws IllegalStateException if {@link #process(String)} has not been called or its
     *     last call returned {@code false}
     */
    private static String value(int group) {
        String[] current = values;
        if (current == null) {
            throw new IllegalStateException(
                "No match available: call process(String) first and check that it returned true");
        }
        return current[group - 1];
    }

    /** @return the first value following the "Energy" label */
    public static String getEnergyPer100() {
        return value(1);
    }

    /** @return the second value following the "Energy" label */
    public static String getEnergyPerServ() {
        return value(2);
    }

    /** @return the first value following the "Fat" label */
    public static String getFatPer100() {
        return value(3);
    }

    /** @return the second value following the "Fat" label */
    public static String getFatPerServ() {
        return value(4);
    }

    /** @return the first value following the "Saturates" label */
    public static String getSaturatesPer100() {
        return value(5);
    }

    /** @return the second value following the "Saturates" label */
    public static String getSaturatesPerServ() {
        return value(6);
    }

    /** @return the first value following the "Carbohydrate" label */
    public static String getCarbohydratePer100() {
        return value(7);
    }

    /** @return the second value following the "Carbohydrate" label */
    public static String getCarbohydratePerServ() {
        return value(8);
    }

    /** @return the first value following the "Sugars" label */
    public static String getSugarsPer100() {
        return value(9);
    }

    /** @return the second value following the "Sugars" label */
    public static String getSugarsPerServ() {
        return value(10);
    }

    /** @return the first value following the "Fibre" label */
    public static String getFibrePer100() {
        return value(11);
    }

    /** @return the second value following the "Fibre" label */
    public static String getFibrePerServ() {
        return value(12);
    }

    /** @return the first value following the "Protein" label */
    public static String getProteinPer100() {
        return value(13);
    }

    /** @return the second value following the "Protein" label */
    public static String getProteinPerServ() {
        return value(14);
    }

    /** @return the first value following the "Salt" label */
    public static String getSaltPer100() {
        return value(15);
    }

    /** @return the second value following the "Salt" label */
    public static String getSaltPerServ() {
        return value(16);
    }
}

Результат:

Group 1: 1373kJ / 329kcal
Group 2: 1717kJ / 411kcal
Group 3: 20.0g
Group 4: 25.0g
Group 5: 11.2g
Group 6: 14.0g
Group 7: 32.9g
Group 8: 41.1g
Group 9: 16.2g
Group 10: 20.2g
Group 11: 1.3g
Group 12: 1.6g
Group 13: 3.9g
Group 14: 4.9g
Group 15: 0.1g
Group 16: 0.1g
...