Я пытаюсь проанализировать входной XML здесь, чтобы пометить узел SENTENCE[SECONDARY]/TOKEN
значением FURTHER_FROM_PRIMARY="YES"
, когда его значение BEGIN
превышает значение BEGIN
другого SENTENCE[SECONDARY]/TOKEN
в том же PARAGRAPH
узел.
Здесь под входным XML:
<DOCUMENT>
<SECTION>
<PARAGRAPH TRACK="0">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="14" END="25" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEFORE_PRIMARY="YES" BEGIN="0" END="9" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="27" END="34" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="1">
<SENTENCE NAME="PRIMARY" COUNT="2">
<TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
<TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="2">
<SENTENCE NAME="PRIMARY" COUNT="2">
<TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
<TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="3">
<SENTENCE NAME="PRIMARY" COUNT="4">
<TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
<TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
<TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
<TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEFORE_PRIMARY="YES" BEGIN="79" END="88" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="3">
<TOKEN BEGIN="110" END="113" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
<TOKEN BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
<TOKEN BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="99" END="104" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
<TOKEN BEGIN="153" END="158" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="4">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="119" END="127" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="5">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="50" END="58" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="60" END="66" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
</PARAGRAPH>
</SECTION>
</DOCUMENT>
Это код XSLT:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output indent="yes"/>
<xsl:strip-space elements="*"/>
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
<xsl:template match="TOKEN[@SAME_SENTENCE][not (@BEFORE_PRIMARY)]">
<xsl:copy>
<xsl:if test="((current()/@SENTENCE_BEGIN = ../../SENTENCE[@NAME='PRIMARY']/TOKEN/@SENTENCE_BEGIN) and (current()/@SENTENCE_END = ../../SENTENCE[@NAME='PRIMARY']/TOKEN/@SENTENCE_END)) and (current()/@BEGIN > ../../SENTENCE[@NAME='SECONDARY']/TOKEN [@SAME_SENTENCE] [not (@BEFORE_PRIMARY)]/@BEGIN)">
<xsl:attribute name="FURTHER_FROM_PRIMARY">YES</xsl:attribute>
</xsl:if>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
Это выходной XML:
<?xml version="1.0" encoding="utf-16"?>
<DOCUMENT>
<SECTION>
<PARAGRAPH TRACK="0">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="14" END="25" SENTENCE_BEGIN="0" SENTENCE_END="48" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEFORE_PRIMARY="YES" BEGIN="0" END="9" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="27" END="34" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES" />
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="1">
<SENTENCE NAME="PRIMARY" COUNT="2">
<TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48" />
<TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" />
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="2">
<SENTENCE NAME="PRIMARY" COUNT="2">
<TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187" />
<TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" />
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="3">
<SENTENCE NAME="PRIMARY" COUNT="4">
<TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133" />
<TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133" />
<TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187" />
<TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEFORE_PRIMARY="YES" BEGIN="79" END="88" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="3">
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="110" END="113" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="99" END="104" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="153" END="158" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES" />
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="4">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="119" END="127" SENTENCE_BEGIN="50" SENTENCE_END="133" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" />
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="5">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="50" END="58" SENTENCE_BEGIN="50" SENTENCE_END="133" />
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="60" END="66" SENTENCE_BEGIN="50" SENTENCE_END="133" />
</SENTENCE>
</PARAGRAPH>
</SECTION>
</DOCUMENT>
К сожалению, в приведенном выше примере XSLT-код неправильно выбирает один из TOKENS
:
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>
</SENTENCE>
Здесь под полным желаемым выводом:
<DOCUMENT>
<SECTION>
<PARAGRAPH TRACK="0">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="14" END="25" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEFORE_PRIMARY="YES" BEGIN="0" END="9" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="27" END="34" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="1">
<SENTENCE NAME="PRIMARY" COUNT="2">
<TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
<TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="2">
<SENTENCE NAME="PRIMARY" COUNT="2">
<TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
<TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="3">
<SENTENCE NAME="PRIMARY" COUNT="4">
<TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
<TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
<TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
<TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEFORE_PRIMARY="YES" BEGIN="79" END="88" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="3">
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="110" END="113" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="99" END="104" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>
<TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="153" END="158" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="4">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="119" END="127" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
</PARAGRAPH>
<PARAGRAPH TRACK="5">
<SENTENCE NAME="PRIMARY" COUNT="1">
<TOKEN BEGIN="50" END="58" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
<SENTENCE NAME="SECONDARY" COUNT="1">
<TOKEN BEGIN="60" END="66" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
</SENTENCE>
</PARAGRAPH>
</SECTION>
</DOCUMENT>
У вас естьЛюбая подсказка о том, что я делаю не так?