Перекрестная оценка узлов с XSLT - PullRequest
0 голосов
/ 31 мая 2018

Я пытаюсь обработать приведённый ниже входной XML так, чтобы пометить узел SENTENCE[SECONDARY]/TOKEN атрибутом FURTHER_FROM_PRIMARY="YES", когда его значение BEGIN превышает значение BEGIN другого узла SENTENCE[SECONDARY]/TOKEN в том же узле PARAGRAPH.

Ниже приведён входной XML:

<DOCUMENT>
  <SECTION>
    <PARAGRAPH TRACK="0">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="14" END="25" SENTENCE_BEGIN="0" SENTENCE_END="48"/>   
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEFORE_PRIMARY="YES" BEGIN="0" END="9" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>        
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN  BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>       
       </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="27" END="34" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>          
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="1">
      <SENTENCE NAME="PRIMARY" COUNT="2">
        <TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/> 
        <TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48"/> 
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="2">
      <SENTENCE NAME="PRIMARY" COUNT="2">
        <TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>          
        <TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>         
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187"/>          
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="3">
      <SENTENCE NAME="PRIMARY" COUNT="4">
        <TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>   
        <TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>         
        <TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
        <TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>        
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEFORE_PRIMARY="YES" BEGIN="79" END="88" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>          
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="3">
        <TOKEN  BEGIN="110" END="113" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>          
        <TOKEN  BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>          
        <TOKEN  BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>          
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="99" END="104" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/> 
        <TOKEN  BEGIN="153" END="158" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/> 
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="4">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="119" END="127" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
        </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133"/>       
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="5">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="50" END="58" SENTENCE_BEGIN="50" SENTENCE_END="133"/>          
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="60" END="66" SENTENCE_BEGIN="50" SENTENCE_END="133"/>   
       </SENTENCE>
    </PARAGRAPH>
  </SECTION>
</DOCUMENT>

Это код XSLT:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copies the input document unchanged, except that selected TOKEN elements
  receive an extra FURTHER_FROM_PRIMARY="YES" attribute.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- Identity template: copies every attribute and node that no more specific template matches. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!--
    Handles TOKEN elements that carry SAME_SENTENCE and do not carry BEFORE_PRIMARY.
    The test below is a conjunction of three comparisons, each made against
    tokens reached through the grandparent element (../../):
      1. the current SENTENCE_BEGIN equals the SENTENCE_BEGIN of a PRIMARY token;
      2. the current SENTENCE_END equals the SENTENCE_END of a PRIMARY token;
      3. the current BEGIN is greater than the BEGIN of a SECONDARY token that
         has SAME_SENTENCE and no BEFORE_PRIMARY.
    NOTE(review): in XPath 1.0 a comparison against a node-set is true if ANY
    node in the set satisfies it, and the three comparisons are evaluated
    independently. Nothing requires the SECONDARY token found in (3) to share
    the SENTENCE_BEGIN / SENTENCE_END of the current token, so a token can be
    flagged because of a token from a different sentence. To tie the
    conditions to one and the same token they would have to be moved into a
    single predicate on that token.
  -->
  <xsl:template match="TOKEN[@SAME_SENTENCE][not (@BEFORE_PRIMARY)]">
     <xsl:copy>
      <!-- The new attribute is emitted first, before the original attributes and children are copied. -->
      <xsl:if test="((current()/@SENTENCE_BEGIN = ../../SENTENCE[@NAME='PRIMARY']/TOKEN/@SENTENCE_BEGIN) and (current()/@SENTENCE_END = ../../SENTENCE[@NAME='PRIMARY']/TOKEN/@SENTENCE_END)) and  (current()/@BEGIN &gt; ../../SENTENCE[@NAME='SECONDARY']/TOKEN [@SAME_SENTENCE] [not (@BEFORE_PRIMARY)]/@BEGIN)">

            <xsl:attribute name="FURTHER_FROM_PRIMARY">YES</xsl:attribute>

      </xsl:if>
      <xsl:apply-templates select="@*|node()"/>
      </xsl:copy> 
  </xsl:template>

</xsl:stylesheet>

Это выходной XML:

<?xml version="1.0" encoding="utf-16"?>
<DOCUMENT>
  <SECTION>
    <PARAGRAPH TRACK="0">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="14" END="25" SENTENCE_BEGIN="0" SENTENCE_END="48" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEFORE_PRIMARY="YES" BEGIN="0" END="9" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="27" END="34" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES" />
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="1">
      <SENTENCE NAME="PRIMARY" COUNT="2">
        <TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48" />
        <TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" />
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="2">
      <SENTENCE NAME="PRIMARY" COUNT="2">
        <TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187" />
        <TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" />
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="3">
      <SENTENCE NAME="PRIMARY" COUNT="4">
        <TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133" />
        <TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133" />
        <TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187" />
        <TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEFORE_PRIMARY="YES" BEGIN="79" END="88" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="3">
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="110" END="113" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="99" END="104" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES" />
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="153" END="158" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES" />
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="4">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="119" END="127" SENTENCE_BEGIN="50" SENTENCE_END="133" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" />
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="5">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="50" END="58" SENTENCE_BEGIN="50" SENTENCE_END="133" />
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="60" END="66" SENTENCE_BEGIN="50" SENTENCE_END="133" />
      </SENTENCE>
    </PARAGRAPH>
  </SECTION>
</DOCUMENT>

К сожалению, в приведённом выше примере XSLT-код ошибочно помечает один из узлов TOKEN:

<SENTENCE NAME="SECONDARY" COUNT="1">
     <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>        
</SENTENCE>

Ниже приведён полный желаемый вывод:

<DOCUMENT>
  <SECTION>
    <PARAGRAPH TRACK="0">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="14" END="25" SENTENCE_BEGIN="0" SENTENCE_END="48"/>   
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEFORE_PRIMARY="YES" BEGIN="0" END="9" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>        
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>       
       </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="27" END="34" SENTENCE_BEGIN="0" SENTENCE_END="48" SAME_SENTENCE="YES"/>          
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="1">
      <SENTENCE NAME="PRIMARY" COUNT="2">
        <TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/> 
        <TOKEN BEGIN="37" END="41" SENTENCE_BEGIN="0" SENTENCE_END="48"/>
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="43" END="47" SENTENCE_BEGIN="0" SENTENCE_END="48"/> 
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="2">
      <SENTENCE NAME="PRIMARY" COUNT="2">
        <TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>          
        <TOKEN BEGIN="164" END="170" SENTENCE_BEGIN="135" SENTENCE_END="187"/>         
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187"/>          
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="3">
      <SENTENCE NAME="PRIMARY" COUNT="4">
        <TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>   
        <TOKEN BEGIN="93" END="97" SENTENCE_BEGIN="50" SENTENCE_END="133"/>         
        <TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
        <TOKEN BEGIN="135" END="139" SENTENCE_BEGIN="135" SENTENCE_END="187"/>
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="141" END="147" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>        
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEFORE_PRIMARY="YES" BEGIN="79" END="88" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>          
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="3">
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="110" END="113" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>          
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/>          
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="172" END="175" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>          
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="99" END="104" SENTENCE_BEGIN="50" SENTENCE_END="133" SAME_SENTENCE="YES"/> 
        <TOKEN FURTHER_FROM_PRIMARY="YES" BEGIN="153" END="158" SENTENCE_BEGIN="135" SENTENCE_END="187" SAME_SENTENCE="YES"/>   
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="4">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="119" END="127" SENTENCE_BEGIN="50" SENTENCE_END="133"/>
        </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="129" END="132" SENTENCE_BEGIN="50" SENTENCE_END="133"/>       
      </SENTENCE>
    </PARAGRAPH>
    <PARAGRAPH TRACK="5">
      <SENTENCE NAME="PRIMARY" COUNT="1">
        <TOKEN BEGIN="50" END="58" SENTENCE_BEGIN="50" SENTENCE_END="133"/>          
      </SENTENCE>
      <SENTENCE NAME="SECONDARY" COUNT="1">
        <TOKEN BEGIN="60" END="66" SENTENCE_BEGIN="50" SENTENCE_END="133"/>   
       </SENTENCE>
    </PARAGRAPH>
  </SECTION>
</DOCUMENT>

Есть ли у вас какие-нибудь подсказки о том, что я делаю не так?

1 Ответ

0 голосов
/ 31 мая 2018

Вы не упоминаете в своём описании, что у вас также есть проверки на совпадение значений SENTENCE_BEGIN и SENTENCE_END.

Проблема в том, что ваша проверка BEGIN не зависит от того, совпадают ли SENTENCE_BEGIN и SENTENCE_END. Другими словами, вы проверяете: «Есть ли токен с тем же SENTENCE_BEGIN, и есть ли токен с тем же SENTENCE_END, и есть ли токен с более низким значением BEGIN?» — причём это могут быть три разных токена.

Я думаю, это то, что вам нужно....

<xsl:if test="../../SENTENCE[@NAME='SECONDARY']/TOKEN[@SENTENCE_BEGIN = current()/@SENTENCE_BEGIN and @SENTENCE_END = current()/@SENTENCE_END and not(@BEFORE_PRIMARY) and current()/@BEGIN &gt; @BEGIN]">
...