Тип пользовательского поля SOLR отсутствует после обновления с SOLR 6.6 до 8.2 - PullRequest
0 голосов
/ 28 февраля 2020

Я работаю с SOLR не каждый день, но мне пришлось обновить ядро SOLR с версии 6.6 до 8.2. После обновления пользовательские типы полей (fieldType) отсутствуют.

После прочтения соответствующих частей в руководстве по SOLR я не вижу никаких изменений в синтаксисе схемы.

Новая схема для SOLR 8.2 (schema.xml):

<schema name="tx_solr-10-0-0--20191010" version="1.6">

<!-- Name of the field used as the unique key of a document. -->
<uniqueKey>id</uniqueKey>
<!-- XInclude: pulls in the content of general_schema_types.xml from the parent directory -->
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="../general_schema_types.xml"/>
<!-- XInclude: pulls in the content of general_schema_fields.xml from the parent directory -->
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="../general_schema_fields.xml"/>

<!-- Dynamic fields typed as textEdgeNgram1; both are indexed and stored.
    The ...S pattern is single-valued, the ...M pattern is multi-valued. -->
<dynamicField name="*_textEdgeNgram1S" type="textEdgeNgram1" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_textEdgeNgram1M" type="textEdgeNgram1" indexed="true" stored="true" multiValued="true"/>

<!-- Language-sensitive string field allowing proper sorting
    (ICU collation, locale "de", primary strength) -->
<fieldType name="string_collated" class="solr.ICUCollationField" locale="de" strength="primary"/>

<!-- A text field that uses WordDelimiterGraphFilter to enable splitting and matching of
    words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
    so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
    Synonyms and stopwords are customized by external files, and stemming is enabled.
    Duplicate tokens at the same position (which may result from Stemmed Synonyms or
    WordDelim parts) are removed. -->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
    <!-- Index-time chain: whitespace tokens, word-delimiter splitting (with catenation),
        lower-casing, managed synonyms, graph flattening, compound decomposition,
        managed stop words, German normalization, German2 Snowball stemming, de-duplication. -->
    <!-- NOTE(review): WordDelimiterGraphFilter output reaches ManagedSynonymGraphFilter before
        FlattenGraphFilter runs; Lucene documents that SynonymGraphFilter cannot consume an
        incoming graph, so verify token positions in the analysis screen. TODO confirm. -->
    <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterGraphFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"
                preserveOriginal="1"
                protected="german/protwords.txt"
        />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymGraphFilterFactory" managed="german"/>
        <filter class="solr.FlattenGraphFilterFactory"/>
        <filter class="solr.DictionaryCompoundWordTokenFilterFactory"
                dictionary="german/german-common-nouns.txt"
                minWordSize="5"
                minSubwordSize="4"
                maxSubwordSize="15"
                onlyLongestMatch="false"
        />
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="german/protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <!-- Query-time chain: same as the index chain but without catenation
        (catenateWords/catenateNumbers are 0), without graph flattening and
        without compound decomposition. -->
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterGraphFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"
                preserveOriginal="1"
                protected="german/protwords.txt"
        />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymGraphFilterFactory" managed="german"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="german/protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>
<!-- Less flexible matching, but less false matches.    Probably not ideal for product names,
    but may be good for SKUs.   Can insert dashes in the wrong place and still match.
    The word-delimiter filter only catenates parts (generateWordParts/generateNumberParts
    are 0) and keeps the original token. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymGraphFilterFactory" managed="german"/>
        <!-- Flattens the synonym graph so the following non-graph filters see a plain token stream. -->
        <filter class="solr.FlattenGraphFilterFactory"/>
        <filter class="solr.DictionaryCompoundWordTokenFilterFactory"
                dictionary="german/german-common-nouns.txt"
                minWordSize="5"
                minSubwordSize="4"
                maxSubwordSize="15"
                onlyLongestMatch="false"
        />
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.WordDelimiterGraphFilterFactory"
                generateWordParts="0"
                generateNumberParts="0"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                preserveOriginal="1"
                protected="german/protwords.txt"
        />
        <!-- WordDelimiterGraphFilter emits a token graph; at index time it must be followed
            by FlattenGraphFilter because the indexer cannot consume a graph. The flatten
            filter earlier in this chain runs before this filter and does not cover it. -->
        <filter class="solr.FlattenGraphFilterFactory"/>
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="german/protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <!-- Query-time chain: same filters as at index time, but graphs are left intact
        (no FlattenGraphFilter). -->
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymGraphFilterFactory" managed="german"/>
        <filter class="solr.DictionaryCompoundWordTokenFilterFactory"
                dictionary="german/german-common-nouns.txt"
                minWordSize="5"
                minSubwordSize="4"
                maxSubwordSize="15"
                onlyLongestMatch="false"
        />
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.WordDelimiterGraphFilterFactory"
                generateWordParts="0"
                generateNumberParts="0"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                preserveOriginal="1"
                protected="german/protwords.txt"
        />
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="german/protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>

<!-- Exact matching of words like textWhiteSpaceTokenized,
    but with enabled Synonym and Stop Filter.
    No word-delimiter splitting, compound decomposition or stemming is configured here.
 -->
<fieldType name="textExact" class="solr.TextField" positionIncrementGap="100">
    <!-- Index-time chain: whitespace tokens, lower-casing, managed synonyms
        (flattened for indexing), managed stop words, de-duplication. -->
    <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymGraphFilterFactory" managed="german"/>
        <filter class="solr.FlattenGraphFilterFactory"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <!-- Query-time chain: identical except that the synonym graph is not flattened. -->
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>

        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymGraphFilterFactory" managed="german"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>
<!-- Setup simple analysis for spell checking; norms are omitted for this type. -->
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" omitNorms="true">
    <!-- Index-time chain: standard tokenizer, lower-casing, compound decomposition,
        managed stop words, de-duplication. -->
    <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.DictionaryCompoundWordTokenFilterFactory"
                dictionary="german/german-common-nouns.txt"
                minWordSize="5"
                minSubwordSize="4"
                maxSubwordSize="15"
                onlyLongestMatch="false"
        />

        <!-- no synonyms here because we do not want to add them as spell suggestion -->
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <!-- Query-time chain: standard tokenizer, lower-casing, managed synonyms,
        managed stop words, de-duplication; no compound decomposition. -->
    <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymGraphFilterFactory" managed="german"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>

<!-- Prefix-matching text type used by the *_textEdgeNgram1S / *_textEdgeNgram1M dynamic fields.
    solr.LowerCaseTokenizerFactory is no longer available in Lucene/Solr 8, so a type that
    references it cannot be loaded. The equivalent replacement is LetterTokenizerFactory
    (splits on non-letters) followed by LowerCaseFilterFactory. -->
<fieldType name="textEdgeNgram1" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
        <tokenizer class="solr.LetterTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <!-- Emit the leading 1 to 15 character prefixes of every token. -->
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15"/>
    </analyzer>
    <!-- Query terms are only tokenized and lower-cased; they are matched against the
        prefixes produced at index time, so no n-gram filter is applied here. -->
    <analyzer type="query">
        <tokenizer class="solr.LetterTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
</fieldType>

Старая схема для SOLR 6.6 (schema.xml):

<schema name="tx_solr-8-0-0--20171020" version="1.6">

<!-- Name of the field used as the unique key of a document. -->
<uniqueKey>id</uniqueKey>
<!-- XInclude: pulls in the content of general_schema_types.xml from the parent directory -->
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="../general_schema_types.xml"/>
<!-- XInclude: pulls in the content of general_schema_fields.xml from the parent directory -->
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="../general_schema_fields.xml"/>

<!-- Dynamic fields typed as textEdgeNgram1; both are indexed and stored.
    The ...S pattern is single-valued, the ...M pattern is multi-valued. -->
<dynamicField name="*_textEdgeNgram1S" type="textEdgeNgram1" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_textEdgeNgram1M" type="textEdgeNgram1" indexed="true" stored="true" multiValued="true"/>

<!-- Language-sensitive string field allowing proper sorting
    (ICU collation, locale "de", primary strength) -->
<fieldType name="string_collated" class="solr.ICUCollationField" locale="de" strength="primary"/>
    <!-- General German text type of the 6.6 schema. It uses the non-graph
        WordDelimiterFilterFactory and ManagedSynonymFilterFactory, so no
        FlattenGraphFilterFactory appears in the index chain. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
    <!-- Index-time chain: whitespace tokens, word-delimiter splitting (with catenation),
        lower-casing, managed synonyms, compound decomposition, managed stop words,
        German normalization, German2 Snowball stemming, de-duplication. -->
    <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                splitOnCaseChange="1"
                preserveOriginal="1"
                protected="german/protwords.txt"
        />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymFilterFactory" managed="german"/>
        <filter class="solr.DictionaryCompoundWordTokenFilterFactory"
                dictionary="german/german-common-nouns.txt"
                minWordSize="5"
                minSubwordSize="4"
                maxSubwordSize="15"
                onlyLongestMatch="false"
        />
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="german/protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <!-- Query-time chain: no catenation (catenateWords/catenateNumbers are 0)
        and no compound decomposition. -->
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                splitOnCaseChange="1"
                preserveOriginal="1"
                protected="german/protwords.txt"
        />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymFilterFactory" managed="german"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="german/protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>

<!-- Tighter-matching text type of the 6.6 schema. The word-delimiter filter only
    catenates parts (generateWordParts/generateNumberParts are 0) and keeps the original token. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
    <!-- A single analyzer without a type attribute: one chain is declared for the whole field type. -->
    <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymFilterFactory" managed="german"/>
        <filter class="solr.DictionaryCompoundWordTokenFilterFactory"
                dictionary="german/german-common-nouns.txt"
                minWordSize="5"
                minSubwordSize="4"
                maxSubwordSize="15"
                onlyLongestMatch="false"
        />
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="0"
                generateNumberParts="0"
                catenateWords="1"
                catenateNumbers="1"
                catenateAll="0"
                preserveOriginal="1"
                protected="german/protwords.txt"
        />
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="german/protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>

<!-- Exact-word text type of the 6.6 schema: whitespace tokens, lower-casing,
    managed synonyms, managed stop words and de-duplication; no splitting or stemming. -->
<fieldType name="textExact" class="solr.TextField" positionIncrementGap="100">
    <!-- A single analyzer without a type attribute: one chain is declared for the whole field type. -->
    <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymFilterFactory" managed="german"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>
<!--  Setup simple analysis for spell checking; norms are omitted for this type.  -->
<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" omitNorms="true">
    <!-- Index-time chain: standard tokenizer, lower-casing, compound decomposition,
        managed stop words, StandardFilter, de-duplication. No synonym filter is configured. -->
    <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.DictionaryCompoundWordTokenFilterFactory" dictionary="german/german-common-nouns.txt"
                minWordSize="5" minSubwordSize="4" maxSubwordSize="15" onlyLongestMatch="false"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.StandardFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
    <!-- Query-time chain: standard tokenizer, lower-casing, managed synonyms,
        managed stop words, StandardFilter, de-duplication; no compound decomposition. -->
    <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ManagedSynonymFilterFactory" managed="german"/>
        <filter class="solr.ManagedStopFilterFactory" managed="german"/>
        <filter class="solr.StandardFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
</fieldType>

<!-- Prefix-matching text type of the 6.6 schema, used by the
    *_textEdgeNgram1S / *_textEdgeNgram1M dynamic fields. -->
<fieldType name="textEdgeNgram1" class="solr.TextField" positionIncrementGap="100">
    <!-- Index time: lower-case tokenizer, then the leading 1 to 15 character prefixes of every token. -->
    <analyzer type="index">
        <tokenizer class="solr.LowerCaseTokenizerFactory"/>
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="15"/>
    </analyzer>
    <!-- Query time: lower-case tokenizer only; no n-gram filter is applied to query terms. -->
    <analyzer type="query">
        <tokenizer class="solr.LowerCaseTokenizerFactory"/>
    </analyzer>
</fieldType>

Спасибо за помощь :) Ваш Флориан

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...