Я пытаюсь использовать подсветку Solr, но у меня возникли некоторые проблемы. Когда я публикую этот URL http://localhost:8983/solr/pesquisa-jurisprudencia/select?fl=id,assunto&hl=on&q=insalubridade&wt=json&hl.fl=*
, он не возвращает выделенный термин:
{
"responseHeader":{
"status":0,
"QTime":167,
"params":{
"q":"insalubridade",
"hl":"on",
"fl":"id,assunto",
"hl.fl":"*",
"wt":"json"}},
"response":{"numFound":8,"start":0,"docs":[
{
"id":"saj-4815412",
"assunto":["Adicional de Insalubridade",
"Assistência Judiciária Gratuita",
"Aviso-prévio",
"Décimo Terceiro Salário [Proporcional]",
"Férias [Proporcionais]",
"Fruição / Gozo",
"Horas Extras",
"Indenização / Dobra / Terço Constitucional",
"Intervalo Intrajornada",
"Levantamento / Liberação",
"Multa [de 40%] do FGTS",
"Reflexos",
"Salário por Equiparação / Isonomia",
"Saldo de Salário"]},
{
"id":"saj-4676226",
"assunto":["Adicional de Insalubridade",
"Assistência Judiciária Gratuita",
"Aviso-prévio",
"Décimo Terceiro Salário [Proporcional]",
"Férias [Proporcionais]",
"Fruição / Gozo",
"Horas Extras",
"Indenização / Dobra / Terço Constitucional",
"Intervalo Intrajornada",
"Levantamento / Liberação",
"Multa [de 40%] do FGTS",
"Reflexos",
"Salário por Equiparação / Isonomia",
"Saldo de Salário"]},
{
"id":"saj-661600"},
{
"id":"pje1-24544513",
"assunto":["Saldo de Salário"]},
{
"id":"pje2-8188452",
"assunto":["Adicional de Insalubridade",
"Grupo Econômico"]},
{
"id":"pje2-10910741",
"assunto":["Adicional de Insalubridade",
"Grupo Econômico"]},
{
"id":"pje2-7109330",
"assunto":["Adicional de Horas Extras"]},
{
"id":"pje1-6880206",
"assunto":["Efeitos",
"Integração em Verbas Rescisórias"]}]
},
"highlighting":{
"saj-4815412":{},
"saj-4676226":{},
"saj-661600":{},
"pje1-24544513":{},
"pje2-8188452":{},
"pje2-10910741":{},
"pje2-7109330":{},
"pje1-6880206":{}}}
Хотя, когда я ищу "adicional", публикуя этот URL http://localhost:8983/solr/pesquisa-jurisprudencia/select?fl=id,assunto&hl=on&q=adicional&wt=json&hl.fl=*
, он работает.
{
"responseHeader":{
"status":0,
"QTime":88,
"params":{
"q":"adicional",
"hl":"on",
"fl":"id,assunto",
"hl.fl":"*",
"wt":"json"}},
"response":{"numFound":32,"start":0,"docs":[
{
"id":"saj-4815412",
"assunto":["Adicional de Insalubridade",
"Assistência Judiciária Gratuita",
"Aviso-prévio",
"Décimo Terceiro Salário [Proporcional]",
"Férias [Proporcionais]",
"Fruição / Gozo",
"Horas Extras",
"Indenização / Dobra / Terço Constitucional",
"Intervalo Intrajornada",
"Levantamento / Liberação",
"Multa [de 40%] do FGTS",
"Reflexos",
"Salário por Equiparação / Isonomia",
"Saldo de Salário"]},
{
"id":"pje1-14030983",
"assunto":["Diferenças por Desvio de Função"]},
{
"id":"saj-4676226",
"assunto":["Adicional de Insalubridade",
"Assistência Judiciária Gratuita",
"Aviso-prévio",
"Décimo Terceiro Salário [Proporcional]",
"Férias [Proporcionais]",
"Fruição / Gozo",
"Horas Extras",
"Indenização / Dobra / Terço Constitucional",
"Intervalo Intrajornada",
"Levantamento / Liberação",
"Multa [de 40%] do FGTS",
"Reflexos",
"Salário por Equiparação / Isonomia",
"Saldo de Salário"]},
{
"id":"pje2-8188452",
"assunto":["Adicional de Insalubridade",
"Grupo Econômico"]},
{
"id":"saj-661600"},
{
"id":"pje1-13247674",
"assunto":["Adicional de Hora Extra"]},
{
"id":"sap2-732470",
"assunto":["Horas In Itinere",
"Supressão de Horas Extras Habituais - Indenização"]},
{
"id":"pje1-24446947",
"assunto":["Abono",
"Abono Pecuniário",
"Acordo Individual e/ou Coletivo de Trabalho",
"Adicional",
"Adicional de Hora Extra",
"Adicional de Horas Extras",
"Alteração da Jornada",
"Aviso Prévio",
"Base de Cálculo",
"Cartão de Ponto",
"Controle de Jornada",
"Desconfiguração de Justa Causa",
"Décimo Terceiro Salário",
"Décimo Terceiro Salário Proporcional",
"Efeitos",
"FGTS",
"Folha Individual de Presença",
"Fruição / Gozo",
"Férias / Gozo / Fruição",
"Férias Proporcionais",
"Indenizado - Efeitos",
"Indenização",
"Indenização / Dobra / Terço Constitucional",
"Indenização Adicional",
"Intervalo Intrajornada",
"Levantamento de Valor",
"Liberação / Entrega das Guias",
"Multa de 40% do FGTS",
"Multa do Artigo 467 da CLT",
"Multa do Artigo 477 da CLT",
"Reflexos",
"Saldo de Salário",
"Seguro Desemprego",
"Termo de Rescisão Contratual",
"Verbas Rescisórias",
"Ônus da Prova"]},
{
"id":"pje1-35506695",
"assunto":["Diferenças por Desvio de Função"]},
{
"id":"sap2-493296"}]
},
"highlighting":{
"saj-4815412":{
"assunto":["<em>Adicional</em> de Insalubridade"]},
"pje1-14030983":{},
"saj-4676226":{
"assunto":["<em>Adicional</em> de Insalubridade"]},
"pje2-8188452":{
"assunto":["<em>Adicional</em> de Insalubridade"]},
"saj-661600":{},
"pje1-13247674":{
"assunto":["<em>Adicional</em> de Hora Extra"]},
"sap2-732470":{},
"pje1-24446947":{
"assunto":["<em>Adicional</em>"]},
"pje1-35506695":{},
"sap2-493296":{}}}
С тех пор я могу заметить, что это работает, когда поле начинается с термина, который я ищу. Если термин находится в середине предложения, он не возвращает никакого выделения.
Почему это происходит. Как я могу это исправить? Я использую Solr 8,5, и, что очень важно, я начинающий Solr ....; -)
Итак, я провел еще несколько тестов и сделал некоторые заметки о них в этой таблице .
Если вы проверите в моем первом тесте (# 1), вы увидите, что это сработало. Но только потому, что я сузил запрос по полю "assunto" в параметре "q". Когда я указал общий запрос (без указания поля), он не работает (см. Тест № 3). Хотя в тесте № 6 я искал слово «adicional», которое находится в начале поля, и оно сработало. На тесте № 4 я повторил тест № 3, но изменил метод на «унифицированный», и он тоже не сработал. Похоже, это не проблема, связанная с методом.
Вот моя схема (я удалил все комментарии в целях космоса):
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Definindo o próprio esquema
<schema name="example-DIH-db" version="1.6"-->
<schema name="sentencas" version="1.6">
<field name="_version_" type="plong" indexed="true" stored="true"/>
<field name="_root_" type="string" indexed="true" stored="false"/>
<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
<field name="id_documento" type="string" indexed="true" stored="true"/>
<field name="id_processo" type="string" indexed="true" stored="true"/>
<field name="num_processo" type="string" indexed="true" stored="true"/>
<field name="ano_processo" type="string" indexed="true" stored="true"/>
<field name="id_tipo_documento" type="string" indexed="true" stored="true"/>
<field name="tipo_documento" type="string" indexed="true" stored="true"/>
<field name="origem_dados" type="string" indexed="true" stored="true"/>
<field name="sigla_classe_processual" type="string" indexed="true" stored="true"/>
<field name="desc_classe_processual" type="string" indexed="false" stored="true"/>
<field name="orgao_julgador" type="string" indexed="true" stored="true"/>
<field name="juiz_sentenciante" type="string" indexed="true" stored="true"/>
<field name="turma" type="string" indexed="true" stored="true"/>
<field name="relator" type="string" indexed="true" stored="true"/>
<field name="data_referencia" type="date" indexed="true" stored="true"/>
<field name="nome_data_referencia" type="string" indexed="true" stored="true"/>
<field name="data_assinatura" type="date" indexed="true" stored="true"/>
<field name="data_publicacao" type="date" indexed="true" stored="true"/>
<field name="ementa" type="text_trt18" indexed="true" stored="true" />
<field name="texto_documento" type="text_trt18" indexed="true" stored="true" multiValued="true"/>
<field name="assunto" type="text_pt" indexed="true" stored="true" multiValued="true" termVectors="true"/>
<field name="parte" type="text_pt" indexed="true" stored="true" multiValued="true" termVectors="true"/>
<field name="link_andamentos" type="string" indexed="false" stored="true"/>
<field name="link_visualizar_documento" type="string" indexed="false" stored="true"/>
<field name="link_visualizar_acordao" type="string" indexed="false" stored="true"/>
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
<field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
<uniqueKey>id</uniqueKey>
<copyField source="num_processo" dest="text"/>
<copyField source="ano_processo" dest="text"/>
<copyField source="sigla_classe_processual" dest="text"/>
<copyField source="orgao_julgador" dest="text"/>
<copyField source="texto_documento" dest="text"/>
<copyField source="assunto" dest="text"/>
<copyField source="parte" dest="text"/>
<fieldType name="text_trt18" class="solr.TextField" >
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.BrazilianStemFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.BrazilianStemFilterFactory"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" />
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
<fieldType name="binary" class="solr.BinaryField"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/>
-->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.TrimFilterFactory" />
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z])" replacement="" replace="all"
/>
</analyzer>
</fieldType>
<fieldType name="phonetic" stored="false" indexed="true" class="solr.TextField" >
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
</analyzer>
</fieldType>
<fieldType name="payloads" stored="false" indexed="true" class="solr.TextField" >
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
</analyzer>
</fieldType>
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>
<fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
</fieldType>
<fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
</analyzer>
</fieldType>
<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />
<!-- Portuguese -->
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/>
</analyzer>
</fieldType>
</schema>