Проблема подсветки (highlight) термина с подстановочным знаком в запросе query_string - PullRequest
0 голосов
/ 29 октября 2018

Мы используем Elasticsearch версии 5.4.3

1: Мы создали индекс со следующими настройками и маппингом (mapping).

PUT http://localhost:9200/essearch
{ 
"mappings": {
    "object": {
        "_all": {
            "enabled": false
        },
        "properties": {
           "content": {
                "type": "text",
                "term_vector": "with_positions_offsets",
                "similarity": "classic",
                "analyzer": "content_standard"
            },
            "content_phonic": {
                "type": "text",
                "term_vector": "with_positions_offsets",
                "similarity": "classic",
                "analyzer": "content_phonetic"
            },
            "content_stemming": {
                "type": "text",
                "term_vector": "with_positions_offsets",
                "similarity": "classic",
                "analyzer": "content_stemming"
            }
        }
    }
},
"settings": {
    "index": {
        "number_of_shards": "1",
        "similarity": {
            "default": {
                "type": "classic"
            }
        },
        "max_result_window": "50000",
        "mapper": {
            "dynamic": "false"
        },
        "analysis": {
            "filter": {
                "content_phonetic": {
                    "type": "phonetic",
                    "encoder": "doublemetaphone"
                },
                "StopWords": {
                    "type": "stop",
                    "stopwords": [
                        "after",
                        "all",
                        "under",
                        "very",
                        "well"]
                }
            },
            "analyzer": {
                "content_phonetic": {
                    "filter": [
                        "content_phonetic"
                    ],
                    "char_filter": [
                        "CharFilters"
                    ],
                    "type": "custom",
                    "tokenizer": "standard"
                },
                "content_stemming": {
                    "filter": [
                        "lowercase",
                        "porter_stem"
                    ],
                    "char_filter": [
                        "CharFilters"
                    ],
                    "type": "custom",
                    "tokenizer": "standard"
                },
                "content_standard": {
                    "filter": [
                        "lowercase",
                        "StopWords"
                    ],
                    "char_filter": [
                        "CharFilters"
                    ],
                    "type": "custom",
                    "tokenizer": "standard"
                }
            },
            "char_filter": {
                "CharFilters": {
                    "type": "mapping",
                    "mappings": [
                        ". => ' '",
                        "' => ' '",
                        "_ => ' '",
                        ": => ' '"
                    ]
                }
            }
        },
        "number_of_replicas": "0"
    }
}}

2: проиндексированный документ

http://localhost:9200/essearch/object/1
{ "content" : "beginning thirty days after the anticipated COD. 
         Buyer shall be responsible for all natural gas and electrical imbalance charges.
         All prices shall be at the Reference Conditions.
         Buyer’s performance of its obligations under the ECSA with a form of guarantee in an amount. Seller shall assign its rights under said requests to Buyer.  Buyer shall have full dispatch rights subject to operational parameters  (including ramp rates. buyer said to me..."   }

3: Выполнили запрос с подсветкой (highlight)

 http://localhost:9200/essearch/_search
  {
   "highlight": {
      "pre_tags": [ "<term0 style='background-color:Lime'>", "<term1 style='background-color:Chocolate'>", "<term2 style='background-color:Pink'>"
   ],"post_tags": [ "</term0>", "</term1>", "</term2>" ],
  "encoder": "html",
   "fields": { "content": { "fragment_size": 50, "number_of_fragments": 0, 
   "type": "fvh" } } },
    "_source": false,
   "query": {
   "bool": {
       "must": [
{
  "query_string": {
    "query": "(a*) OR (said) OR (buyer)",
    "default_field": "content"}} ],
     "filter": [
{
  "ids": {
    "values": [ "1" ] } } ] } } }

4: Результат запроса с подсветкой

{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
 },
"hits": {
"total": 1,
"max_score": 0,
"hits": [
  {
    "_index": "hi3",
    "_type": "object",
    "_id": "1",
    "_score": 0,
    "highlight": {
      "content": [
        "beginning thirty days after the <term0 style='background-color:red'>anticipated</term0> COD.<term2 style='background-color:Pink'>Buyer</term2> shall be responsible for all natural gas <term2 style='background-color:Pink'>and</term2> electrical imbalance charges. All prices shall be <term2 style='background-color:Pink'>at</term2> the Reference Conditions. Buyer’s performance of its obligations under the ECSA with a form of guarantee in <term1 style='background-color:yellow'>an</term1> <term0 style='background-color:red'>amount</term0>. Seller shall <term1 style='background-color:yellow'>assign</term1> its rights under <term2 style='background-color:Pink'>said</term2> requests to <term2 style='background-color:Pink'>Buyer</term2>.  <term2 style='background-color:Pink'>Buyer</term2> shall have full dispatch rights subject to operational parameters  (including ramp rates. <term2 style='background-color:Pink'>buyer</term2> <term2 style='background-color:Pink'>said</term2> to me..."
      ]
    }
  }
]
}
 }

В приведённом выше запросе тег «term0» должен применяться ко всем словам, начинающимся с «a», тег «term1» — к слову «said», а тег «term2» — к слову «buyer», но этого не происходит. Я где-то читал, что fvh может применять несколько тегов в порядке следования терминов в запросе, однако здесь это не работает.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...