Запрос match по полю с пользовательским анализатором работает не так, как ожидается, с параметрами operator или minimum_should_match - PullRequest
0 голосов
/ 21 февраля 2019

Я создал специальный анализатор на основе шаблонов для одного из полей. В большинстве случаев он создаёт 2 токена. Но когда я пытаюсь использовать запрос match с оператором AND или с minimum_should_match, равным 100%, он возвращает записи, даже если совпал только один токен.

Отображение (mapping) для индекса:

{
  "settings": {
    "analysis": {
      "analyzer": {
        "test_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "lowercase",
            "test_pattern",
            "unique"
          ]
        }
      },
      "filter": {
        "test_pattern": {
          "type": "pattern_capture",
          "preserve_original": 0,
          "patterns": [
            ".*###(\\d*)###(.*###.*###.*)",
            ".*###(.*###.*###.*)"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc_type": {
      "properties": {
        "test_value": {
          "type": "text",
          "analyzer": "test_analyzer"
        }
      }
    }
  }
}

Тестовые документы:

{
  "test_value": "abc###def###12345###jkl###mno###pqr"
}

{
  "test_value": "abc###def###12367###jkl###mno###pqr"
}

Запрос:

{
  "query": {
    "match": {
      "test_value": {
        "query": "abc###def###12345###jkl###mno###pqr",
        "operator": "AND"
      }
    }
  }
}

Приведённый выше запрос возвращает обе записи.

Я пытался разобраться в объяснении результата (explain). Я не понимаю, почему в объяснении присутствует Synonym. Не могли бы вы подсказать, в чём я ошибаюсь?

{
  "took": 0,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.46029136,
    "hits": [
      {
        "_shard": "[test_stack][1]",
        "_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
        "_index": "test_stack",
        "_type": "doc_type",
        "_id": "AWkPiO2DN2C8SdyE0d6K",
        "_score": 0.46029136,
        "_source": {
          "test_value": "abc###def###12345###jkl###mno###pqr"
        },
        "_explanation": {
          "value": 0.46029136,
          "description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.46029136,
              "description": "score(doc=0,freq=2.0 = termFreq=2.0 ), product of:",
              "details": [
                {
                  "value": 0.2876821,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "docFreq",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "docCount",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 1.6,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                    {
                      "value": 2,
                      "description": "termFreq=2.0",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "avgFieldLength",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "fieldLength",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      },
      {
        "_shard": "[test_stack][4]",
        "_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
        "_index": "test_stack",
        "_type": "doc_type",
        "_id": "AWkPiQfJN2C8SdyE0d6L",
        "_score": 0.36165747,
        "_source": {
          "test_value": "abc###def###12378###jkl###mno###pqr"
        },
        "_explanation": {
          "value": 0.3616575,
          "description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
          "details": [
            {
              "value": 0.3616575,
              "description": "score(doc=0,freq=1.0 = termFreq=1.0 ), product of:",
              "details": [
                {
                  "value": 0.2876821,
                  "description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "docFreq",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "docCount",
                      "details": []
                    }
                  ]
                },
                {
                  "value": 1.2571429,
                  "description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
                  "details": [
                    {
                      "value": 1,
                      "description": "termFreq=1.0",
                      "details": []
                    },
                    {
                      "value": 1.2,
                      "description": "parameter k1",
                      "details": []
                    },
                    {
                      "value": 0.75,
                      "description": "parameter b",
                      "details": []
                    },
                    {
                      "value": 2,
                      "description": "avgFieldLength",
                      "details": []
                    },
                    {
                      "value": 1,
                      "description": "fieldLength",
                      "details": []
                    }
                  ]
                }
              ]
            }
          ]
        }
      }
    ]
  }
}
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...