Я создал пользовательский анализатор на основе шаблонов (pattern) для одного из полей. В большинстве случаев он создаёт 2 токена. Но когда я использую запрос match с оператором AND или с параметром minimum_should_match, равным 100%, он возвращает записи, даже если совпал только один токен.
Отображение (mapping) для индекса:
{
"settings": {
"analysis": {
"analyzer": {
"test_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase",
"test_pattern",
"unique"
]
}
},
"filter": {
"test_pattern": {
"type": "pattern_capture",
"preserve_original": 0,
"patterns": [
".*###(\\d*)###(.*###.*###.*)",
".*###(.*###.*###.*)"
]
}
}
}
},
"mappings": {
"doc_type": {
"properties": {
"test_value": {
"type": "text",
"analyzer": "test_analyzer"
}
}
}
}
}
Тестовые документы:
{
"test_value": "abc###def###12345###jkl###mno###pqr"
}
{
"test_value": "abc###def###12367###jkl###mno###pqr"
}
Запрос:
{
"query": {
"match": {
"test_value": {
"query": "abc###def###12345###jkl###mno###pqr",
"operator": "AND"
}
}
}
}
Приведённый выше запрос возвращает обе записи.
Я пытался разобраться в объяснении результата (explain). Я не понимаю, почему в объяснении присутствует Synonym. Не могли бы вы подсказать, в чём я ошибаюсь?
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.46029136,
"hits": [
{
"_shard": "[test_stack][1]",
"_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
"_index": "test_stack",
"_type": "doc_type",
"_id": "AWkPiO2DN2C8SdyE0d6K",
"_score": 0.46029136,
"_source": {
"test_value": "abc###def###12345###jkl###mno###pqr"
},
"_explanation": {
"value": 0.46029136,
"description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 0.46029136,
"description": "score(doc=0,freq=2.0 = termFreq=2.0 ), product of:",
"details": [
{
"value": 0.2876821,
"description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
"details": [
{
"value": 1,
"description": "docFreq",
"details": []
},
{
"value": 1,
"description": "docCount",
"details": []
}
]
},
{
"value": 1.6,
"description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
"details": [
{
"value": 2,
"description": "termFreq=2.0",
"details": []
},
{
"value": 1.2,
"description": "parameter k1",
"details": []
},
{
"value": 0.75,
"description": "parameter b",
"details": []
},
{
"value": 2,
"description": "avgFieldLength",
"details": []
},
{
"value": 1,
"description": "fieldLength",
"details": []
}
]
}
]
}
]
}
},
{
"_shard": "[test_stack][4]",
"_node": "JO7WIHxLQKW9b_hc8Xm9fQ",
"_index": "test_stack",
"_type": "doc_type",
"_id": "AWkPiQfJN2C8SdyE0d6L",
"_score": 0.36165747,
"_source": {
"test_value": "abc###def###12378###jkl###mno###pqr"
},
"_explanation": {
"value": 0.3616575,
"description": "weight(Synonym(test_value:12345 test_value:jkl###mno###pqr) in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 0.3616575,
"description": "score(doc=0,freq=1.0 = termFreq=1.0 ), product of:",
"details": [
{
"value": 0.2876821,
"description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
"details": [
{
"value": 1,
"description": "docFreq",
"details": []
},
{
"value": 1,
"description": "docCount",
"details": []
}
]
},
{
"value": 1.2571429,
"description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
"details": [
{
"value": 1,
"description": "termFreq=1.0",
"details": []
},
{
"value": 1.2,
"description": "parameter k1",
"details": []
},
{
"value": 0.75,
"description": "parameter b",
"details": []
},
{
"value": 2,
"description": "avgFieldLength",
"details": []
},
{
"value": 1,
"description": "fieldLength",
"details": []
}
]
}
]
}
]
}
}
]
}
}