Мы используем Elasticsearch для индексации записей о клиентах (имя + адрес) и применяем в запросах триграммный поиск.
Когда я профилировал некоторые запросы, я заметил нечто странное, чего не понимаю, и надеюсь, что кто-нибудь сможет это объяснить.
Посмотрите на вывод профилировщика ниже и обратите внимание на время, затраченное на каждый из триграммных запросов TermQuery.
Например, четыре запроса TermQuery для firstName "Sabine" занимают около 208.000–244.000 нс каждый, и это более или менее соответствует всем остальным триграммным запросам в этом поиске (fullName, улица, город)... ЗА ИСКЛЮЧЕНИЕМ lastName. Здесь отдельные запросы TermQuery занимают около 1,2–1,5 млн нс каждый, что значительно медленнее и в сумме составляет более 24% от общего времени запроса.
Сначала я думал, что это может быть связано с общим количеством триграмм в каждом индексе. То есть, поскольку имя в среднем может быть короче фамилии, в инвертированном индексе имени будет меньше триграмм, и поиск по нему будет быстрее.
Но это не согласуется с полем улицы, которое в среднем, вероятно, является самым длинным и должно было бы быть самым медленным. Однако это не так: оно находится на одном уровне с запросом по имени (firstName) и даже завершается чуть быстрее.
Кстати, в этом индексе около 15 миллионов записей, поэтому, когда я говорю «в среднем», это на самом деле довольно надёжное среднее значение для такого предположения.
100,0% 21.193.917ns ( 0,02s) BooleanQuery ((Synonym(person.fullName.phonetic:sYbini person.fullName.phonetic:sabin person.fullName.phonetic:sabini person.fullName.phonetic:savini person.fullName.phonetic:sobin person.fullName.phonetic:sobini person.fullName.phonetic:sovini person.fullName.phonetic:zYbini person.fullName.phonetic:zabini person.fullName.phonetic:zobini) Synonym(person.fullName.phonetic:kDnik person.fullName.phonetic:kYnik person.fullName.phonetic:koinik person.fullName.phonetic:kunik))^2.0 (person.fullName.trigram:sab person.fullName.trigram:abi person.fullName.trigram:bin person.fullName.trigram:ine person.fullName.trigram:koe person.fullName.trigram:oen person.fullName.trigram:eni person.fullName.trigram:nig)^2.0 Synonym(person.firstName.phonetic:sYbini person.firstName.phonetic:sabin person.firstName.phonetic:sabini person.firstName.phonetic:savini person.firstName.phonetic:sobin person.firstName.phonetic:sobini person.firstName.phonetic:sovini person.firstName.phonetic:zYbini person.firstName.phonetic:zabini person.firstName.phonetic:zobini) (person.firstName.trigram:sab person.firstName.trigram:abi person.firstName.trigram:bin person.firstName.trigram:ine) (Synonym(person.lastName.phonetic:kDnik person.lastName.phonetic:kYnik person.lastName.phonetic:koinik person.lastName.phonetic:kunik))^2.0 (person.lastName.trigram:koe person.lastName.trigram:oen person.lastName.trigram:eni person.lastName.trigram:nig)^2.0 (Synonym(address.street.phonetic:Dpdbl address.street.phonetic:apdbl address.street.phonetic:updbl address.street.phonetic:xDpdbl address.street.phonetic:xapdbl address.street.phonetic:xupdbl))^2.0 (address.street.trigram:hau address.street.trigram:aup address.street.trigram:upt address.street.trigram:ptb address.street.trigram:tbu address.street.trigram:bue address.street.trigram:ueh address.street.trigram:ehl)^2.0 (address.houseNo:11)^9.0 (address.normalizedHouseNos:11)^7.5 (address.neighboringHouseNos:11)^3.0 (address.zip.ngram:886 
address.zip.ngram:8860 address.zip.ngram:88605) address.zip:88605 (Synonym(address.city.phonetic:mYskQrx address.city.phonetic:mYskirx address.city.phonetic:miskQrx address.city.phonetic:miskirtS address.city.phonetic:miskirts address.city.phonetic:miskirx))^2.0 (address.city.trigram:mes address.city.trigram:ess address.city.trigram:ssk address.city.trigram:ski address.city.trigram:kir address.city.trigram:irc address.city.trigram:rch)^2.0)~11
10,4% 2.206.373ns ( 0,00s) BoostQuery (Synonym(person.fullName.phonetic:sYbini person.fullName.phonetic:sabin person.fullName.phonetic:sabini person.fullName.phonetic:savini person.fullName.phonetic:sobin person.fullName.phonetic:sobini person.fullName.phonetic:sovini person.fullName.phonetic:zYbini person.fullName.phonetic:zabini person.fullName.phonetic:zobini) Synonym(person.fullName.phonetic:kDnik person.fullName.phonetic:kYnik person.fullName.phonetic:koinik person.fullName.phonetic:kunik))^2.0
68,5% 1.510.292ns ( 0,00s) SynonymQuery Synonym(person.fullName.phonetic:sYbini person.fullName.phonetic:sabin person.fullName.phonetic:sabini person.fullName.phonetic:savini person.fullName.phonetic:sobin person.fullName.phonetic:sobini person.fullName.phonetic:sovini person.fullName.phonetic:zYbini person.fullName.phonetic:zabini person.fullName.phonetic:zobini)
23,0% 506.711ns ( 0,00s) SynonymQuery Synonym(person.fullName.phonetic:kDnik person.fullName.phonetic:kYnik person.fullName.phonetic:koinik person.fullName.phonetic:kunik)
7,2% 1.529.277ns ( 0,00s) BoostQuery (person.fullName.trigram:sab person.fullName.trigram:abi person.fullName.trigram:bin person.fullName.trigram:ine person.fullName.trigram:koe person.fullName.trigram:oen person.fullName.trigram:eni person.fullName.trigram:nig)^2.0
11,7% 179.616ns ( 0,00s) TermQuery person.fullName.trigram:sab
9,8% 149.382ns ( 0,00s) TermQuery person.fullName.trigram:abi
9,5% 145.463ns ( 0,00s) TermQuery person.fullName.trigram:bin
9,9% 151.973ns ( 0,00s) TermQuery person.fullName.trigram:ine
8,6% 131.949ns ( 0,00s) TermQuery person.fullName.trigram:koe
9,5% 144.911ns ( 0,00s) TermQuery person.fullName.trigram:oen
9,3% 142.253ns ( 0,00s) TermQuery person.fullName.trigram:eni
9,0% 138.348ns ( 0,00s) TermQuery person.fullName.trigram:nig
6,3% 1.329.048ns ( 0,00s) SynonymQuery Synonym(person.firstName.phonetic:sYbini person.firstName.phonetic:sabin person.firstName.phonetic:sabini person.firstName.phonetic:savini person.firstName.phonetic:sobin person.firstName.phonetic:sobini person.firstName.phonetic:sovini person.firstName.phonetic:zYbini person.firstName.phonetic:zabini person.firstName.phonetic:zobini)
5,7% 1.215.579ns ( 0,00s) BooleanQuery person.firstName.trigram:sab person.firstName.trigram:abi person.firstName.trigram:bin person.firstName.trigram:ine
20,1% 244.919ns ( 0,00s) TermQuery person.firstName.trigram:sab
17,6% 213.711ns ( 0,00s) TermQuery person.firstName.trigram:abi
17,2% 208.705ns ( 0,00s) TermQuery person.firstName.trigram:bin
18,1% 219.848ns ( 0,00s) TermQuery person.firstName.trigram:ine
6,8% 1.436.954ns ( 0,00s) BoostQuery (Synonym(person.lastName.phonetic:kDnik person.lastName.phonetic:kYnik person.lastName.phonetic:koinik person.lastName.phonetic:kunik))^2.0
24,3% 5.147.155ns ( 0,01s) BoostQuery (person.lastName.trigram:koe person.lastName.trigram:oen person.lastName.trigram:eni person.lastName.trigram:nig)^2.0
29,3% 1.506.565ns ( 0,00s) TermQuery person.lastName.trigram:koe
27,8% 1.432.001ns ( 0,00s) TermQuery person.lastName.trigram:oen
25,9% 1.335.459ns ( 0,00s) TermQuery person.lastName.trigram:eni
24,0% 1.233.435ns ( 0,00s) TermQuery person.lastName.trigram:nig
2,7% 573.936ns ( 0,00s) BoostQuery (Synonym(address.street.phonetic:Dpdbl address.street.phonetic:apdbl address.street.phonetic:updbl address.street.phonetic:xDpdbl address.street.phonetic:xapdbl address.street.phonetic:xupdbl))^2.0
6,7% 1.413.559ns ( 0,00s) BoostQuery (address.street.trigram:hau address.street.trigram:aup address.street.trigram:upt address.street.trigram:ptb address.street.trigram:tbu address.street.trigram:bue address.street.trigram:ueh address.street.trigram:ehl)^2.0
12,6% 178.647ns ( 0,00s) TermQuery address.street.trigram:hau
10,8% 152.014ns ( 0,00s) TermQuery address.street.trigram:aup
10,6% 150.251ns ( 0,00s) TermQuery address.street.trigram:upt
6,1% 86.559ns ( 0,00s) TermQuery address.street.trigram:ptb
8,3% 116.926ns ( 0,00s) TermQuery address.street.trigram:tbu
10,0% 141.978ns ( 0,00s) TermQuery address.street.trigram:bue
9,9% 139.370ns ( 0,00s) TermQuery address.street.trigram:ueh
10,2% 144.440ns ( 0,00s) TermQuery address.street.trigram:ehl
5,4% 1.135.597ns ( 0,00s) BoostQuery (address.houseNo:11)^9.0
1,9% 395.763ns ( 0,00s) BoostQuery (address.normalizedHouseNos:11)^7.5
0,7% 150.071ns ( 0,00s) BoostQuery (address.neighboringHouseNos:11)^3.0
14,7% 3.116.334ns ( 0,00s) BooleanQuery address.zip.ngram:886 address.zip.ngram:8860 address.zip.ngram:88605
29,0% 904.591ns ( 0,00s) TermQuery address.zip.ngram:886
6,4% 198.371ns ( 0,00s) TermQuery address.zip.ngram:8860
6,2% 194.678ns ( 0,00s) TermQuery address.zip.ngram:88605
1,2% 244.842ns ( 0,00s) TermQuery address.zip:88605
3,3% 702.358ns ( 0,00s) BoostQuery (Synonym(address.city.phonetic:mYskQrx address.city.phonetic:mYskirx address.city.phonetic:miskQrx address.city.phonetic:miskirtS address.city.phonetic:miskirts address.city.phonetic:miskirx))^2.0
7,4% 1.577.788ns ( 0,00s) BoostQuery (address.city.trigram:mes address.city.trigram:ess address.city.trigram:ssk address.city.trigram:ski address.city.trigram:kir address.city.trigram:irc address.city.trigram:rch)^2.0
11,3% 177.748ns ( 0,00s) TermQuery address.city.trigram:mes
12,4% 196.403ns ( 0,00s) TermQuery address.city.trigram:ess
8,7% 137.265ns ( 0,00s) TermQuery address.city.trigram:ssk
9,7% 152.642ns ( 0,00s) TermQuery address.city.trigram:ski
11,2% 177.189ns ( 0,00s) TermQuery address.city.trigram:kir
11,3% 177.635ns ( 0,00s) TermQuery address.city.trigram:irc
10,9% 171.608ns ( 0,00s) TermQuery address.city.trigram:rch
Запрос для справки:
{
"profile": true,
"size": 200,
"timeout": "10s",
"query": {
"bool": {
"should": [
{
"match": {
"person.fullName.phonetic": {
"query": "sabine koenig",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
},
{
"match": {
"person.fullName.trigram": {
"query": "sabine koenig",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
},
{
"match": {
"person.firstName.phonetic": {
"query": "sabine",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1
}
}
},
{
"match": {
"person.firstName.trigram": {
"query": "sabine",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1
}
}
},
{
"match": {
"person.lastName.phonetic": {
"query": "koenig",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
},
{
"match": {
"person.lastName.trigram": {
"query": "koenig",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
},
{
"match": {
"address.street.phonetic": {
"query": "am hauptbuehl",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
},
{
"match": {
"address.street.trigram": {
"query": "am hauptbuehl",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
},
{
"match": {
"address.houseNo": {
"query": "11",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 9
}
}
},
{
"match": {
"address.normalizedHouseNos": {
"query": "11",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 7.5
}
}
},
{
"match": {
"address.neighboringHouseNos": {
"query": "11",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 3
}
}
},
{
"match": {
"address.zip.ngram": {
"query": "88605",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1
}
}
},
{
"match": {
"address.zip": {
"query": "88605",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 1
}
}
},
{
"match": {
"address.city.phonetic": {
"query": "messkirch",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
},
{
"match": {
"address.city.trigram": {
"query": "messkirch",
"operator": "OR",
"prefix_length": 0,
"max_expansions": 50,
"fuzzy_transpositions": true,
"lenient": false,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"boost": 2
}
}
}
],
"adjust_pure_negative": true,
"minimum_should_match": "11",
"boost": 1
}
}
}