Я впервые пытаюсь использовать агрегацию по терминам (terms aggregation), и, похоже, есть проблема с используемым мной токенизатором по шаблону (pattern tokenizer).
Вот маппинг (mapping) индекса:
{
"mappings": {
"properties": {
"contentItemType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "patternAnalyzer"
},
"theme": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "patternAnalyzer"
}
}
},
"settings": {
"analysis": {
"analyzer": {
"patternAnalyzer": {
"tokenizer": "patternTokenizer"
}
},
"tokenizer": {
"patternTokenizer": {
"type": "pattern",
"pattern": ";"
}
}
}
}
}
Когда я выполняю поиск с агрегацией через API http://my_server/index_name/_search, получаю такой результат:
{
"aggregations": {
"group_by_contentItemType": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Correspondence; Reports",
"doc_count": 3
},
{
"key": "Correspondence",
"doc_count": 2
},
{
"key": "Meeting Minutes; Administrative Records; Reports",
"doc_count": 2
},
{
"key": "Correspondence; Legal and Treaty Material; Reports",
"doc_count": 1
},
{
"key": "Correspondence; Memoranda",
"doc_count": 1
},
{
"key": "Memoranda",
"doc_count": 1
},
{
"key": "Reports",
"doc_count": 1
}
]
},
"group_by_theme": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "International Relations",
"doc_count": 2
},
{
"key": "Key Events; Dissent; Dissent; Resistance; Human Rights",
"doc_count": 2
},
{
"key": "Border Security and Migration; Key Events",
"doc_count": 1
},
{
"key": "Border Security and Migration; Second World War Aftermath",
"doc_count": 1
},
{
"key": "Domestic Politics",
"doc_count": 1
},
{
"key": "Domestic Politics; Border Security and Migration",
"doc_count": 1
},
{
"key": "Economics and Trade; International Relations",
"doc_count": 1
},
{
"key": "Embassy and Consulate Administration; Industry and Agriculture; International Relations",
"doc_count": 1
},
{
"key": "Populations and Social Policy; Second World War Aftermath; International Relations",
"doc_count": 1
}
]
}
}
}
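Как видите, с агрегацией есть проблема: ключи бакетов содержат исходные строки целиком (например, «Correspondence; Reports»), а не отдельные значения, разделённые «;». Я бьюсь над этой проблемой уже несколько дней. Я просмотрел множество примеров, но так и не смог её решить. Пожалуйста, помогите. Заранее спасибо!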
ОБНОВЛЕНИЕ: вот полный маппинг после ответа @CatalinM:
{
"local_cwee": {
"mappings": {
"dynamic": "false",
"properties": {
"author": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"commentaries": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"contentDateEndMonth": {
"type": "integer"
},
"contentDateEndSpecified": {
"type": "boolean"
},
"contentDateEndYear": {
"type": "integer"
},
"contentDateMonth": {
"type": "integer"
},
"contentDateMonthSpecified": {
"type": "boolean"
},
"contentDateStartMonth": {
"type": "integer"
},
"contentDateStartSpecified": {
"type": "boolean"
},
"contentDateStartYear": {
"type": "integer"
},
"contentDateYear": {
"type": "integer"
},
"contentDoi": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"contentItemType": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"contentItemTypeFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"contentTitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"copyrightNotices": {
"type": "nested",
"properties": {
"imageName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"text": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"countries": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"country": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"coverDateEndMonth": {
"type": "integer"
},
"coverDateEndSpecified": {
"type": "boolean"
},
"coverDateEndYear": {
"type": "integer"
},
"coverDateMonth": {
"type": "integer"
},
"coverDateMonthSpecified": {
"type": "boolean"
},
"coverDateStartMonth": {
"type": "integer"
},
"coverDateStartSpecified": {
"type": "boolean"
},
"coverDateStartYear": {
"type": "integer"
},
"coverDateYear": {
"type": "integer"
},
"displayName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"documentDoi": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"documentLevel": {
"type": "integer"
},
"keyEvents": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"language": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"languageFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"languages": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"languagesFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"moduleNumber": {
"type": "integer"
},
"notes": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"pageTranscript": {
"type": "text",
"term_vector": "with_positions",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "whiteSpaceAnalyzer"
},
"people": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"publicationDate": {
"type": "integer"
},
"publicationDateEndMonth": {
"type": "integer"
},
"publicationDateEndSpecified": {
"type": "boolean"
},
"publicationDateEndYear": {
"type": "integer"
},
"publicationDateMonth": {
"type": "integer"
},
"publicationDateMonthSpecified": {
"type": "boolean"
},
"publicationDateStartMonth": {
"type": "integer"
},
"publicationDateStartSpecified": {
"type": "boolean"
},
"publicationDateStartYear": {
"type": "integer"
},
"publicationDateYear": {
"type": "integer"
},
"publicationDoi": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"publicationId": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"publicationIdFacet": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"publicationTitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"publicationType": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"publicationTypeFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"publicationYear": {
"type": "integer"
},
"publisherName": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"publisherNameFacet": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subject": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subjectAreas": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subjectAreasFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subjectCountries": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subjectCountriesFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subjectKeyword": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subjectKeywordFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subthemeFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"subthemes": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"theme": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"themeFacets": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
},
"themes": {
"type": "text",
"analyzer": "patternAnalyzer",
"fielddata": true
}
}
}
}
}