Поисковый запрос для aws elasticsearch - PullRequest
0 голосов
/ 19 июня 2020

Я пытаюсь выполнить поиск в Amazon Elasticsearch и получаю результат, но, как известно, запрос возвращает всё, что хоть как-то связано с search_term. Поэтому я хочу применить метрику Джаро — Винклера (Jaro-Winkler) к выдаче Elasticsearch, чтобы выбрать наилучшее совпадение. Я не знаю, как реализовать Jaro-Winkler для записей с типом movie_entity в этом скрипте. Может ли кто-нибудь помочь мне в этом?

Я хочу выполнить запрос так, чтобы он находил именно заданную сущность:

from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth
import boto3
from pyjarowinkler import distance

# --- Connection settings -------------------------------------------------
# Placeholders: fill in the domain endpoint and its AWS region before running.
host = 'xxxxx'  # e.g. my-test-domain.us-east-1.es.amazonaws.com
region = 'xxxxx'  # e.g. us-west-1
service = 'es'

# --- Request authentication ----------------------------------------------
# Credentials come from the default boto3 session; the session token is
# forwarded to AWS4Auth together with the access/secret key pair.
credentials = boto3.Session().get_credentials()
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    service,
    session_token=credentials.token,
)

# --- Elasticsearch client ------------------------------------------------
# HTTPS on port 443 with certificate verification, using the requests-based
# connection class so that `awsauth` is applied to every request.
es = Elasticsearch(
    hosts=[{'host': host, 'port': 443}],
    http_auth=awsauth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

# --- Query ---------------------------------------------------------------
search_term = " Gulabo Sitabo "

# Fuzzy multi_match of the search term against both the primary name field
# and the list of alternative names.
body = {
    "query": {
        "multi_match": {
            "query": search_term,
            "fields": ["entity_name", "aka"],
            "fuzziness": "AUTO",
        },
    },
}

# Run the search and dump the raw response dictionary to stdout.
res = es.search(index="production-widget_id_search", body=body)
print(res)

Это результат, который я получаю.

{'took': 90, 'timed_out': False, '_shards': {'total': 5, 'successful': 5, 'skipped': 0, 'failed': 0}, 'hits': {'total': 251, 'max_score': 20.859459, 'hits': [{'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'm_62629', '_score': 20.859459, '_source': {'created_at': '2020-06-19T01:06:28Z', 'updated_at': '2020-06-19T01:06:28Z', 'entity_name': 'Gulabo Sitabo', 'aka': ['gulabo sitabo'], 'entity_id': 62629, 'active_flag': 1, 'entity_type': 'movie_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'p_126674', '_score': 10.604076, '_source': {'created_at': '2020-06-17T22:45:48Z', 'updated_at': '2020-06-17T22:45:48Z', 'entity_name': 'Joel Slabo', 'aka': [], 'entity_id': 126674, 'active_flag': 1, 'entity_type': 'person_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'p_313185', '_score': 10.086909, '_source': {'created_at': '2020-06-17T21:03:53Z', 'updated_at': '2020-06-17T21:03:53Z', 'entity_name': 'Gulab', 'aka': [], 'entity_id': 313185, 'active_flag': 1, 'entity_type': 'person_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'p_92198', '_score': 10.086909, '_source': {'created_at': '2020-06-17T23:09:16Z', 'updated_at': '2020-06-17T23:09:16Z', 'entity_name': 'Gulab', 'aka': [], 'entity_id': 92198, 'active_flag': 1, 'entity_type': 'person_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'm_41703', '_score': 10.067321, '_source': {'created_at': '2020-06-19T01:17:03Z', 'updated_at': '2020-06-19T01:17:03Z', 'entity_name': 'Bili Gulabi', 'aka': ['bili gulabi', 'bilee gulabi'], 'entity_id': 41703, 'active_flag': 0, 'entity_type': 'movie_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'p_465990', '_score': 9.976623, '_source': {'created_at': '2020-06-17T19:39:28Z', 'updated_at': '2020-06-17T19:39:28Z', 'entity_name': 'Gulaabo', 'aka': [], 'entity_id': 
465990, 'active_flag': 1, 'entity_type': 'person_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'm_9019', '_score': 9.976623, '_source': {'created_at': '2020-06-19T01:46:10Z', 'updated_at': '2020-06-19T01:46:10Z', 'entity_name': 'Gulabi', 'aka': ['gulabi', 'gulabi (film)'], 'entity_id': 9019, 'active_flag': 0, 'entity_type': 'movie_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'm_49410', '_score': 9.012667, '_source': {'created_at': '2020-06-19T01:14:34Z', 'updated_at': '2020-06-19T01:14:35Z', 'entity_name': 'Shweta Gulabi', 'aka': ['shweta gulabi', 'shwetha gulabi'], 'entity_id': 49410, 'active_flag': 1, 'entity_type': 'movie_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'm_9133', '_score': 8.731601, '_source': {'created_at': '2020-06-19T01:46:02Z', 'updated_at': '2020-06-19T01:46:02Z', 'entity_name': 'Silakbo', 'aka': ['silakbo'], 'entity_id': 9133, 'active_flag': 1, 'entity_type': 'movie_entity'}}, {'_index': 'production-widget_id_search', '_type': 'widget_id_search', '_id': 'p_464470', '_score': 8.61012, '_source': {'created_at': '2020-06-17T19:40:41Z', 'updated_at': '2020-06-17T19:40:41Z', 'entity_name': 'Sitara', 'aka': [], 'entity_id': 464470, 'active_flag': 1, 'entity_type': 'person_entity'}}]}}```

...