Я относительно новичок в Elasticsearch
и у меня возникла проблема с определением того, почему число записей с питона dataframe
отличается от количества документов индекса Elasticsearch
.
Я начинаю с создания индекса, выполняя следующее: Как вы можете видеть, есть 62932 записи.
Я создаю индекс вasticsearch, используя следующее: Python code
Когда я проверяю индекс в Kibana Management/Index Management
, в нем только 62630 документов. Согласно окну статистики было 302 удаленных отсчета. Я не знаю, что это значит.
Ниже выводится окно STATS
{
"_shards": {
"total": 2,
"successful": 1,
"failed": 0
},
"stats": {
"uuid": "egOx_6EwTFysBr0WkJyR1Q",
"primaries": {
"docs": {
"count": 62630,
"deleted": 302
},
"store": {
"size_in_bytes": 4433722
},
"indexing": {
"index_total": 62932,
"index_time_in_millis": 3235,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 140,
"query_time_in_millis": 1178,
"query_current": 0,
"fetch_total": 140,
"fetch_time_in_millis": 1233,
"fetch_current": 0,
"scroll_total": 1,
"scroll_time_in_millis": 6262,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 2,
"total_time_in_millis": 417,
"total_docs": 62932,
"total_size_in_bytes": 4882755,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 0,
"total_auto_throttle_in_bytes": 20971520
},
"refresh": {
"total": 26,
"total_time_in_millis": 597,
"external_total": 24,
"external_total_time_in_millis": 632,
"listeners": 0
},
"flush": {
"total": 1,
"periodic": 0,
"total_time_in_millis": 10
},
"warmer": {
"current": 0,
"total": 23,
"total_time_in_millis": 0
},
"query_cache": {
"memory_size_in_bytes": 17338,
"total_count": 283,
"hit_count": 267,
"miss_count": 16,
"cache_size": 4,
"cache_count": 4,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 2,
"memory_in_bytes": 22729,
"terms_memory_in_bytes": 17585,
"stored_fields_memory_in_bytes": 2024,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 512,
"points_memory_in_bytes": 2112,
"doc_values_memory_in_bytes": 496,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 62932,
"size_in_bytes": 17585006,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 55,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
},
"total": {
"docs": {
"count": 62630,
"deleted": 302
},
"store": {
"size_in_bytes": 4433722
},
"indexing": {
"index_total": 62932,
"index_time_in_millis": 3235,
"index_current": 0,
"index_failed": 0,
"delete_total": 0,
"delete_time_in_millis": 0,
"delete_current": 0,
"noop_update_total": 0,
"is_throttled": false,
"throttle_time_in_millis": 0
},
"get": {
"total": 0,
"time_in_millis": 0,
"exists_total": 0,
"exists_time_in_millis": 0,
"missing_total": 0,
"missing_time_in_millis": 0,
"current": 0
},
"search": {
"open_contexts": 0,
"query_total": 140,
"query_time_in_millis": 1178,
"query_current": 0,
"fetch_total": 140,
"fetch_time_in_millis": 1233,
"fetch_current": 0,
"scroll_total": 1,
"scroll_time_in_millis": 6262,
"scroll_current": 0,
"suggest_total": 0,
"suggest_time_in_millis": 0,
"suggest_current": 0
},
"merges": {
"current": 0,
"current_docs": 0,
"current_size_in_bytes": 0,
"total": 2,
"total_time_in_millis": 417,
"total_docs": 62932,
"total_size_in_bytes": 4882755,
"total_stopped_time_in_millis": 0,
"total_throttled_time_in_millis": 0,
"total_auto_throttle_in_bytes": 20971520
},
"refresh": {
"total": 26,
"total_time_in_millis": 597,
"external_total": 24,
"external_total_time_in_millis": 632,
"listeners": 0
},
"flush": {
"total": 1,
"periodic": 0,
"total_time_in_millis": 10
},
"warmer": {
"current": 0,
"total": 23,
"total_time_in_millis": 0
},
"query_cache": {
"memory_size_in_bytes": 17338,
"total_count": 283,
"hit_count": 267,
"miss_count": 16,
"cache_size": 4,
"cache_count": 4,
"evictions": 0
},
"fielddata": {
"memory_size_in_bytes": 0,
"evictions": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 2,
"memory_in_bytes": 22729,
"terms_memory_in_bytes": 17585,
"stored_fields_memory_in_bytes": 2024,
"term_vectors_memory_in_bytes": 0,
"norms_memory_in_bytes": 512,
"points_memory_in_bytes": 2112,
"doc_values_memory_in_bytes": 496,
"index_writer_memory_in_bytes": 0,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0,
"max_unsafe_auto_id_timestamp": -1,
"file_sizes": {}
},
"translog": {
"operations": 62932,
"size_in_bytes": 17585006,
"uncommitted_operations": 0,
"uncommitted_size_in_bytes": 55,
"earliest_last_modified_age": 0
},
"request_cache": {
"memory_size_in_bytes": 0,
"evictions": 0,
"hit_count": 0,
"miss_count": 0
},
"recovery": {
"current_as_source": 0,
"current_as_target": 0,
"throttle_time_in_millis": 0
}
}
}
}
Почему количество документов отличается от общего индекса? Я экспортировал данные, и количество записей соответствует количеству документов. Как я могу узнать, почему документы были удалены, и убедиться, что их нет в будущем?