BulkIndexError: ('500 document(s) failed to index.', ...) при использовании Python + Elasticsearch - PullRequest
0 голосов
/ 13 июля 2020

Код ниже

from elasticsearch import helpers, Elasticsearch
import csv

# Bulk-load the rows of a CSV file into a freshly created Elasticsearch index.
#
# Each CSV row is sent as one document. The document body ('_source') must be
# a JSON object, so rows are read with csv.DictReader (header -> value
# mapping). Reading them with csv.reader produces plain lists, which
# Elasticsearch rejects for every row and helpers.bulk reports as
# "BulkIndexError: N document(s) failed to index".
es = Elasticsearch()

index_name = 'census_data_records'
doctype = 'census_record'

# Start from an empty index so that re-running the script does not pile up
# duplicates; "missing index" / "already exists" responses are ignored.
es.indices.delete(index=index_name, ignore=[400, 404])
es.indices.create(index=index_name, ignore=400)

# newline='' is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly.
with open(r'C:\Users\user\Desktop\police.csv', newline='') as f:
    reader = csv.DictReader(f)
    action_list = [
        {
            '_op_type': 'index',
            '_index': index_name,
            # NOTE(review): mapping types ('_type') are deprecated in
            # Elasticsearch 7 and removed in 8 - confirm the cluster version
            # before keeping this key.
            '_type': doctype,
            # Skip columns without a usable name: the sample file has an
            # unnamed leading index column (key ''), and DictReader stores
            # surplus cells under the key None. Neither is a valid field name.
            '_source': {key: value for key, value in row.items() if key},
        }
        for row in reader
    ]

helpers.bulk(es, action_list)

Набор данных ниже

,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,Kan,"ARREST, BOOKED",800 Block of BRYANT ST,10.98727872,75.44928793
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,Kan,"ARREST, BOOKED",800 Block of BRYANT ST,10.93029836,75.85839714
2,141059263,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,Thi,"ARREST, BOOKED",KEITH ST / SHAFTER AV,10.02948575,74.81278836
3,160013662,NON-CRIMINAL,LOST PROPERTY,Tuesday,01/05/2016 0:00,23:50,Pat,NONE,JONES ST / OFARRELL ST,10.91399488,75.39788708
4,160002740,NON-CRIMINAL,LOST PROPERTY,Friday,01/01/2016 0:00,0:30,Ala,NONE,16TH ST / Alapuzha ST,12.35918751,74.87851143
5,160002869,ASSAULT,BATTERY,Friday,01/01/2016 0:00,21:35,Ern,NONE,1700 Block of BUSH ST,10.87491543,75.96476576
6,160003130,OTHER OFFENSES,PAROLE VIOLATION,Saturday,01/02/2016 0:00,0:04,Kan,"ARREST, BOOKED",MARY ST / HOWARD ST,10.6450246,75.7202032

При запуске я получил ошибку

  • BulkIndexError: ('500 document(s) failed to index.', ...)

Есть ли альтернативный способ загрузить CSV в Elasticsearch? Любая документация или запись в блоге также помогут.

1 Ответ

0 голосов
/ 13 июля 2020
from elasticsearch import helpers, Elasticsearch
import pandas as pd
import json

# Load the CSV with pandas and round-trip it through JSON so that every row
# becomes a plain dict made of JSON-compatible values.
df = pd.read_csv("police.csv")
json_records = json.loads(df.to_json(orient='records'))

es = Elasticsearch()
index_name = 'census_data_records'
doctype = 'census_record'

# Drop and recreate the target index; the listed error statuses are ignored.
es.indices.delete(index=index_name, ignore=[400, 404])
es.indices.create(index=index_name, ignore=400)

# One bulk "index" action per CSV row.
action_list = [
    {
        '_op_type': 'index',
        '_index': index_name,
        '_type': doctype,
        '_source': document,
    }
    for document in json_records
]
helpers.bulk(es, action_list)
...