Миграция данных с использованием многопоточности - PullRequest
0 голосов
/ 12 мая 2018

Мне нужно заполнить базу данных исходными данными из CSV-файла.Поскольку данные в CSV-файле очень велики, я пытаюсь использовать многопоточность при переносе данных.Но я постоянно получаю ошибки при этом.Я пытался поймать блокировку и блокировки, но, кажется, ничто не помогает мне.Это мой файл миграции.

from django.db import migrations
import csv
from django.db import transaction
# from multiprocessing import Pool
import threading
import time


def process_data(data,State,District,Region,Pincode):
    for row in data:
        state=row['statename']
        district=row['districtname']
        region=row['officename'].split(' '+row['officetype'])[0]
        pincode=row['pincode']
        try:
            state_object = State.objects.get(name=state)
        except State.DoesNotExist:
            state_object = State.objects.create(name=state)
        try:
            district_object = District.objects.get(
                name=district,
                state=state_object
            )
        except District.DoesNotExist:
            district_object = District.objects.create(
                name=district,
                state=state_object
            )
        try:
            pincode_object = Pincode.objects.get(
                value=pincode,
                district=district_object
            )
        except Pincode.DoesNotExist:
            pincode_object = Pincode.objects.create(
                value=pincode,
                district=district_object
            )
        try:
            Region.objects.get(
                name=region,
                district=district_object,
                pincode=pincode_object
            )
        except Region.DoesNotExist:
            Region.objects.create(
                name=region,
                district=district_object,
                pincode=pincode_object
            )


def add_data(apps, schema_editor):
    State = apps.get_model('locations', 'State')
    District = apps.get_model('locations', 'District')
    Region = apps.get_model('locations', 'Region')
    Pincode = apps.get_model('locations', 'Pincode')
    thread_list = []
    filename = 'All_India_pincode_data_26022018.csv'
    file_data = csv.DictReader(open(filename,'r',errors='ignore'))
    count=0
    rows=[]
    for row in file_data:
        rows.append(row)
        count+=1
        if count==10000:
            thread_list.append(threading.Thread(target=process_data, 
            args=(rows,State,District,Region,Pincode)))
            rows=[]
            print(rows,len(thread_list))
            count=0
    if count>0:
        thread_list.append(threading.Thread(target=process_data, 
        args=(rows,State,District,Region,Pincode)))
    for thread in thread_list:
        thread.start()
    for thread in thread_list:
        thread.join()
    print('Number of threads: ',len(thread_list))


class Migration(migrations.Migration):

    dependencies = [
        ('locations', '0001_initial'),
    ]

    operations = [
        migrations.RunPython(add_data)
    ]

Это небольшая часть трассировки стека, которую я получаю.

Exception in thread Thread-15:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 46, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Pincode matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(232101) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 51, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(232101) already exists.


Exception in thread Thread-16:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 46, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Pincode matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(713150) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 51, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(713150) already exists.


Exception in thread Thread-12:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 60, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Region matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Rayagiri, 49594) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 66, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Rayagiri, 49594) already exists.


Exception in thread Thread-10:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 46, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Pincode matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(755019) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 51, in process_data
    district=district_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_pincode_value_key"
DETAIL:  Key (value)=(755019) already exists.


Exception in thread Thread-11:
Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 60, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 403, inget
    self.model._meta.object_name
__fake__.DoesNotExist: Region matching query does not exist.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
psycopg2.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Jajod, 49630) already exists.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/usr/local/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/home/sourabh/projects/onlease/apps/locations/migrations/0002_auto_20180512_1902.py", line 66, in process_data
    pincode=pincode_object
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 417, increate
    obj.save(force_insert=True, using=self.db)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 729, in save
    force_update=force_update, update_fields=update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 759, in save_base
    updated = self._save_table(raw, cls, force_insert, force_update, using, update_fields)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 842, in _save_table
    result = self._do_insert(cls._base_manager, using, fields, update_pk, raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/base.py", line 880, in _do_insert
    using=using, raw=raw)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/manager.py", line 82, in manager_method
    return getattr(self.get_queryset(), name)(*args, **kwargs)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/query.py", line 1125, in _insert
    return query.get_compiler(using=using).execute_sql(return_id)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/models/sql/compiler.py", line 1283, in execute_sql
    cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 100, in execute
    return super().execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 68, in execute
    return self._execute_with_wrappers(sql, params, many=False, executor=self._execute)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 77, in _execute_with_wrappers
    return executor(sql, params, many, context)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/utils.py", line 89, in __exit__
    raise dj_exc_value.with_traceback(traceback) from exc_value
  File "/home/sourabh/projects/onlease/venv/lib/python3.6/site-packages/django/db/backends/utils.py", line 85, in _execute
    return self.cursor.execute(sql, params)
django.db.utils.IntegrityError: duplicate key value violates unique constraint "locations_region_name_district_id_a93784e1_uniq"
DETAIL:  Key (name, district_id)=(Jajod, 49630) already exists.

1 Ответ

0 голосов
/ 12 мая 2018

Вам не нужно обрабатывать ошибки get или create, пусть Django обрабатывает это, используя get_or_create. Кроме того, Threading не ускорит вашу программу, у вас все еще есть GIL. Вы можете использовать множественную обработку, если вы хотите больше скорости. Если вы собираетесь это сделать, я рекомендую использовать приложение с частичной функцией и для ваших констант, а затем отобразить частичный доступ к вашим данным. Я не могу полностью проверить это, поскольку у меня нет вашего кода / базы данных, но попробуйте что-то вроде этого:

from functools import partial
from django.db import migrations
import csv
from multiprocessing import Pool


def process_data(row, State, District, Region, Pincode):
    state = row['statename']
    district = row['districtname']
    region = row['officename'].split(' ' + row['officetype'])[0]
    pincode = row['pincode']

    state_object = State.objects.get_or_create(
        name=state
    )
    district_object = District.objects.get_or_create(
        name=district,
        state=state_object
    )
    pincode_object = Pincode.objects.get_or_create(
        value=pincode,
        district=district_object
    )
    Region.objects.get_or_create(
        name=region,
        district=district_object,
        pincode=pincode_object
    )


def add_data(apps):
    State = apps.get_model('locations', 'State')
    District = apps.get_model('locations', 'District')
    Region = apps.get_model('locations', 'Region')
    Pincode = apps.get_model('locations', 'Pincode')
    filename = 'All_India_pincode_data_26022018.csv'
    file_data = csv.DictReader(open(filename, 'r', errors='ignore'))

    with Pool(processes=4) as pool:
        runner = partial(process_data, State=State, District=District, Region=Region, Pincode=Pincode)
        pool.starmap(runner, file_data)

if __name__ == '__main__':
    apps = ...
    add_data(apps)
...