У меня есть функция Django, которая принимает файл Nessus, разбирает его и сохраняет данные в базу данных. В файле Nessus обычно около 30 тыс. строк, и сохранение в базу данных может занимать до 2 часов. Я пытался использовать bulk_create, но это ломает код. Я использую Django 1.11. Есть ли способ ускорить такие большие вставки в базу данных (PostgreSQL)?
Вот мой код:
def _collect_item_fields(report_item):
    """Return a ``{tag: text}`` mapping for the children of a ReportItem.

    Text is stripped of surrounding whitespace; empty/missing text is kept
    as-is. A solution of ``n/a`` / ``N/A`` is stored as an empty string.
    """
    fields = {}
    for node in report_item:
        text = node.text
        fields[node.tag] = text.strip() if text else text
    if fields.get('solution') in ('n/a', 'N/A'):
        fields['solution'] = ''
    return fields


def _get_host_link(host, user_host):
    """Fetch the ``Host`` row for *host*, creating it when none exists."""
    try:
        return Host.objects.get(host=host)
    except Host.MultipleObjectsReturned:
        # Must query the ``Host`` model here: ``host`` is the host-name
        # string and has no ``objects`` manager.
        return Host.objects.filter(host=host).first()
    except Host.DoesNotExist:
        return Host.objects.create(host=host, user_host=user_host)


def process_nessus_file(*args, **kwargs):
    """Import the findings of a Nessus XML report into the database.

    The report is streamed one ``ReportHost`` element at a time. For every
    ``ReportItem`` a ``NessusFile`` row is created when the host is known and
    the finding is not stored yet; otherwise the finding is logged as a
    duplicate. Each outcome is also recorded in ``NessusFileLog``.

    Keyword arguments:
        file: object whose ``first()`` returns the uploaded-file record; the
            XML is read from that record's ``file.path`` (presumably a
            queryset -- confirm against the caller).
        Any other positional or keyword arguments are accepted and ignored.

    Returns:
        None. Tallies are written to the log when the import finishes.

    Storing a finding is best-effort: an error for one ReportItem is logged
    and the import continues with the next one.
    """
    file_obj = kwargs.get('file').first()
    report_hosts = etree.iterparse(
        file_obj.file.path,
        events=('end', ),
        tag="ReportHost"
    )
    # Literal text removed from the plugin output to obtain the banner;
    # loop-invariant, so it is built once.
    banner_text = "{}".format(BANNER_PATTERN)

    risk_levels = ('Low', 'Medium', 'High', 'Critical')
    seen_by_risk = dict.fromkeys(risk_levels, 0)
    new_by_risk = dict.fromkeys(risk_levels, 0)
    total_issues = 0
    detected_issues = 0
    already_exist_issue = 0

    for _event, elem in report_hosts:
        first_identified = None
        last_seen = None
        # Resolved lazily and reused for every new finding of this host, so
        # the Host table is not queried again for each ReportItem.
        host_link_obj = None
        host = elem.get('name')
        logger.info('Processing issue for host : {}'.format(host))

        for child in elem:
            if child.tag == "HostProperties":
                for prop in child:
                    # .get() so a property without a name attribute cannot
                    # abort the whole import with a KeyError.
                    prop_name = prop.get('name')
                    if prop_name == "HOST_START":
                        first_identified = prop.text
                    elif prop_name == "HOST_END":
                        last_seen = prop.text
                continue
            if child.tag != "ReportItem":
                continue

            main_tags = child.attrib
            child_tags = _collect_item_fields(child)
            plugin_output = child_tags.get('plugin_output')
            pluginid = int(main_tags.get('pluginID'))

            # A banner is only extracted from the output of plugin 10107.
            banner = ''
            if (plugin_output and pluginid == 10107
                    and re.search(BANNER_PATTERN, plugin_output)):
                banner = plugin_output.replace(banner_text, "").strip()

            risk = child_tags.get('risk_factor')
            synopsis = child_tags.get('synopsis')
            description = child_tags.get('description')
            solution = child_tags.get('solution')
            protocol = main_tags.get('protocol')
            port = main_tags.get('port')
            pluginname = main_tags.get('pluginName')
            svcname = main_tags.get('svc_type')

            try:
                host_type = get_host_type(host)
                user_host = check_host_exists(host, host_type)
                # NOTE(review): the existence check queries ``NessusData``
                # while rows are created and updated through ``NessusFile``;
                # confirm that this is intentional.
                if user_host and not NessusData.objects.filter(
                    plugin_id=pluginid, host=host,
                    port=int(port), name=pluginname
                ).exists():
                    if host_link_obj is None:
                        host_link_obj = _get_host_link(host, user_host)
                    nessus_obj = NessusFile.objects.create(
                        user_host=user_host,
                        host_link=host_link_obj,
                        linked_file=file_obj,
                        plugin_id=pluginid,
                        risk=risk, host=host,
                        protocol=protocol, port=int(port),
                        banner=banner, name=pluginname,
                        svc_type=svcname,
                        description=description,
                        first_identified=first_identified,
                        last_seen=last_seen,
                        synopsis=synopsis,
                        plugin_output=plugin_output,
                        solution=solution
                    )
                    issue = (
                        "Issue with host {}, port {} and"
                        " pluginID {} is added."
                    ).format(
                        nessus_obj.host, nessus_obj.port,
                        nessus_obj.plugin_id
                    )
                    NessusFileLog.objects.create(
                        linked_file=file_obj,
                        issue_type="new",
                        issue=issue
                    )
                    detected_issues += 1
                    if risk in new_by_risk:
                        new_by_risk[risk] += 1
                else:
                    # Reached when the host is unknown or the finding is
                    # already stored; both cases are logged as "duplicate".
                    nessus_obj = NessusFile.objects.filter(
                        plugin_id=pluginid, host=host,
                        port=int(port), name=pluginname
                    ).first()
                    if nessus_obj and not nessus_obj.last_seen:
                        nessus_obj.last_seen = last_seen
                        nessus_obj.save()
                    issue = (
                        "Issue with host {}, port {} and"
                        " pluginID {} is already exists."
                    ).format(host, port, pluginid)
                    NessusFileLog.objects.create(
                        linked_file=file_obj,
                        issue_type="duplicate",
                        issue=issue
                    )
                    already_exist_issue += 1
            except Exception:
                # Keep the import best-effort, but leave a trace instead of
                # silently dropping the finding.
                logger.exception(
                    'Failed to store issue for host %s, port %s, '
                    'pluginID %s', host, port, pluginid
                )

            if risk in seen_by_risk:
                seen_by_risk[risk] += 1
            total_issues += 1

        # Release the processed subtree and its already-handled siblings so
        # memory use stays flat while streaming a large report.
        elem.clear()
        while elem.getprevious() is not None:
            del elem.getparent()[0]

    logger.info(
        'Finished Nessus import: total=%s new=%s duplicate=%s '
        'by_risk=%s new_by_risk=%s',
        total_issues, detected_issues, already_exist_issue,
        seen_by_risk, new_by_risk
    )
Я слышал, что «сырые» SQL-запросы могут ускорить процесс, но не могу разобраться, как это сделать.