После того, как задали предыдущий вопрос , я попытался использовать пакетные транзакции с Py2Neo для ускорения процесса. Я немного адаптировал свой код, но, похоже, не смог создать и выполнить пакет транзакций. Сопоставление работает нормально, у меня возникают проблемы только с частью транзакции внизу - я думал, что включу весь свой код, на всякий случай. Текущая ошибка, которую я получаю, выглядит следующим образом:
AttributeError Traceback (most recent call last)
<ipython-input-5-953d29f58a36> in <module>
108 a = nodes[x]
109 print(a)
--> 110 tx.run(a)
111 rela = Relationship(s, "HOMOZYGOUS", a, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
112 print(rela)
~/anaconda3/envs/genome/lib/python3.6/site-packages/py2neo/database.py in run(self, cypher, parameters, **kwparameters)
803 graph=self.graph,
804 keys=[],
--> 805 entities=entities))
806 except CypherError as error:
807 raise GraphError.hydrate({"code": error.code, "message": error.message})
~/anaconda3/envs/genome/lib/python3.6/site-packages/py2neo/internal/connectors.py in run(self, statement, parameters, tx, graph, keys, entities)
285 return self._run_1(statement, parameters, graph, keys, entities)
286 else:
--> 287 return self._run_in_tx(statement, parameters, tx, graph, keys, entities)
288
289 def begin(self):
~/anaconda3/envs/genome/lib/python3.6/site-packages/py2neo/internal/connectors.py in _run_in_tx(self, statement, parameters, tx, graph, keys, entities)
269 hydrator.keys = result.keys()
270
--> 271 tx.run(statement, dehydrated_parameters or {}, on_success=update_metadata_with_keys, on_failure=fail)
272 tx.pull_all(on_records=lambda records: result.append_records(map(hydrator.hydrate, records)),
273 on_success=result.update_metadata, on_failure=fail, on_summary=result.done)
~/anaconda3/envs/genome/lib/python3.6/site-packages/neobolt/direct.py in run(self, statement, parameters, mode, bookmarks, metadata, timeout, **handlers)
292 fields = (statement, parameters)
293 log_debug("[#%04X] C: RUN %s", self.local_port, " ".join(map(repr, fields)))
--> 294 if statement.upper() == u"COMMIT":
295 self._append(b"\x10", fields, CommitResponse(self, **handlers))
296 else:
AttributeError: 'Node' object has no attribute 'upper'
Мой код ниже:
import pandas as pd
import csv
import math
import allel
import zarr
from py2neo import Graph, Node, Relationship, NodeMatcher
zarr_path = 'filepath'
callset = zarr.open_group(zarr_path, mode='r')
graph = Graph(user="neo4j", password="password")
chrom_list = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,'X']
matcher = NodeMatcher(graph)
for chrom in chrom_list:
chrom_label = "Chromosome_" + str(chrom)
samples = callset[chrom]['samples']
variants = allel.VariantChunkedTable(callset[chrom]['variants'], names=['AC','AF_AFR', 'AF_AMR', 'AF_ASN', 'AF_EUR', 'AF_MAX', 'CGT', 'CLR', 'CSQ', 'DP', 'DP4', 'ESP_MAF', 'FILTER_LowQual', 'FILTER_MinHWE', 'FILTER_MinVQSLOD', 'FILTER_PASS', 'HWE', 'ICF', 'ID', 'IS', 'PC2', 'PCHI2', 'POS', 'PR', 'QCHI2', 'QUAL', 'REF', 'ALT', 'INDEL', 'SHAPEIT', 'SNP_ID', 'TYPE', 'UGT', 'VQSLOD', 'dbSNPmismatch', 'is_snp', 'numalt'], index='POS')
pos = variants['POS'][:]
pos = pos.tolist()
ref = variants['REF'][:]
alt = variants['ALT'][:]
dpz = callset[chrom]['calldata/DP']
dp = dpz[:, 0]
psz = callset[chrom]['calldata/PS']
plz = callset[chrom]['calldata/PL']
gpz = callset[chrom]['calldata/GP']
calldata = callset[chrom]['calldata']
gt = allel.GenotypeDaskArray(calldata['GT'])
hap = gt.to_haplotypes()
hap = gt.to_haplotypes()
hap1 = hap[:, ::2]
hap2 = hap[:, 1::2]
list_h1 = hap1[:, 0].compute()
list_h1 = list_h1.tolist()
list_h2 = hap2[:, 0].compute()
for i in range(len(samples)):
subject = samples[i]
dp = dpz[:, i]
ps = psz[:, i]
pl = plz[:, i]
gp = gpz[:, i]
list_h1 = hap1[:, i].compute()
list_h2 = hap2[:, i].compute()
g = Graph()
tx = g.begin()
print(subject)
s = matcher.match("Subject", subject_id= subject).first()
print(s)
if s is None:
continue
j = 0
nodes = []
for j in range(len(pos)):
h1 = int(list_h1[j])
h2 = int(list_h2[j])
k = int(pos[j])
l = str(ref[j])
m = str(alt[j][h1-1])
o = str(alt[j][h2-1])
if h1 == 0 and h2 == 0:
a = matcher.match(chrom_label, pos=k, bp=l).first()
nodes.append(a)
nodes.append(a)
elif h1 == 0 and h2 > 0:
a = matcher.match(chrom_label, pos=k, bp=l).first()
nodes.append(a)
a = matcher.match(chrom_label, pos=k, bp=o).first()
nodes.append(a)
elif h1 > 0 and h2 == 0:
a = matcher.match(chrom_label, pos=k, bp=m).first()
nodes.append(a)
a = matcher.match(chrom_label, pos=k, bp=l).first()
nodes.append(a)
elif h1 == h2 and h1 > 0:
a = matcher.match(chrom_label, pos=k, bp=m).first()
nodes.append(a)
nodes.append(a)
else:
a = matcher.match(chrom_label, pos=k, bp=m).first()
nodes.append(a)
a = matcher.match(chrom_label, pos=k, bp=o).first()
nodes.append(a)
if j % 1000 == 0:
print(str(j) + " rows complete.")
print(subject + " matching complete.")
print(len(nodes))
j=0
for j in range(len(pos)):
read_depth = int(dp[j])
ps1 = int(ps[j])
PL0 = int(pl[j][0])
PL1 = int(pl[j][1])
PL2 = int(pl[j][2])
genotype = str(h1) + '|' + str(h2)
GP0 = float(gp[j][0])
GP1 = float(gp[j][1])
GP2 = float(gp[j][2])
h1 = int(list_h1[j])
h2 = int(list_h2[j])
k = int(pos[j])
l = str(ref[j])
m = str(alt[j][h1-1])
o = str(alt[j][h2-1])
if h1 == 0 and h2 == 0:
x = (2*j)
print(x)
a = nodes[x]
print(a)
tx.run(a)
rela = Relationship(s, "HOMOZYGOUS", a, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
print(rela)
tx.run(rela)
elif h1 == 0 and h2 > 0:
x = (2*j)
a = nodes[x]
tx.run(a)
rela = Relationship(s, "HETEROZYGOUS", a, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
tx.run(rela)
y = (2*j)+1
b = nodes[y]
tx.run(b)
relb = Relationship(s, "HETEROZYGOUS", b, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
tx.run(relb)
elif h1 > 0 and h2 == 0:
x = (2*j)
a = nodes[j]
tx.run(a)
rela = Relationship(s, "HETEROZYGOUS", a, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
tx.run(rela)
y = (2*j)+1
b = nodes[y]
tx.run(b)
relb = Relationship(s, "HETEROZYGOUS", b, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
tx.run(relb)
elif h1 == h2 and h1 > 0:
x = (2*j)
a = nodes[j]
tx.run(a)
rela = Relationship(s, "HOMOZYGOUS", a, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
tx.run(rela)
else:
x = (2*j)
a = node[j]
tx.run(a)
rela = Relationship(s, "HETEROZYGOUS", a, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
tx.run(rela)
y = (2*j)+1
b = nodes[y]
tx.run(b)
relb = Relationship(s, "HETEROZYGOUS", b, HTA=h1, HTB=h2, GT=genotype, dp=read_depth, phase_set=ps1, PL0=PL0, PL1=PL1, PL2=PL2, GP0=GP0, GP1=GP1, GP2=GP2)
tx.run(relb)
if j % 1000 == 0:
tx.commit()
print(chrom_label + " completed.")
Я добавляю узлы в качестве объектов в запрос, но это выводит штраф: например, он возвращает:
(_0:Allele:Chromosome_1:Reference {SNPid: 'rs147999235', bp: 'T', pos: 738539})
для узла и:
(_971387)-[:HOMOZYGOUS {GP0: 1.0, GP1: 0.0, GP2: 0.0, GT: '0|0', HTA: 0, HTB: 0, PL0: 0, PL1: 21, PL2: 166, dp: 7, phase_set: 28590}]->(_0)
для отношений, как и должно быть.
Любая помощь будет принята с благодарностью .