У меня есть словарь и списки:
list1 = ['J', 'C', 'U', 'N']
list2 = ['K', 'G', 'E', 'Q', 'V']
dictionary = {'key1': ['key1 stuff C_stuff stuff',
'key1 stuff J_stuff stuff',
'key1 stuff K stuff',
'key1 stuff E,V stuff'],
'key2': ['key2 stuff N_stuff stuff',
'key2 stuff Q stuff',
'key2 stuff K,G,V stuff',
'key2 stuff U_stuff stuff'],
'key3': ['key3 stuff K,E,Q stuff',
'key3 stuff G,K,E,V stuff'],
'key4': ['key4 stuff G,E stuff',
'key4 stuff V stuff']}
, и я хотел бы отсортировать список value
для каждого key
в dictionary
.Таким образом, список должен быть отсортирован сначала по list1
, а если None
, то отсортировать список по list2
, а если None
, наконец, вернуть наибольшее число, разделенное запятыми в третьем столбце.Вернуть только первое совпадение для каждого key
.Это желаемый результат:
'key1': 'key1 stuff J_stuff stuff'
'key2': 'key2 stuff U_stuff stuff'
'key3': 'key3 stuff G,K,E,V stuff'
'key4': 'key4 stuff V stuff'
Зная, что stuff
все разные, как я могу достичь таких результатов в Python3?
Редактировать 1 : Будет ли этоБыстрее сортировать список для каждого key
без использования list1
и list2
?
Редактировать 2 : добавление фактических строк из моего файла (см. ниже)
Изменить 3 : 'key4 stuff V stuff'
возвращается до 'key4 stuff G,E stuff'
, поскольку, если среди list2
найдено одно значение, оно имеет наивысший приоритет, чем если найдено более одного значения.
Редактировать 4 : добавить код для загрузки образца моего текстового файла в словарь, как в примере выше.
import sys,re
import collections
dictionary = collections.defaultdict(list)
refseq_list = ["NC", "NG", "NM", "NP", "NR", "XM", "XP", "XR", "WP"]
ensembl_list = ["frameshift_variant", "missense_variant", "inframe_insertion", "inframe_deletion", "initiator_codon_variant", "stop_gained", "stop_lost", "splice_donor_variant", "splice_acceptor_variant", "splice_region_variant", "mature_miRNA_variant", "TF_binding_site_variant", "regulatory_region_variant", "TFBS_ablation", "TFBS_amplification", "regulatory_region_ablation", "regulatory_region_amplification", "coding_sequence_variant", "stop_retained_variant", "NMD_transcript_variant", "incomplete_terminal_codon_variant", "non_coding_exon_variant", "nc_transcript_variant", "5_prime_UTR_variant", "3_prime_UTR_variant", "upstream_gene_variant", "downstream_gene_variant", "intron_variant", "transcript_ablation", "transcript_amplification", "feature_elongation", "feature_truncation", "intergenic_variant", "synonymous_variant"]
with open("/home/test.txt") as FileObj:
for line in FileObj:
if not line.startswith("#"):
line_split = line.split("\t")
dictionary[line_split[0]].append(line)
И скопировать эти строки в файл
rs141130360 chr1:16495 C ENSG00000223972 ENST00000450305 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 2825 1 - SNV DDX11L1 HGNC HGNC:37102 transcribed_unprocessed_pseudogene - - - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C ENSG00000223972 ENST00000456328 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 2086 1 - SNV DDX11L1 HGNC HGNC:37102 processed_transcript YES 1 - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C ENSG00000227232 ENST00000488147 Transcript intron_variant,non_coding_transcript_variant - - - - - rs3210724 G MODIFIER - -1 - SNV WASH7P HGNC HGNC:38034 unprocessed_pseudogene YES - - - - - - - - Ensembl G G - - - - - 8/10 - - ENST00000488147.1:n.1067+112C>G - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C ENSG00000278267 ENST00000619216 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 HGNC HGNC:50039 miRNA YES - - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 653635 NC_024540.1 Transcript intron_variant,non_coding_transcript_variant - - - - - rs3210724 G MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene - - - - - - - - - RefSeq G G OK - - - - 8/10 - - NR_024540.1:n.1080+112C>G - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 100287102 NR_046018.2 Transcript frameshift_variant - - - - - rs3210724 G MODIFIER 2086 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - - - - - - - - - RefSeq G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - - - - - - - - RefSeq G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - - rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - - - - - - - - RefSeq G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000223972 ENST00000450305 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 3049 1 - SNV DDX11L1 HGNC HGNC:37102 transcribed_unprocessed_pseudogene - - - - - - - - - Ensembl T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000223972 ENST00000456328 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 2310 1 - SNV DDX11L1 HGNC HGNC:37102 processed_transcript YES 1 - - - - - - - Ensembl T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000227232 ENST00000488147 Transcript non_coding_transcript_exon_variant 955 - - - - rs62636367 T MODIFIER - -1 - SNV WASH7P HGNC HGNC:38034 unprocessed_pseudogene YES - - - - - - - - Ensembl T T - - - - 8/11 - - - ENST00000488147.1:n.955A>T - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A ENSG00000278267 ENST00000619216 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 650 -1 - SNV MIR6859-1 HGNC HGNC:50039 miRNA YES - - - - - - - - Ensembl T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 653635 NR_024540.1 Transcript non_coding_transcript_exon_variant 968 - - - - rs62636367 T MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene - - - - - - - - - RefSeq T T OK - - - 8/11 - - - NR_024540.1:n.968A>T - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 100287102 NR_046018.2 Transcript downstream_gene_variant - - - - - rs62636367 T MODIFIER 2310 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - - - - - - - - - RefSeq T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 102466751 NC_106918.1 Transcript frameshift_variant - - - - - rs62636367 T MODIFIER 650 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - - - - - - - - RefSeq T T - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636368 chr1:16841 T ENSG00000223972 ENST00000450305 Transcript frameshift_variant,downstream_gene_variant - - - - - rs62636368 G MODIFIER 3171 1 - SNV DDX11L1 HGNC HGNC:37102 transcribed_unprocessed_pseudogene - - - - - - - - - Ensembl G G - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs62636368 chr1:16841 T ENSG00000227232 ENST00000488147 Transcript intron_variant,frameshift_variant,non_coding_transcript_variant - - - - - rs62636368 G MODIFIER - -1 - SNV WASH7P HGNC HGNC:38034 unprocessed_pseudogene YES - - - - - - - - Ensembl G G - - - - - 7/10 - - ENST00000488147.1:n.908+17C>A - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rs373847457 chr1:139233 A ENSG00000756734 ENST00000425211 Transcript intron_variant,non_coding_transcript_variant - -- - - rs373847457 C MODIFIER - -1 - SNV LOC100996442 EntrezGene - misc_RNA - - - - - - - - - RefSeq C C - - - - - 5/6 - - XR_001737578.2:n.1135+26651G>T - - - - - - - - - - 0.26820.3372 0.1808 0.2029 0.3592 0.3082 0.3105 0.2765 0.2229 0.3592 gnomAD_EAS - - - - - - - -
rs373847457 chr1:139233 A ENSG00000754812 ENST00000643524 Transcript intron_variant - -- - - rs373847457 C MODIFIER - -1 - SNV LOC100996442 EntrezGene - misc_RNA - - - - - - - - - RefSeq C C - - - - - 5/5 - - XR_001737579.2:n.1134-18301G>T - - - - - - - - - - 0.26820.3372 0.1808 0.2029 0.3592 0.3082 0.3105 0.2765 0.2229 0.3592 gnomAD_EAS - - - - - - - -
где
list1 = refseq_list
list2 = ensembl_list
`key1` = `rs141130360` and `NC_046018.2` = `J_stuff` from list1 and `downstream_gene_variant` = `K` from `list2`
Предоставленный код @ Ajax1234 прекрасно работает на примере, который я привел первым.Но фактические данные дают ошибку:
Traceback (most recent call last):
File "readFile.py", line 71, in <module>
result = {a:find_result(b) for a, b in dictionary.items()}
File "readFile.py", line 71, in <dictcomp>
result = {a:find_result(b) for a, b in dictionary.items()}
File "readFile.py", line 64, in find_result
return sorted(_c, key=lambda x:(lambda c:(len(c) != max(_max) and 1 not in _max if _flag else 0, sum(c)))([refseq_list.index(h) for h in re.findall('[A-Z]+', x)]))[0]
File "readFile.py", line 64, in <lambda>
return sorted(_c, key=lambda x:(lambda c:(len(c) != max(_max) and 1 not in _max if _flag else 0, sum(c)))([refseq_list.index(h) for h in re.findall('[A-Z]+', x)]))[0]
File "readFile.py", line 64, in <listcomp>
return sorted(_c, key=lambda x:(lambda c:(len(c) != max(_max) and 1 not in _max if _flag else 0, sum(c)))([refseq_list.index(h) for h in re.findall('[A-Z]+', x)]))[0]
ValueError: 'C' is not in list
Редактировать 5 : Это другой подход, который я подошел близко, но я все еще не знаю, как вывести окончательный ответ:
import sys,re
import collections
dictionary = collections.defaultdict(list)
refseq = []
refseq_order = []
ensembl = []
ensembl_order = []
ensembl_more_than_one_consequence = []
refseq_dict = {'NC': 0, 'NG': 1, 'NM': 2, 'NP': 3, 'NR': 4, 'XM': 5, 'XP': 6, 'XR': 7, 'WP': 8}
refseq_list = ["NC", "NG", "NM", "NP", "NR", "XM", "XP", "XR", "WP"]
ensembl_dict = {'frameshift_variant': 0, 'missense_variant': 1, 'inframe_insertion': 2, 'inframe_deletion': 3, 'initiator_codon_variant': 4, 'stop_gained': 5, 'stop_lost': 6, 'splice_donor_variant': 7, 'splice_acceptor_variant': 8, 'splice_region_variant': 9, 'mature_miRNA_variant': 10, 'TF_binding_site_variant': 11, 'regulatory_region_variant': 12, 'TFBS_ablation': 13, 'TFBS_amplification': 14, 'regulatory_region_ablation': 15, 'regulatory_region_amplification': 16, 'coding_sequence_variant': 17, 'stop_retained_variant': 18, 'NMD_transcript_variant': 19, 'incomplete_terminal_codon_variant': 20, 'non_coding_exon_variant': 21, 'nc_transcript_variant': 22, '5_prime_UTR_variant': 23, '3_prime_UTR_variant': 24, 'upstream_gene_variant': 25, 'downstream_gene_variant': 26, 'intron_variant': 27, 'transcript_ablation': 28, 'transcript_amplification': 29, 'feature_elongation': 30, 'feature_truncation': 31, 'intergenic_variant': 32, 'synonymous_variant': 33}
ensembl_list = ["frameshift_variant", "missense_variant", "inframe_insertion", "inframe_deletion", "initiator_codon_variant", "stop_gained", "stop_lost", "splice_donor_variant", "splice_acceptor_variant", "splice_region_variant", "mature_miRNA_variant", "TF_binding_site_variant", "regulatory_region_variant", "TFBS_ablation", "TFBS_amplification", "regulatory_region_ablation", "regulatory_region_amplification", "coding_sequence_variant", "stop_retained_variant", "NMD_transcript_variant", "incomplete_terminal_codon_variant", "non_coding_exon_variant", "nc_transcript_variant", "5_prime_UTR_variant", "3_prime_UTR_variant", "upstream_gene_variant", "downstream_gene_variant", "intron_variant", "transcript_ablation", "transcript_amplification", "feature_elongation", "feature_truncation", "intergenic_variant", "synonymous_variant"]
with open("/home/test.txt") as FileObj:
for line in FileObj:
if not line.startswith("#"):
line_split = line.split("\t")
dictionary[line_split[0]].append(line)
def get_index_refseq(s):
by_tabs = s.split("\t")
by_underscore = by_tabs[4].split("_")
return refseq_dict[by_underscore[0]]
def get_index_ensembl(s):
by_tabs = s.split("\t")
return ensembl_dict[by_tabs[6]]
for value in dictionary.values():
for line in value:
line_split = line.split("\t")
if line_split[4].split("_")[0] in refseq_list:
refseq.append(line)
elif line_split[6] in ensembl_list:
ensembl.append(line)
elif len(line_split[6].split(",")) > 1:
ensembl_more_than_one_consequence.append(line)
if refseq:
refseq_order = sorted(refseq, key=get_index_refseq)
for i in refseq_order:
print(i)
выходы:
rs141130360 chr1:16495 C 653635 NC_024540.1 Transcript intron_variant,non_coding_transcript_variant - - -- - rs3210724 G MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene - - - - - - - - - RefSeq G G OK - - - -8/10 - - NR_024540.1:n.1080+112C>G - - - - - - - - - - - -- - - - - - - - - - - - - - - - -
rs62636367 chr1:16719 A 102466751 NC_106918.1 Transcript frameshift_variant - - - - -rs62636367 T MODIFIER 650 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - -- - - - - - RefSeq T T - - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - -rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - -- - - - - - RefSeq G G - - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - -
rs141130360 chr1:16495 C 102466751 NG_106918.1 Transcript downstream_gene_variant - - - - -rs3210724 G MODIFIER 874 -1 - SNV MIR6859-1 EntrezGene HGNC:50039 miRNA - - -- - - - - - RefSeq G G - - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - -
rs141130360 chr1:16495 C 100287102 NR_046018.2 Transcript frameshift_variant - - - - -rs3210724 G MODIFIER 2086 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - -- - - - - - - RefSeq G G - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - - -
rs62636367 chr1:16719 A 653635 NR_024540.1 Transcript non_coding_transcript_exon_variant 968 - - -- rs62636367 T MODIFIER - -1 - SNV WASH7P EntrezGene HGNC:38034 transcribed_pseudogene- - - - - - - - - RefSeq T T OK - - - 8/11 - -- NR_024540.1:n.968A>T - - - - - - - - - - - - - - -- - - - - - - - - - - - - -
rs62636367 chr1:16719 A 100287102 NR_046018.2 Transcript downstream_gene_variant - - - - -rs62636367 T MODIFIER 2310 1 - SNV DDX11L1 EntrezGene HGNC:37102 transcribed_pseudogene - -- - - - - - - RefSeq T T - - - - - - - - -- - - - - - - - - - - - - - - - - - -- - - - - - - - - -
, но я не знаю, как выбрать только первые value
, которые приходят для каждого key
Редактировать6 : Согласно запросу Ajax1234, это словарь ввода из моего кода выше.
dictionary = {'rs141130360': ['rs141130360\tchr1:16495\tC\tENSG00000223972\tENST00000450305\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t2825\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\ttranscribed_unprocessed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\tENSG00000223972\tENST00000456328\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t2086\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\tprocessed_transcript\tYES\t1\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\tENSG00000227232\tENST00000488147\tTranscript\tintron_variant,non_coding_transcript_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tHGNC\tHGNC:38034\tunprocessed_pseudogene\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t8/10\t-\t-\tENST00000488147.1:n.1067+112C>G\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\tENSG00000278267\tENST00000619216\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t874\t-1\t-\tSNV\tMIR6859-1\tHGNC\tHGNC:50039\tmiRNA\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\t653635\tNC_024540.1\tTranscript\tintron_variant,non_coding_transcript_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tEntrezGene\tHGNC:38034\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\tOK\t-\t-\t-\t-\t8/10\t-\t-\tNR_024540.1:n.1080+112C>G\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\t100287102\tNR_046018.2\tTranscript\tframeshift_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t2086\t1\t-\tSNV\tDDX11L1\tEntrezGene\tHGNC:37102\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\t102466751\tNG_106918.1\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t874\t-1\t-\tSNV\tMIR6859-1\tEntrezGene\tHGNC:50039\tmiRNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs141130360\tchr1:16495\tC\t102466751\tNG_106918.1\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs3210724\tG\tMODIFIER\t874\t-1\t-\tSNV\tMIR6859-1\tEntrezGene\tHGNC:50039\tmiRNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n'], 'rs62636367': ['rs62636367\tchr1:16719\tA\tENSG00000223972\tENST00000450305\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t3049\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\ttranscribed_unprocessed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\tENSG00000223972\tENST00000456328\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t2310\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\tprocessed_transcript\tYES\t1\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\tENSG00000227232\tENST00000488147\tTranscript\tnon_coding_transcript_exon_variant\t955\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tHGNC\tHGNC:38034\tunprocessed_pseudogene\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t8/11\t-\t-\t-\tENST00000488147.1:n.955A>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\tENSG00000278267\tENST00000619216\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t650\t-1\t-\tSNV\tMIR6859-1\tHGNC\tHGNC:50039\tmiRNA\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\t653635\tNR_024540.1\tTranscript\tnon_coding_transcript_exon_variant\t968\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tEntrezGene\tHGNC:38034\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tT\tT\tOK\t-\t-\t-\t8/11\t-\t-\t-\tNR_024540.1:n.968A>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\t100287102\tNR_046018.2\tTranscript\tdownstream_gene_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t2310\t1\t-\tSNV\tDDX11L1\tEntrezGene\tHGNC:37102\ttranscribed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636367\tchr1:16719\tA\t102466751\tNC_106918.1\tTranscript\tframeshift_variant\t-\t-\t-\t-\t-\trs62636367\tT\tMODIFIER\t650\t-1\t-\tSNV\tMIR6859-1\tEntrezGene\tHGNC:50039\tmiRNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tT\tT\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n'], 'rs62636368': ['rs62636368\tchr1:16841\tT\tENSG00000223972\tENST00000450305\tTranscript\tframeshift_variant,downstream_gene_variant\t-\t-\t-\t-\t-\trs62636368\tG\tMODIFIER\t3171\t1\t-\tSNV\tDDX11L1\tHGNC\tHGNC:37102\ttranscribed_unprocessed_pseudogene\t-\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs62636368\tchr1:16841\tT\tENSG00000227232\tENST00000488147\tTranscript\tintron_variant,frameshift_variant,non_coding_transcript_variant\t-\t-\t-\t-\t-\trs62636368\tG\tMODIFIER\t-\t-1\t-\tSNV\tWASH7P\tHGNC\tHGNC:38034\tunprocessed_pseudogene\tYES\t-\t-\t-\t-\t-\t-\t-\t-\tEnsembl\tG\tG\t-\t-\t-\t-\t-\t7/10\t-\t-\tENST00000488147.1:n.908+17C>A\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\n'], 'rs373847457': ['rs373847457\tchr1:139233\tA\tENSG00000756734\tENST00000425211\tTranscript\tintron_variant,non_coding_transcript_variant\t-\t--\t-\t-\trs373847457\tC\tMODIFIER\t-\t-1\t-\tSNV\tLOC100996442\tEntrezGene\t-\tmisc_RNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tC\tC\t-\t-\t-\t-\t-\t5/6\t-\t-\tXR_001737578.2:n.1135+26651G>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t0.26820.3372\t0.1808\t0.2029\t0.3592\t0.3082\t0.3105\t0.2765\t0.2229\t0.3592\tgnomAD_EAS\t-\t-\t-\t-\t-\t-\t-\t-\n', 'rs373847457\tchr1:139233\tA\tENSG00000754812\tENST00000643524\tTranscript\tintron_variant\t-\t--\t-\t-\trs373847457\tC\tMODIFIER\t-\t-1\t-\tSNV\tLOC100996442\tEntrezGene\t-\tmisc_RNA\t-\t-\t-\t-\t-\t-\t-\t-\t-\tRefSeq\tC\tC\t-\t-\t-\t-\t-\t5/5\t-\t-\tXR_001737579.2:n.1134-18301G>T\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t0.26820.3372\t0.1808\t0.2029\t0.3592\t0.3082\t0.3105\t0.2765\t0.2229\t0.3592\tgnomAD_EAS\t-\t-\t-\t-\t-\t-\t-\t-']}