Наконец, изучив вкладку «Сеть» (Network) в инструментах разработчика браузера, я выяснил, как работает веб-сайт. По сути, «за кулисами» он выполняет четыре разных вызова API. Вот они:
- https://asia.ensembl.org/Multi/Ajax/search?q=name%3A%22CXCR4%22&rows=200&fq=feature_type%3AGene+AND+database_type%3Acore&facet.field=species&facet.mincount=1&facet=true
- https://asia.ensembl.org/Multi/Ajax/search?q=(+NOT+species%3Axxx+)+AND+(+CXCR4+)+AND+(+NOT+species%3Ayyy+)&fq=&rows=1&facet.field=species&facet.field=feature_type&facet.field=stretch&facet.mincount=1&facet=true&facet.limit=-1
- https://asia.ensembl.org/Multi/Ajax/search?q=(+CXCR4%5E316+AND+species%3A%22CrossSpecies%22+)+OR+(+CXCR4%5E190+AND+species%3A%22Human%22+)+OR+(+CXCR4%5E80+AND+species%3A%22Mouse%22+)+OR+(+CXCR4+AND+species%3A%22Zebrafish%22+)&fq=(++(++species%3A%22CrossSpecies%22+AND+(+reference_strain%3A1+)++)++OR++(++species%3A%22Human%22+AND+(+reference_strain%3A1+)++)++OR++(++species%3A%22Mouse%22+AND+(+reference_strain%3A1+)++)++OR++(++species%3A%22Zebrafish%22+AND+(+reference_strain%3A1+)++)++)&hl=true&hl.fl=_hr&hl.fl=content&hl.fl=description&hl.fragsize=500&rows=10&start=0
введите описание изображения здесь
Следовательно, результаты являются объединенными результатами четырех вышеуказанных вызовов API, и они отображаются на разных страницах веб-сайта.
import requests
# Ensembl search endpoint with an already URL-encoded query string: "CXCR4"
# boosted per species (CrossSpecies ^316, Human ^190, Mouse ^80, Zebrafish
# unboosted), filtered to reference strains, with highlighting enabled and
# the first 10 rows requested. The literal is split across lines only for
# readability; adjacent string literals concatenate to the exact same URL.
SEARCH_URL = (
    "https://asia.ensembl.org/Multi/Ajax/search"
    "?q=(+CXCR4%5E316+AND+species%3A%22CrossSpecies%22+)"
    "+OR+(+CXCR4%5E190+AND+species%3A%22Human%22+)"
    "+OR+(+CXCR4%5E80+AND+species%3A%22Mouse%22+)"
    "+OR+(+CXCR4+AND+species%3A%22Zebrafish%22+)"
    "&fq=(++(++species%3A%22CrossSpecies%22+AND+(+reference_strain%3A1+)++)"
    "++OR++(++species%3A%22Human%22+AND+(+reference_strain%3A1+)++)"
    "++OR++(++species%3A%22Mouse%22+AND+(+reference_strain%3A1+)++)"
    "++OR++(++species%3A%22Zebrafish%22+AND+(+reference_strain%3A1+)++)"
    "++)"
    "&hl=true&hl.fl=_hr&hl.fl=content&hl.fl=description"
    "&hl.fragsize=500&rows=10&start=0"
)

# NOTE(security): verify=False turns off TLS certificate verification, which
# exposes the request to man-in-the-middle tampering. It is kept here only
# because the request otherwise fails with an SSL error; prefer supplying a
# proper CA bundle via `verify=<path>` where possible.
# A timeout is set because requests waits indefinitely by default.
res = requests.get(SEARCH_URL, verify=False, timeout=30)

# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
res.raise_for_status()

result = res.json()
print(result)
Примечание: не забудьте передать verify=False
в вызов requests.get, иначе запрос завершится ошибкой SSL (SSLException). Учтите, что verify=False отключает проверку TLS-сертификата сервера.
Вывод:
{'error': '',
'result': {'highlighting': {'1d3be01c-f969-40de-a1f8-bfd5bbf40fc1': {},
'5b2accd3-cfef-4e2a-9d9c-2e70752e4a68': {'_hr': ['<strong><em>Cxcr4</em></strong>-001 (Vega transcript) is an external reference matched to Transcript ENSMUST00000052172']},
'd2f9e02b-f3f3-4823-9e39-3f727a265acb': {'_hr': ['GO:0031723 (GO record; description: <strong><em>CXCR4</em></strong> chemokine receptor binding,) is an external reference matched to Transcript ENST00000291526']},
'b66c389f-ade7-4bc6-bcd6-b7011e7bc10e': {'_hr': ['LRG_51t1 (LRG display in Ensembl transcript record; description: Locus Reference Genomic record for <strong><em>CXCR4</em></strong>) is an external reference matched to Transcript ENST00000409817']},
'dc70ef4d-7627-49d3-bfe8-f7e0c5fde994': {'_hr': ['<strong><em>Cxcr4</em></strong>-002 (Vega transcript) is an external reference matched to Transcript ENSMUST00000142893']},
'e7d394ec-fd37-4cc2-8a5c-81482299c695': {},
'8a02b397-ad39-420e-a4ed-89b709d4a3f5': {},
'2d5880cc-d9f6-4fec-a154-ce9b7ba3c590': {'_hr': ['LRG_51t1 (LRG display in Ensembl transcript record; description: Locus Reference Genomic record for <strong><em>CXCR4</em></strong>) is an external reference matched to Transcript ENST00000241393']},
'7f406926-0470-4c70-b8c7-f2bd8228be08': {'_hr': ['<strong><em>Cxcr4</em></strong>-001 (Vega transcript) is an external reference matched to Transcript ENSMUST00000052172']},
'cf47bc6b-6bd0-4690-a0ac-8feed5a5a112': {'_hr': ['LRG_51 (LRG display in Ensembl gene record; description: Locus Reference Genomic record for <strong><em>CXCR4</em></strong>,) is an external reference matched to Gene ENSG00000121966']}},
'responseHeader': {'QTime': 37,
'params': {'fq': '( ( species:"CrossSpecies" AND ( reference_strain:1 ) ) OR ( species:"Human" AND ( reference_strain:1 ) ) OR ( species:"Mouse" AND ( reference_strain:1 ) ) OR ( species:"Zebrafish" AND ( reference_strain:1 ) ) )',
'hl.fragsize': '500',
'hl.fl': ['_hr', 'content', 'description'],
'q': '( CXCR4^316 AND species:"CrossSpecies" ) OR ( CXCR4^190 AND species:"Human" ) OR ( CXCR4^80 AND species:"Mouse" ) OR ( CXCR4 AND species:"Zebrafish" )',
'hl': 'true',
'wt': 'json',
'start': ['0', '0'],
'rows': '10'},
'status': 0},
'response': {'numFound': 24,
'docs': [{'domain_url': 'homo_sapiens/Gene/Summary?g=ENSG00000121966&db=core',
'name': 'CXCR4',
'species': 'Human',
'ref_boost': 10,
'location': '2:136114349-136118149:-1',
'quick_links': ['orthologues:1'],
'db_boost': 40,
'website': 'http://www.ensembl.org',
'reference_strain': 1,
'id': 'ENSG00000121966',
'domain': 'http://www.ensembl.org',
'uid': 'cf47bc6b-6bd0-4690-a0ac-8feed5a5a112',
'feature_type': 'Gene',
'description': 'C-X-C motif chemokine receptor 4 [Source:HGNC Symbol;Acc:HGNC:2561]',
'score': 3.3581953,
'database_type': 'core'},
{'feature_type': 'Transcript',
'score': 2.238805,
'database_type': 'core',
'description': 'C-X-C motif chemokine receptor 4 [Source:HGNC Symbol;Acc:HGNC:2561]',
'reference_strain': 1,
'website': 'http://www.ensembl.org',
'db_boost': 40,
'uid': '2d5880cc-d9f6-4fec-a154-ce9b7ba3c590',
'domain': 'http://www.ensembl.org',
'id': 'ENST00000241393',
'name': 'CXCR4-201',
'location': '2:136114349-136118149:-1',
'quick_links': ['protein:1'],
'ref_boost': 10,
'species': 'Human',
'domain_url': 'homo_sapiens/Transcript/Summary?t=ENST00000241393&db=core'},
{'feature_type': 'Transcript',
'description': 'C-X-C motif chemokine receptor 4 [Source:HGNC Symbol;Acc:HGNC:2561]',
'database_type': 'core',
'score': 2.238805,
'website': 'http://www.ensembl.org',
'db_boost': 40,
'reference_strain': 1,
'domain': 'http://www.ensembl.org',
'id': 'ENST00000409817',
'uid': 'b66c389f-ade7-4bc6-bcd6-b7011e7bc10e',
'name': 'CXCR4-202',
'location': '2:136114349-136116243:-1',
'quick_links': ['protein:1'],
'species': 'Human',
'ref_boost': 10,
'domain_url': 'homo_sapiens/Transcript/Summary?t=ENST00000409817&db=core'},
{'name': 'CXCR4-203',
'quick_links': ['protein:0'],
'location': '2:136114637-136117737:-1',
'species': 'Human',
'ref_boost': 10,
'domain_url': 'homo_sapiens/Transcript/Summary?t=ENST00000466288&db=core',
'feature_type': 'Transcript',
'description': 'C-X-C motif chemokine receptor 4 [Source:HGNC Symbol;Acc:HGNC:2561]',
'database_type': 'core',
'score': 2.238805,
'website': 'http://www.ensembl.org',
'db_boost': 40,
'reference_strain': 1,
'domain': 'http://www.ensembl.org',
'id': 'ENST00000466288',
'uid': '1d3be01c-f969-40de-a1f8-bfd5bbf40fc1'},
{'domain_url': 'mus_musculus/Gene/Summary?g=ENSMUSG00000045382&db=core',
'strain': 'Mouse reference (CL57BL6)',
'name': 'Cxcr4',
'ref_boost': 10,
'species': 'Mouse',
'quick_links': ['orthologues:1'],
'location': '1:128588199-128592293:-1',
'db_boost': 40,
'website': 'http://www.ensembl.org',
'reference_strain': 1,
'id': 'ENSMUSG00000045382',
'domain': 'http://www.ensembl.org',
'uid': '5b2accd3-cfef-4e2a-9d9c-2e70752e4a68',
'feature_type': 'Gene',
'description': 'chemokine (C-X-C motif) receptor 4 [Source:MGI Symbol;Acc:MGI:109563]',
'score': 1.4139885,
'database_type': 'core'},
{'location': '1:128588199-128592290:-1',
'quick_links': ['protein:1'],
'species': 'Mouse',
'ref_boost': 10,
'name': 'Cxcr4-201',
'strain': 'Mouse reference (CL57BL6)',
'domain_url': 'mus_musculus/Transcript/Summary?t=ENSMUST00000052172&db=core',
'score': 0.9426663,
'database_type': 'core',
'description': 'chemokine (C-X-C motif) receptor 4 [Source:MGI Symbol;Acc:MGI:109563]',
'feature_type': 'Transcript',
'uid': '7f406926-0470-4c70-b8c7-f2bd8228be08',
'domain': 'http://www.ensembl.org',
'id': 'ENSMUST00000052172',
'reference_strain': 1,
'website': 'http://www.ensembl.org',
'db_boost': 40},
{'reference_strain': 1,
'website': 'http://www.ensembl.org',
'db_boost': 40,
'uid': 'dc70ef4d-7627-49d3-bfe8-f7e0c5fde994',
'domain': 'http://www.ensembl.org',
'id': 'ENSMUST00000142893',
'feature_type': 'Transcript',
'score': 0.9426663,
'database_type': 'core',
'description': 'chemokine (C-X-C motif) receptor 4 [Source:MGI Symbol;Acc:MGI:109563]',
'domain_url': 'mus_musculus/Transcript/Summary?t=ENSMUST00000142893&db=core',
'strain': 'Mouse reference (CL57BL6)',
'name': 'Cxcr4-202',
'location': '1:128589099-128592293:-1',
'quick_links': ['protein:1'],
'species': 'Mouse',
'ref_boost': 10},
{'reference_strain': 1,
'website': 'http://www.ensembl.org',
'uid': 'e7d394ec-fd37-4cc2-8a5c-81482299c695',
'id': 'Cxcr4',
'domain': 'http://www.ensembl.org',
'feature_type': 'Marker',
'database_type': 'core',
'score': 0.01179975,
'domain_url': 'mus_musculus/Marker/Details?m=Cxcr4',
'strain': 'Mouse reference (CL57BL6)',
'species': 'Mouse'},
{'domain_url': 'homo_sapiens/Gene/Summary?g=ENSG00000160181&db=core',
'ref_boost': 10,
'species': 'Human',
'quick_links': ['orthologues:1'],
'location': '21:42346357-42350997:-1',
'name': 'TFF2',
'id': 'ENSG00000160181',
'domain': 'http://www.ensembl.org',
'uid': 'd2f9e02b-f3f3-4823-9e39-3f727a265acb',
'db_boost': 40,
'website': 'http://www.ensembl.org',
'reference_strain': 1,
'description': 'trefoil factor 2 [Source:HGNC Symbol;Acc:HGNC:11756]',
'database_type': 'core',
'score': 0.0072125974,
'feature_type': 'Gene'},
{'feature_type': 'Protein Family',
'description': 'Ensembl protein family PTHR24227 [C C CHEMOKINE RECEPTOR TYPE C C CKR CC CKR CCR ANTIGEN]: 27 genes / 77 proteins in homo sapiens',
'score': 0.004816582,
'database_type': 'core',
'website': 'http://www.ensembl.org',
'reference_strain': 1,
'domain': 'http://www.ensembl.org',
'id': 'PTHR24227',
'uid': '8a02b397-ad39-420e-a4ed-89b709d4a3f5',
'name': 'PTHR24227',
'species': 'Human',
'domain_url': 'homo_sapiens/Gene/Family?family=PTHR24227;g=ENSG00000163464'}],
'start': 0,
'maxScore': 3.3581953}}}