Цитаты публикаций: как добавить запрос определённого количества ссылок и обнаружение EOF? - PullRequest
0 голосов
/ 23 марта 2020

Я работаю над этим проектом, чтобы напечатать цитаты всех публикаций, используя JSON, XML и urllib. Я застрял на двух вещах:

  1. Добавить в программу часть, которая просит пользователя ввести, сколько ссылок нужно напечатать.
  2. Добавить обнаружение EOF (конца ввода).

Может кто-нибудь помочь мне? Я совсем запутался и нуждаюсь в помощи. Ниже приведён мой код.

Код:

#First, import all necessary programs such as JSON and XML
import json
import ssl
import urllib
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET

#Second, switch the ssl module's default HTTPS context factory to the
#unverified one, when this interpreter provides it.
#NOTE: after this runs, ssl._create_default_https_context no longer performs
#certificate verification -- convenient for a classroom script, but unsafe
#for anything handling sensitive data.
if hasattr(ssl, '_create_unverified_context'):
    _create_unverified_https_context = ssl._create_unverified_context
    #Install the unverified factory as the module-wide default
    ssl._create_default_https_context = _create_unverified_https_context
#Interpreters without ssl._create_unverified_context are left untouched.

#Third, define all months
#Lookup table from month number (as a string, no leading zero) to the
#abbreviation this script prints. 'Sept' is kept as the original spelling.
_MONTH_ABBREVIATIONS = {
    '1': 'Jan',
    '2': 'Feb',
    '3': 'Mar',
    '4': 'Apr',
    '5': 'May',
    '6': 'Jun',
    '7': 'Jul',
    '8': 'Aug',
    '9': 'Sept',
    '10': 'Oct',
    '11': 'Nov',
    '12': 'Dec',
}


def convertNumToMonth(month):
    """Convert a numeric month string to its abbreviated English name.

    Surrounding whitespace and leading zeros are ignored, so '3', '03' and
    ' 3 ' all map to 'Mar'.

    Args:
        month: The month as text, e.g. '1' .. '12'. Values that are already
            names (such as 'Mar') are accepted too.

    Returns:
        The abbreviation ('Jan', 'Feb', ..., 'Sept', ..., 'Dec') when *month*
        is a number from 1 to 12; otherwise *month* itself, unchanged. This
        includes non-string values, which are passed straight through.

    >>> convertNumToMonth('03')
    'Mar'
    >>> convertNumToMonth('Mar')
    'Mar'
    """
    if not isinstance(month, str):
        return month
    #Normalise '03' / ' 3 ' to '3' before the lookup
    key = month.strip().lstrip('0')
    return _MONTH_ABBREVIATIONS.get(key, month)

#Fourth, ask for a search keyword and send it to the ESearch URL below,
#prompting again until a request succeeds.
url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&retmax=25&sort=relevance&term='
#retmax=25 in the URL caps how many publications one search can return.
#This value can be adjusted; 25 keeps a limit on the results.
pageFound = 0
attempts = 0
while(pageFound == 0):
    if(attempts > 0):
        print("Sorry your request failed")
    try:
        keyword = input("Please enter a keyword: ")
    except EOFError:
        #input() raises EOFError once stdin is closed or exhausted
        #(Ctrl-D / Ctrl-Z, or piped input that ran out); stop cleanly
        #instead of dying with a traceback.
        raise SystemExit("No keyword entered (end of input); exiting.")
    attempts += 1
    try:
        #Escape the keyword so spaces, '&', '#', etc. cannot break the
        #query string or make urlopen reject the URL.
        page = urllib.request.urlopen(url + urllib.parse.quote_plus(keyword))
        pageFound = 1
    except urllib.error.URLError as e:
        #URLError also covers HTTPError (its subclass), so network-level
        #failures are retried the same way as HTTP error statuses.
        print(e)
        if(attempts == 1):
            print("Initial request failed")
        else:
            print("Subsequent request failed")

#Read the whole response, then close the connection.
with page:
    jsonData = json.loads(page.read().decode('utf-8'))

#Fifth, helpers that turn one parsed EFetch XML record into printable text.
def _elementText(element):
    """Return all text inside *element*, nested markup included, stripped.

    Returns '' when *element* is None so the result of ``find()`` can be
    passed in directly.
    """
    if element is None:
        return ''
    return ''.join(element.itertext()).strip()


def _formatAuthors(root):
    """Return the author list, e.g. 'A. Last, B. Last, and C. Last, '.

    Authors whose ValidYN attribute is present and not 'Y' are skipped. An
    author without a LastName falls back to CollectiveName. Returns '' when
    no usable author is found.
    """
    names = []
    for author in root.iter('Author'):
        #A missing ValidYN attribute is treated as valid instead of raising
        if(author.get('ValidYN', 'Y') != 'Y'):
            continue
        lastName = _elementText(author.find('LastName'))
        initials = _elementText(author.find('Initials'))
        if(lastName and initials):
            names.append(initials + ". " + lastName)
        elif(lastName):
            names.append(lastName)
        else:
            collectiveName = _elementText(author.find('CollectiveName'))
            if(collectiveName):
                names.append(collectiveName)
    if(not names):
        return ''
    #Only put "and" before the final author when there is more than one
    if(len(names) > 1):
        names[-1] = "and " + names[-1]
    return ", ".join(names) + ", "


def _formatCitation(root, pubmedId):
    """Build the one-line bibliography entry for the record under *root*.

    Args:
        root: Parsed XML element of one EFetch response.
        pubmedId: The PubMed id (string) that was fetched.

    Returns:
        Authors, quoted article title, journal title, volume(issue), pages,
        publication date, the PubMed id and a DOI (or PII when there is no
        DOI). Parts that are absent from the XML are left out.
    """
    biblio = " " + _formatAuthors(root)

    for articleTitle in root.iter('ArticleTitle'):
        biblio += ' " ' + _elementText(articleTitle) + ', " '

    #Empty string means "not present in this record"
    title = volume = issue = year = month = day = pages = pii = doi = ''
    issuePath = 'Journal/JournalIssue/'
    for article in root.iter('Article'):
        #"or <old value>" keeps an earlier value when this Article lacks one
        title = _elementText(article.find('Journal/Title')) or title
        volume = _elementText(article.find(issuePath + 'Volume')) or volume
        issue = _elementText(article.find(issuePath + 'Issue')) or issue
        year = _elementText(article.find(issuePath + 'PubDate/Year')) or year
        month = _elementText(article.find(issuePath + 'PubDate/Month')) or month
        day = _elementText(article.find(issuePath + 'PubDate/Day')) or day
        pages = _elementText(article.find('Pagination/MedlinePgn')) or pages
        for location in article.findall('ELocationID'):
            idType = location.get('EIdType')
            if(idType == 'pii'):
                pii = _elementText(location) or pii
            elif(idType == 'doi'):
                doi = _elementText(location) or doi

    if(title):
        biblio += title + ' '
    if(volume and issue):
        biblio += volume + '(' + issue + ') '
    elif(volume):
        biblio += volume + ' '
    if(pages):
        biblio += 'pp. ' + pages + ' '

    #Assemble the date first so the parentheses are always balanced
    dateParts = []
    if(month):
        dateParts.append(convertNumToMonth(month) + '.')
    if(day):
        dateParts.append(day)
    if(year):
        dateParts.append(year)
    if(dateParts):
        biblio += '(' + ' '.join(dateParts) + '). '

    biblio += 'PUBMED: ' + pubmedId + '; '
    if(doi):
        biblio += 'DOI ' + doi
    elif(pii):
        biblio += 'PII ' + pii
    return biblio


def _extractAbstract(root):
    """Return the text of every AbstractText element, joined by spaces."""
    sections = (_elementText(node) for node in root.iter('AbstractText'))
    return ' '.join(section for section in sections if section)


#Sixth, read the id list from the search result. A response without the
#expected keys is treated as "no results" instead of raising KeyError.
idList = jsonData.get('esearchresult', {}).get('idlist', [])
if(not idList):
    print("Sorry no results were returned")

#Seventh, fetch each record and print its bibliography entry and abstract.
for pubmedId in idList:
    print(' ')
    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=' + pubmedId
    try:
        with urllib.request.urlopen(url) as page:
            pageDecoded = page.read().decode('utf-8')
        root = ET.fromstring(pageDecoded)
    except (urllib.error.URLError, ET.ParseError) as e:
        #One bad record should not abort the remaining ones
        print(e)
        print("Could not retrieve PubMed record " + pubmedId)
        continue

    print('Bibliography: ')
    print(_formatCitation(root, pubmedId))
    print('')

    #Lastly, print the abstract
    print('Abstract:')
    print(_extractAbstract(root))
# This is the end of the for loop. Have fun.
...