Ошибка при поиске большого количества заголовков IMDb (в нескольких потоках) с помощью IMDbPY - PullRequest
0 голосов
/ 24 февраля 2020

У меня есть программа, создающая n потоков, и каждый поток ищет заголовок Imdb для каждого элемента в списке.

Если я ищу несколько названий, программа работает.

Если я пытаюсь найти, например, более 50 названий, я получаю много ошибок, подобных этим:

Traceback (most recent call last):
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\__init__.py", line 732, in up
date
    ret = method(mopID)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 439, in get_movie_main
    cont = self._retrieve(self.urls['movie_main'] % movieID + 'reference')
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 376, in _retrieve
    ret = self.urlOpener.retrieve_unicode(url, size=size)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 253, in retrieve_unicode
    'original exception': e}
imdb._exceptions.IMDbDataAccessError: {'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt10267396/
reference', 'proxy': 'yyyy', 'exception type': 'IOError', 'original exception'
: <HTTPError 503: 'Service Unavailable'>}
2020-02-24 16:12:05,178 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\_exceptions.py:34: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https:/
/www.imdb.com/title/tt2402153/reference', 'proxy': 'yyyy', 'exception type': '
IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 219, in retrieve_unicode
    response = uopener.open(url)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
    response = meth(req, response)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
    return self._call_chain(*args)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_defa
ult
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
2020-02-24 16:12:05,178 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\__init__.py:737: caught an exception retrieving or parsing "main" info set for mopID "2402153" (accessSystem: http)

Traceback (most recent call last):
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 219, in retrieve_unicode
    response = uopener.open(url)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
    response = meth(req, response)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
    return self._call_chain(*args)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_defa
ult
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\__init__.py", line 732, in up
date
    ret = method(mopID)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 439, in get_movie_main
    cont = self._retrieve(self.urls['movie_main'] % movieID + 'reference')
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 376, in _retrieve
    ret = self.urlOpener.retrieve_unicode(url, size=size)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 253, in retrieve_unicode
    'original exception': e}
imdb._exceptions.IMDbDataAccessError: {'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt2402153/r
eference', 'proxy': 'yyyy', 'exception type': 'IOError', 'original exception':
 <HTTPError 503: 'Service Unavailable'>}
2020-02-24 16:12:05,248 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\_exceptions.py:34: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https:/
/www.imdb.com/title/tt9193612/reference', 'proxy': 'yyyy', 'exception type': '
IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 219, in retrieve_unicode
    response = uopener.open(url)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
    response = meth(req, response)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
    'http', request, response, code, msg, hdrs)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
    return self._call_chain(*args)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
    result = func(*args)
  File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_defa
ult
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
2020-02-24 16:12:05,258 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\__init__.py:737: caught an exception retrieving or parsing "main" info set for mopID "9193612" (accessSystem: http)

Есть ли ограничение на количество поисковых запросов? Я пробовал и с прокси, и без прокси.

Это служебный класс, который я использую в своей программе:

import imdb
from setup_logger import logger
class ImdbUtility():
    """Helper around an IMDbPY client that looks up well-rated films by title.

    The instance keeps four descriptive attributes (``titolo``, ``anno``,
    ``voto``, ``numeroVoti``) that start as ``None`` and are only read by
    ``__repr__``/``__str__``; nothing in this class assigns them afterwards.
    """

    # Info sets requested from IMDb for every candidate movie.
    _INFO_SETS = ['main', 'vote details']
    # Thresholds a movie must satisfy to be returned by getFilm().
    _MIN_RATING = 7
    _MIN_VOTES = 5000

    def __init__(self, proxyEnabled=False):
        """Create the underlying IMDb client.

        Args:
            proxyEnabled: when true, build the client with
                ``accessSystem='https'`` and route it through the configured
                proxy; otherwise use the default client.

        Raises:
            Exception: whatever the IMDb client raised while being created.
                The error is logged and re-raised, because an instance
                without ``_imdb`` cannot serve any later call.
        """
        self.titolo = None
        self.anno = None
        self.voto = None
        self.numeroVoti = None

        try:
            if proxyEnabled:
                self._imdb = imdb.IMDb(accessSystem='https')
                self._imdb.set_proxy('xx:xxx@yyyyyy')
            else:
                self._imdb = imdb.IMDb()
        except Exception as exc:
            # The original handler referenced an undefined ``titolo`` and an
            # un-imported ``sys``, so it masked the real error with NameError.
            logger.error('Errore ImdbUtility  __init__ - {!r}'.format(exc))
            raise

    def __repr__(self):
        """Return a one-line description; safe when attributes are None."""
        # str.format converts every value, so None/int attributes no longer
        # raise TypeError as plain ``+`` concatenation did.
        return "TITOLO --> {} - ANNO --> {} - VOTO --> {}{}\n".format(
            self.titolo, self.anno, self.voto, self.numeroVoti)

    def __str__(self):
        """Return the same text as ``__repr__``."""
        return self.__repr__()

    @staticmethod
    def _is_match(movie, anno):
        """Tell whether *movie* has the wanted year and enough rating/votes."""
        year = movie.get('year')
        rating = movie.get('rating')
        votes = movie.get('votes')
        if not (year and rating and votes):
            return False
        return (int(year) == anno
                and int(rating) >= ImdbUtility._MIN_RATING
                and int(votes) > ImdbUtility._MIN_VOTES)

    def getFilm(self, titolo, anno):
        """Search IMDb for *titolo* and return the matching, well-rated movies.

        A movie is kept when its year equals *anno*, its rating is at least 7
        and it has more than 5000 votes.

        Args:
            titolo: title to search for.
            anno: release year the movie must have (compared as an int).

        Returns:
            A list of matching movie objects. Empty when the search fails or
            nothing matches; movies whose details cannot be fetched are
            skipped instead of aborting the whole lookup.
        """
        try:
            candidates = self._imdb.search_movie(titolo)
        except Exception as exc:
            logger.error('Errore ImdbUtility getFilm -{} - {!r}'.format(titolo, exc))
            return []

        selected = []
        for movie in candidates:
            try:
                # Skip the detail request when the search hit already reports
                # a different year: each update() is a separate HTTP call and
                # fewer calls lowers the chance of being throttled.
                # NOTE(review): assumes update() never changes 'year' — confirm.
                known_year = movie.get('year')
                if known_year and int(known_year) != anno:
                    continue

                self._imdb.update(movie, self._INFO_SETS)
                if self._is_match(movie, anno):
                    selected.append(movie)
            except Exception as exc:
                # One failed title (e.g. HTTP 503) must not discard the rest.
                logger.error('Errore ImdbUtility getFilm -{} - {!r}'.format(titolo, exc))

        return selected
...