У меня есть программа, создающая n потоков; каждый поток ищет на IMDb название для каждого элемента списка.
Если я ищу несколько названий, программа работает.
Если я пытаюсь найти больше названий (например, более 50), я получаю много ошибок, подобных этим:
Traceback (most recent call last):
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\__init__.py", line 732, in up
date
ret = method(mopID)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 439, in get_movie_main
cont = self._retrieve(self.urls['movie_main'] % movieID + 'reference')
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 376, in _retrieve
ret = self.urlOpener.retrieve_unicode(url, size=size)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 253, in retrieve_unicode
'original exception': e}
imdb._exceptions.IMDbDataAccessError: {'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt10267396/
reference', 'proxy': 'yyyy', 'exception type': 'IOError', 'original exception'
: <HTTPError 503: 'Service Unavailable'>}
2020-02-24 16:12:05,178 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\_exceptions.py:34: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https:/
/www.imdb.com/title/tt2402153/reference', 'proxy': 'yyyy', 'exception type': '
IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 219, in retrieve_unicode
response = uopener.open(url)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_defa
ult
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
2020-02-24 16:12:05,178 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\__init__.py:737: caught an exception retrieving or parsing "main" info set for mopID "2402153" (accessSystem: http)
Traceback (most recent call last):
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 219, in retrieve_unicode
response = uopener.open(url)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_defa
ult
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\__init__.py", line 732, in up
date
ret = method(mopID)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 439, in get_movie_main
cont = self._retrieve(self.urls['movie_main'] % movieID + 'reference')
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 376, in _retrieve
ret = self.urlOpener.retrieve_unicode(url, size=size)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 253, in retrieve_unicode
'original exception': e}
imdb._exceptions.IMDbDataAccessError: {'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt2402153/r
eference', 'proxy': 'yyyy', 'exception type': 'IOError', 'original exception':
<HTTPError 503: 'Service Unavailable'>}
2020-02-24 16:12:05,248 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\_exceptions.py:34: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https:/
/www.imdb.com/title/tt9193612/reference', 'proxy': 'yyyy', 'exception type': '
IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\imdb\parser\http\__init__.py", lin
e 219, in retrieve_unicode
response = uopener.open(url)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 531, in open
response = meth(req, response)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 641, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 503, in _call_chain
result = func(*args)
File "C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\urllib\request.py", line 649, in http_error_defa
ult
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
2020-02-24 16:12:05,258 CRITICAL [imdbpy] C:\Users\xx\AppData\Local\Programs\Python\Python37\lib\site-packages\
imdb\__init__.py:737: caught an exception retrieving or parsing "main" info set for mopID "9193612" (accessSystem: http)
Есть ли ограничение на количество поисковых запросов? Я пробовал и с прокси, и без прокси.
Это служебный класс, который я использую в своей программе:
import imdb
from setup_logger import logger
class ImdbUtility():
    """Small helper around an IMDbPY client for looking up well-rated films.

    The instance keeps four descriptive attributes (``titolo``, ``anno``,
    ``voto``, ``numeroVoti``) that start as ``None`` and are only used by
    ``__repr__``/``__str__``, plus the IMDbPY client in ``_imdb``.
    """

    def __init__(self, proxyEnabled=False):
        """Create the IMDb client, optionally routed through a proxy.

        Args:
            proxyEnabled: when true, the client is created with
                ``accessSystem='https'`` and a proxy is configured on it.

        A failure while creating the client is logged rather than raised;
        in that case ``_imdb`` may be ``None`` and ``getFilm`` returns an
        empty list.
        """
        self.titolo = None
        self.anno = None
        self.voto = None
        self.numeroVoti = None
        # Defined before the try so the attribute always exists, even when
        # client construction fails.
        self._imdb = None
        try:
            if proxyEnabled:
                self._imdb = imdb.IMDb(accessSystem='https')
                self._imdb.set_proxy('xx:xxx@yyyyyy')
            else:
                self._imdb = imdb.IMDb()
        except Exception as exc:
            # The old handler referenced an undefined ``titolo`` and an
            # un-imported ``sys``, so it raised instead of logging.
            logger.error('Errore ImdbUtility __init__ - ' + str(exc))

    def __repr__(self):
        """Return the one-line textual summary of the stored film fields."""
        # str() on every field: they default to None and may be numbers,
        # which plain ``+`` concatenation cannot handle.
        return (
            "TITOLO --> " + str(self.titolo)
            + " - " + "ANNO --> " + str(self.anno)
            + " - " + "VOTO --> " + str(self.voto) + str(self.numeroVoti)
            + "\n"
        )

    def __str__(self):
        """Same text as ``__repr__``."""
        return self.__repr__()

    @staticmethod
    def _as_number(value, kind):
        """Convert *value* with *kind* (``int``/``float``); ``None`` if it fails."""
        try:
            return kind(value)
        except (TypeError, ValueError):
            return None

    def _is_good_match(self, movie, year, min_rating, min_votes):
        """Tell whether *movie* has the wanted year and enough rating/votes."""
        movie_year = self._as_number(movie.get('year'), int)
        rating = self._as_number(movie.get('rating'), float)
        votes = self._as_number(movie.get('votes'), int)
        # Missing or zero values are rejected, as in the original filter.
        if not movie_year or not rating or not votes:
            return False
        return movie_year == year and rating >= min_rating and votes > min_votes

    def getFilm(self, titolo, anno, min_rating=7, min_votes=5000):
        """Search IMDb for *titolo* and keep only well-rated films of *anno*.

        Args:
            titolo: title to search for.
            anno: release year; an int or anything ``int()`` accepts.
            min_rating: minimum rating (inclusive) a film must have.
            min_votes: number of votes a film must exceed.

        Returns:
            A list of the matching movie objects. The list is empty when the
            search fails or nothing matches; errors are logged, not raised.
        """
        selected = []
        try:
            wanted_year = int(anno)
            found = self._imdb.search_movie(titolo)
        except Exception as exc:
            logger.error('Errore ImdbUtility getFilm -' + str(titolo) + " - " + str(exc))
            return selected

        for movie in found:
            # Each update costs HTTP requests, so skip results whose year is
            # already known from the search and cannot match.
            listed_year = self._as_number(movie.get('year'), int)
            if listed_year is not None and listed_year != wanted_year:
                continue
            try:
                # Refresh the details of the candidate film.
                self._imdb.update(movie, ['main', 'vote details'])
            except Exception as exc:
                # One failed lookup (e.g. HTTP 503) must not discard the
                # films that were already retrieved.
                logger.error('Errore ImdbUtility getFilm -' + str(titolo) + " - " + str(exc))
                continue
            # Drop films with a different year or too low rating/votes.
            if self._is_good_match(movie, wanted_year, min_rating, min_votes):
                selected.append(movie)
        return selected