Я пытаюсь запустить следующий код, чтобы найти слово, которое идёт в тексте сразу после слова «examination».
На входе — PDF-файл, который я пытаюсь преобразовать в текст с помощью библиотеки tika.
Почему-то код выдаёт ошибку, связанную с tika, которую я не понимаю.
Кто-нибудь знает, как это исправить, или знает другой способ решить мою задачу?
import re
from tika import parser
def next_word_after(text, search):
    """Return the word that follows the first occurrence of *search* in *text*.

    The text is split on whitespace and *search* must match a whole token
    exactly (case-sensitive, punctuation included).

    Args:
        text: The text to scan. ``None`` or an empty string is treated as
            containing no words.
        search: The word to look for.

    Returns:
        The token immediately after the first match, or ``None`` when
        *search* is absent or is the last token in the text.
    """
    words = text.split() if text else []
    try:
        position = words.index(search)
    except ValueError:
        # The search word does not occur in the text at all.
        return None
    if position + 1 >= len(words):
        # The search word is the final token, so nothing follows it.
        return None
    return words[position + 1]


# Guarded so the helper above can be imported without starting Tika.
if __name__ == "__main__":
    # from_file() needs the Tika server jar; the traceback in this file shows
    # it trying to download that jar and failing with HTTP 503, which is a
    # network/mirror problem rather than a bug in this script.
    # NOTE(review): tika-python reportedly accepts a pre-downloaded jar via the
    # TIKA_SERVER_JAR environment variable -- confirm against its README.
    raw = parser.from_file('application0001.pdf')

    # The parse result is indexed like a mapping; the extracted text lives
    # under 'content', so that is what must be split (not the result itself).
    # NOTE(review): 'content' is presumably None when no text could be
    # extracted -- next_word_after() tolerates that.
    content = raw['content']
    print(content)

    search = "examination"
    next_word = next_word_after(content, search)
    print(next_word)
Это ошибка, которую я получаю при запуске, и я не понимаю, что это значит.
2019-05-24 09:53:53,217 [MainThread ] [INFO ] Retrieving http://search.maven.org/remotecontent?filepath=org/apache/tika/tika-server/1.19/tika-server-1.19.jar to /var/folders/xn/p33pzhs179n33z55z66lqcn00000gn/T/tika-server.jar.
Traceback (most recent call last):
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 716, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/Mauritius/Desktop/text_search.py", line 7, in <module>
raw = parser.from_file('application0001.pdf')
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/parser.py", line 36, in from_file
jsonOutput = parse1('all', filename, serverEndpoint, headers=headers, config_path=config_path)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 328, in parse1
headers, verbose, tikaServerJar, config_path=config_path, rawResponse=rawResponse)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 522, in callServer
serverEndpoint = checkTikaServer(scheme, serverHost, port, tikaServerJar, classpath, config_path)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 571, in checkTikaServer
getRemoteJar(tikaServerJar, jarPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/site-packages/tika/tika.py", line 726, in getRemoteJar
urlretrieve(urlOrPath, destPath)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 248, in urlretrieve
with contextlib.closing(urlopen(url, data)) as fp:
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 532, in open
response = meth(req, response)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 570, in error
return self._call_chain(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 504, in _call_chain
result = func(*args)
File "/Users/Mauritius/anaconda3/lib/python3.6/urllib/request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Service Unavailable
[Finished in 2.7s with exit code 1]
[shell_cmd: python -u "/Users/Mauritius/Desktop/text_search.py"]
[dir: /Users/Mauritius/Desktop]
[path: /Users/Mauritius/miniconda3/bin:/opt/local/bin:/opt/local/sbin:/Users/Mauritius/anaconda3/bin:/Library/Frameworks/Python.framework/Versions/3.5/bin://anaconda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/TeX/texbin]