Я запустил tika server на моей машине и вызываю api, используя терминал, который работает нормально.Я могу извлечь текст из изображения и PDF.Но я хочу реализовать вызов API в моем приложении Python.
curl -T price.xls http://localhost:9998/tika --header "Accept: text/plain"
Выше я должен сделать вызов API.Я могу запустить это в моем терминале и работает нормально, но как реализовать в приложении Python.Я установил и попробовал запросы.
API_URL = 'http://localhost:9998/tika'
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
tika_client = TikaApp(file_jar=join(APP_ROOT,'../tika-app-1.19.jar'))
data = {
"url": join(APP_ROOT,'../static/image/a.pdf')
}
response = requests.put(API_URL, data)
print(response.content)
Любая помощь будет признателен.Спасибо :)
ошибка вывода
INFO tika (application/x-www-form-urlencoded)
WARN tika: Text extraction failed
org.apache.tika.exception.TikaException: Unexpected RuntimeException from org.apache.tika.server.resource.TikaResource$1@475b0e2
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:282)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:143)
at org.apache.tika.server.resource.TikaResource.parse(TikaResource.java:402)
at org.apache.tika.server.resource.TikaResource$5.write(TikaResource.java:513)
at org.apache.cxf.jaxrs.provider.BinaryDataProvider.writeTo(BinaryDataProvider.java:177)
at org.apache.cxf.jaxrs.utils.JAXRSUtils.writeMessageBody(JAXRSUtils.java:1391)
at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.serializeMessage(JAXRSOutInterceptor.java:246)
at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.processResponse(JAXRSOutInterceptor.java:122)
at org.apache.cxf.jaxrs.interceptor.JAXRSOutInterceptor.handleMessage(JAXRSOutInterceptor.java:84)
at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:308)
at org.apache.cxf.interceptor.OutgoingChainInterceptor.handleMessage(OutgoingChainInterceptor.java:90)
at org.apache.cxf.phase.PhaseInterceptorChain.doIntercept(PhaseInterceptorChain.java:308)
at org.apache.cxf.transport.ChainInitiationObserver.onMessage(ChainInitiationObserver.java:121)
at org.apache.cxf.transport.http.AbstractHTTPDestination.invoke(AbstractHTTPDestination.java:267)
at org.apache.cxf.transport.http_jetty.JettyHTTPDestination.doService(JettyHTTPDestination.java:247)
at org.apache.cxf.transport.http_jetty.JettyHTTPHandler.handle(JettyHTTPHandler.java:79)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:257)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1317)
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:205)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1219)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.Server.handle(Server.java:531)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:352)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:260)
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:281)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:118)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126)
at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:762)
at org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:680)
at java.lang.Thread.run(Thread.java:748)
Caused by: javax.ws.rs.WebApplicationException: HTTP 415 Unsupported Media Type
at org.apache.tika.server.resource.TikaResource$1.parse(TikaResource.java:128)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
... 37 more
ERROR Problem with writing the data, class org.apache.tika.server.resource.TikaResource$5, ContentType: text/plain