Я использую apache tika
, чтобы получить контент как html/xhtml
в проекте appengine.Я читаю файл из облачного хранилища и удаляю все изображения, используя apache poi
, но есть некоторые формы (например, прямоугольники, круги).Я не могу удалить их, поэтому в конечном итоге изображения в файле.Таким образом, синтаксический анализ неудачен, поскольку GAE ограничил доступ к javax.image.io
.
. Мне нужно прочитать файл .docx
, проанализировать его как html/xhtml
и использовать извлеченный текст html
, чтобы отобразить файл в браузере.
Вот код, который я попробовал ниже:
Blob blob = storageService.downloadFile({Bucket Name}, file.getCloudFileName());
ReadChannel reader = blob.reader();
InputStream is = Channels.newInputStream(reader);
//deleting images from body paragraphs
XWPFDocument document= new XWPFDocument(is);
List<XWPFPictureData> pictureData=document.getAllPictures();
if(pictureData.size()>0){
TaxOrganizerController.removePictures(document.getParagraphs());
}
//removing images from headers
**strong text**List<XWPFHeader> headers=document.getHeaderList();
for(XWPFHeader header:headers){
header.clearHeaderFooter();
}
//removing images from footers
List<XWPFFooter> footers= document.getFooterList();
for(XWPFFooter footer:footers){
footer.clearHeaderFooter();
}
ByteArrayOutputStream baos = new ByteArrayOutputStream();
document.write(baos);
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
Metadata metadata = new Metadata();
OOXMLParser parser = new OOXMLParser();
ContentHandler handler = new ToHTMLContentHandler();
ParseContext context = new ParseContext();
OfficeParserConfig config= new OfficeParserConfig();
config.setIncludeShapeBasedContent(false);
context.set(OfficeParserConfig.class, config);
parser.parse(bais,handler,metadata,context);
Это трассировка стека:
java.lang.NoClassDefFoundError: javax.imageio.ImageIO is a restricted class. Please see the Google App Engine developer's guide for more details.
at com.google.appengine.tools.development.agent.runtime.Runtime.reject(Runtime.java:52)
at org.apache.tika.parser.image.ImageParser.parse(ImageParser.java:180)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135)
at org.apache.tika.parser.DelegatingParser.parse(DelegatingParser.java:72)
at org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor.parseEmbedded(ParsingEmbeddedDocumentExtractor.java:102)
at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleEmbeddedFile(AbstractOOXMLExtractor.java:368)
at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleEmbeddedPart(AbstractOOXMLExtractor.java:253)
at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.handleEmbeddedParts(AbstractOOXMLExtractor.java:198)
at org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:135)
at org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:142)
at org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:106)
at com.act.aotax.controller.TaxOrganizerController.wordOrganizer(TaxOrganizerController.java:421)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.google.appengine.tools.development.agent.runtime.Runtime.invoke(Runtime.java:130)
at org.springframework.web.method.support.InvocableHandlerMethod.doInvoke(InvocableHandlerMethod.java:221)
at org.springframework.web.method.support.InvocableHandlerMethod.invokeForRequest(InvocableHandlerMethod.java:136)
at org.springframework.web.servlet.mvc.method.annotation.ServletInvocableHandlerMethod.invokeAndHandle(ServletInvocableHandlerMethod.java:110)
at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.invokeHandlerMethod(RequestMappingHandlerAdapter.java:817)
at org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerAdapter.handleInternal(RequestMappingHandlerAdapter.java:731)
at org.springframework.web.servlet.mvc.method.AbstractHandlerMethodAdapter.handle(AbstractHandlerMethodAdapter.java:85)
at org.springframework.web.servlet.DispatcherServlet.doDispatch(DispatcherServlet.java:959)
at org.springframework.web.servlet.DispatcherServlet.doService(DispatcherServlet.java:893)
at org.springframework.web.servlet.FrameworkServlet.processRequest(FrameworkServlet.java:968)
at org.springframework.web.servlet.FrameworkServlet.doGet(FrameworkServlet.java:859)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:617)
at org.springframework.web.servlet.FrameworkServlet.service(FrameworkServlet.java:844)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:717)
at org.mortbay.jetty.servlet.ServletHolder.handle(ServletHolder.java:511)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1166)
at com.googlecode.objectify.ObjectifyFilter.doFilter(ObjectifyFilter.java:48)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:316)
at org.springframework.security.web.access.intercept.FilterSecurityInterceptor.invoke(FilterSecurityInterceptor.java:126)
at org.springframework.security.web.access.intercept.FilterSecurityInterceptor.doFilter(FilterSecurityInterceptor.java:90)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.access.ExceptionTranslationFilter.doFilter(ExceptionTranslationFilter.java:114)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.authentication.AnonymousAuthenticationFilter.doFilter(AnonymousAuthenticationFilter.java:111)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.servletapi.SecurityContextHolderAwareRequestFilter.doFilter(SecurityContextHolderAwareRequestFilter.java:169)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.authentication.AbstractAuthenticationProcessingFilter.doFilter(AbstractAuthenticationProcessingFilter.java:205)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at com.act.aotax.security.AotaxPreAuthenticationFilter.doFilter(AotaxPreAuthenticationFilter.java:36)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.authentication.logout.LogoutFilter.doFilter(LogoutFilter.java:120)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.header.HeaderWriterFilter.doFilterInternal(HeaderWriterFilter.java:64)
at org.springframework.web.filter.OncePerRequestFilter.doFilter(OncePerRequestFilter.java:107)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.context.SecurityContextPersistenceFilter.doFilter(SecurityContextPersistenceFilter.java:91)
at org.springframework.security.web.FilterChainProxy$VirtualFilterChain.doFilter(FilterChainProxy.java:330)
at org.springframework.security.web.FilterChainProxy.doFilterInternal(FilterChainProxy.java:213)
at org.springframework.security.web.FilterChainProxy.doFilter(FilterChainProxy.java:176)
at org.springframework.web.filter.DelegatingFilterProxy.invokeDelegate(DelegatingFilterProxy.java:346)
at org.springframework.web.filter.DelegatingFilterProxy.doFilter(DelegatingFilterProxy.java:262)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at com.google.appengine.api.socket.dev.DevSocketFilter.doFilter(DevSocketFilter.java:74)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at com.google.appengine.tools.development.ResponseRewriterFilter.doFilter(ResponseRewriterFilter.java:128)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at com.google.appengine.tools.development.HeaderVerificationFilter.doFilter(HeaderVerificationFilter.java:34)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at com.google.appengine.api.blobstore.dev.ServeBlobFilter.doFilter(ServeBlobFilter.java:63)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at com.google.apphosting.utils.servlet.TransactionCleanupFilter.doFilter(TransactionCleanupFilter.java:50)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at com.google.appengine.tools.development.StaticFileFilter.doFilter(StaticFileFilter.java:125)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at com.google.appengine.tools.development.DevAppServerModulesFilter.doDirectRequest(DevAppServerModulesFilter.java:366)
at com.google.appengine.tools.development.DevAppServerModulesFilter.doDirectModuleRequest(DevAppServerModulesFilter.java:349)
at com.google.appengine.tools.development.DevAppServerModulesFilter.doFilter(DevAppServerModulesFilter.java:116)
at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1157)
at org.mortbay.jetty.servlet.ServletHandler.handle(ServletHandler.java:388)
at org.mortbay.jetty.security.SecurityHandler.handle(SecurityHandler.java:216)
at org.mortbay.jetty.servlet.SessionHandler.handle(SessionHandler.java:182)
at org.mortbay.jetty.handler.ContextHandler.handle(ContextHandler.java:765)
at org.mortbay.jetty.webapp.WebAppContext.handle(WebAppContext.java:418)
at com.google.appengine.tools.development.DevAppEngineWebAppContext.handle(DevAppEngineWebAppContext.java:98)
at org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
at com.google.appengine.tools.development.JettyContainerService$ApiProxyHandler.handle(JettyContainerService.java:511)
at org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
at org.mortbay.jetty.Server.handle(Server.java:326)
at org.mortbay.jetty.HttpConnection.handleRequest(HttpConnection.java:542)
at org.mortbay.jetty.HttpConnection$RequestHandler.headerComplete(HttpConnection.java:923)
at org.mortbay.jetty.HttpParser.parseNext(HttpParser.java:547)
at org.mortbay.jetty.HttpParser.parseAvailable(HttpParser.java:212)
at org.mortbay.jetty.HttpConnection.handle(HttpConnection.java:404)
at org.mortbay.io.nio.SelectChannelEndPoint.run(SelectChannelEndPoint.java:409)
at org.mortbay.thread.QueuedThreadPool$PoolThread.run(QueuedThreadPool.java:582)