Я использую Solr 3.1, Apache Tika 0.9 и Solrnet 0.3.1 для индексирования документа, например, файла .doc и .pdf.
Я успешно проиндексировал и извлек документ на локальном уровне, используя этот код
Startup.Init<Article>("http://k9server:8080/solr");
ISolrOperations<Article> solr = ServiceLocator.Current.GetInstance <ISolrOperations<Article>>();
string filecontent = null;
using(var file = File.OpenRead(@"D:\\solr.doc")){
var response = solr.Extract(new ExtractParameters(file, "abcd1") {
ExtractOnly = true,
ExtractFormat = ExtractFormat.Text,
});
filecontent = response.Content;
}
solr.Add(new Article() {
ID = "36",
EMAIL = "1234",
COMMENTS = filecontent,
PRO_ID = 256
});
// commit to the index
solr.Commit();
Но я столкнулся с проблемой удаленного извлечения или индексирования документа с использованием того же кода, я получил ошибку:
The remote server returned an error: (500) Internal Server Error.
SolrNet.Exceptions.SolrConnectionException was unhandled
Сообщение
Apache Tomcat/6.0.32 - Error report HTTP Status 500 - org.apache.poi.poifs.filesystem.POIFSFileSystem.getRoot()Lorg/apache/poi/poifs/filesystem/DirectoryNode;
java.lang.NoSuchMethodError: org.apache.poi.poifs.filesystem.POIFSFileSystem.getRoot()Lorg/apache/poi/poifs/filesystem/DirectoryNode;
at org.apache.tika.parser.microsoft.SummaryExtractor.parseSummaryEntryIfExists(SummaryExtractor.java:65)
at org.apache.tika.parser.microsoft.SummaryExtractor.parseSummaries(SummaryExtractor.java:57)
at org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:164)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135)
at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:196)
at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:55)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:129)
at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:238)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:1360)
at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:356)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:252)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
at org.apache.coyote.http11.Http11AprProcessor.process(Http11AprProcessor.java:864)
at org.apache.coyote.http11.Http11AprProtocol$Http11ConnectionHandler.process(Http11AprProtocol.java:579)
at org.apache.tomcat.util.net.AprEndpoint$Worker.run(AprEndpoint.java:1665)
at java.lang.Thread.run(Unknown Source)
Сообщение
org.apache.poi.poifs.filesystem.POIFSFileSystem.getRoot()Lorg/apache/poi/poifs/filesystem/DirectoryNode;
java.lang.NoSuchMethodError: org.apache.poi.poifs.filesystem.POIFSFileSystem.getRoot()Lorg/apache/poi/poifs/filesystem/DirectoryNode;
at org.apache.tika.parser.microsoft.SummaryExtractor.parseSummaryEntryIfExists(SummaryExtractor.java:65)
at org.apache.tika.parser.microsoft.SummaryExtractor.parseSummaries(SummaryExtractor.java:57)
at org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:164)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135)
at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:196)
at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:55)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:129)
at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:238)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:1360)
at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:356)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:252)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
at org.apache.coyote.http11.Http11AprProcessor.process(Http11AprProcessor.java:864)
at org.apache.coyote.http11.Http11AprProtocol$Http11ConnectionHandler.process(Http11AprProtocol.java:579)
at org.apache.tomcat.util.net.AprEndpoint$Worker.run(AprEndpoint.java:1665)
at java.lang.Thread.run(Unknown Source)
Описание
The server encountered an internal error (org.apache.poi.poifs.filesystem.POIFSFileSystem.getRoot()Lorg/apache/poi/poifs/filesystem/DirectoryNode;
java.lang.NoSuchMethodError: org.apache.poi.poifs.filesystem.POIFSFileSystem.getRoot()Lorg/apache/poi/poifs/filesystem/DirectoryNode;
at org.apache.tika.parser.microsoft.SummaryExtractor.parseSummaryEntryIfExists(SummaryExtractor.java:65)
at org.apache.tika.parser.microsoft.SummaryExtractor.parseSummaries(SummaryExtractor.java:57)
at org.apache.tika.parser.microsoft.OfficeParser.parse(OfficeParser.java:164)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
at org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:197)
at org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:135)
at org.apache.solr.handler.extraction.ExtractingDocumentLoader.load(ExtractingDocumentLoader.java:196)
at org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:55)
at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:129)
at org.apache.solr.core.RequestHandlers$LazyRequestHandlerWrapper.handleRequest(RequestHandlers.java:238)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:1360)
at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:356)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:252)
at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:235)
at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:206)
at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:233)
at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:191)
at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:127)
at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:102)
at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:109)
at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:298)
at org.apache.coyote.http11.Http11AprProcessor.process(Http11AprProcessor.java:864)
at org.apache.coyote.http11.Http11AprProtocol$Http11ConnectionHandler.process(Http11AprProtocol.java:579)
at org.apache.tomcat.util.net.AprEndpoint$Worker.run(AprEndpoint.java:1665)
at java.lang.Thread.run(Unknown Source)
) that prevented it from fulfilling this request.
Source=SolrNet
StackTrace:
at SolrNet.Impl.SolrConnection.PostStream(String relativeUrl, String contentType, Stream content, IEnumerable`1 parameters)
at SolrNet.Commands.ExtractCommand.Execute(ISolrConnection connection)
at SolrNet.Impl.SolrBasicServer`1.Send(ISolrCommand cmd)
at SolrNet.Impl.SolrBasicServer`1.SendAndParseExtract(ISolrCommand cmd)
at SolrNet.Impl.SolrBasicServer`1.Extract(ExtractParameters parameters)
at SolrNet.Impl.SolrServer`1.Extract(ExtractParameters parameters)
at SolrNetSample.Program.Main(String[] args) in E:\TestProject\SolrNetSample\SolrNetSample\SolrNetSample\Program.cs:line 38
at System.AppDomain._nExecuteAssembly(Assembly assembly, String[] args)
at System.AppDomain.ExecuteAssembly(String assemblyFile, Evidence assemblySecurity, String[] args)
at Microsoft.VisualStudio.HostingProcess.HostProc.RunUsersAssembly()
at System.Threading.ThreadHelper.ThreadStart_Context(Object state)
at System.Threading.ExecutionContext.Run(ExecutionContext executionContext, ContextCallback callback, Object state)
at System.Threading.ThreadHelper.ThreadStart()
InnerException: System.Net.WebException
Message=The remote server returned an error: (500) Internal Server Error.
Source=System
StackTrace:
at System.Net.HttpWebRequest.GetResponse()
at HttpWebAdapters.Adapters.HttpWebRequestAdapter.GetResponse()
at SolrNet.Impl.SolrConnection.GetResponse(IHttpWebRequest request)
at SolrNet.Impl.SolrConnection.PostStream(String relativeUrl, String contentType, Stream content, IEnumerable`1 parameters)