Apache Tika не сохраняет настройки и цвета фона контента. Где я ошибаюсь?
Я написал примерно такой код, как показано ниже. Есть ли какие-нибудь предложения?
{
    // Parses abc.docx with Tika's AutoDetectParser, serializes the resulting SAX events
    // as HTML through a JAXP TransformerHandler, prints the markup to stdout and stores
    // it in xyz.html.
    //
    // Failures while opening or closing the input propagate to the caller; failures
    // while parsing or writing are caught and printed.
    try (InputStream input = TikaInputStream.get(new File("abc.docx"))) {
        try {
            Parser parser = new AutoDetectParser();

            // The serializer writes characters into this in-memory buffer; no bytes
            // are produced until the text is written to the file below.
            StringWriter sw = new StringWriter();
            SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
            TransformerHandler handler = factory.newTransformerHandler();
            handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, "utf-16");
            handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "html");
            handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
            handler.setResult(new StreamResult(sw));

            // NOTE(review): this Metadata object is handed to parse() together with the
            // .docx input, yet the values describe HTML/utf-16 — presumably meant for the
            // output. Confirm whether these entries are needed at all.
            Metadata metadata = new Metadata();
            metadata.add(Metadata.CONTENT_TYPE, "text/html;charset=utf-16");
            metadata.add(Metadata.CONTENT_ENCODING, "charset=utf-16");

            // NOTE(review): the output can only contain what the parser emits as SAX
            // events; whether styling such as background colours is included depends on
            // the parser — verify against the Tika documentation.
            parser.parse(input, new ExpandedTitleContentHandler(handler), metadata, new ParseContext());

            String xml = sw.toString();
            System.out.print("TIKA : " + xml);

            // Write the file in the same encoding that was requested from the serializer
            // (utf-16) instead of the platform default charset, so the bytes on disk
            // agree with the encoding the document was generated for. try-with-resources
            // also closes the writer when write() fails.
            try (BufferedWriter bw = java.nio.file.Files.newBufferedWriter(
                    new File("xyz.html").toPath(), java.nio.charset.StandardCharsets.UTF_16)) {
                bw.write(xml);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}