使用忠告
使用该方式进行xhtml到word的转换, 简单的转换是可以的, 但效果可能并没有想象中那么令人满意: 转换出来的word格式并不完美, 比如目录和标题样式都会丢失 -- 标题看起来和原来一样, 但实际上是用加粗、加大字号的正文来显示的. 毕竟word是一种文档格式, 而html是一种标记语言, 要想实现完美的兼容和转换很难
再结合上一篇《word与html互转(1) -- word转html》来看, 虽然word与html互转都有实现手段, 但是考虑到转换格式的复杂度和后期的维护成本, 我们最后放弃了实现这个成本高、但对项目影响不大的功能
实现
实现方式
使用poi+docx4j来实现
poi:都熟悉, 这边不作介绍
Docx4j:是github上的一个开源项目, 使用起来很简单, 可以很轻松地将xhtml转为docx, 它的具体介绍可以去项目地址查看 -- 项目地址: https://github.com/plutext/docx4j (xhtml导入模块: https://github.com/plutext/docx4j-ImportXHTML)
引入相关程序包
<!-- Apache POI core artifact. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<!-- Apache POI scratchpad artifact, kept at the same version as the POI core above. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version>
</dependency>
<!-- Apache POI OOXML artifact, kept at the same version as the POI core above. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
<!-- Apache POI OOXML schema classes, kept at the same version as the POI core above. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.14</version>
</dependency>
<!-- NOTE(review): ooxml-schemas appears to overlap with poi-ooxml-schemas declared above;
     confirm that both are really needed on the classpath. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>ooxml-schemas</artifactId>
<version>1.3</version>
</dependency>
<!-- docx4j XHTML import module; the html-to-docx example below uses its XHTMLImporterImpl. -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-ImportXHTML</artifactId>
<version>3.0.0</version>
</dependency>
html转docx
/**
 * Converts the XHTML file {@code test.html} on the desktop into {@code test.docx}
 * using docx4j's XHTML importer.
 *
 * <p>The input is read as UTF-8. If the markup arrived entity-encoded (its closing
 * tags appear as {@code &lt;/}), it is unescaped first; ordinary markup is passed
 * through untouched so that entities inside the content stay intact.
 *
 * @param args unused
 * @throws Exception if reading the input, converting it, or saving the docx fails
 */
public static void main(String[] args) throws Exception {
    String inputfilepath = "C:\\Users\\Administrator\\Desktop\\test.html";
    // Base location handed to the importer together with the markup.
    String baseURL = "C:\\Users\\Administrator\\Desktop";
    String stringFromFile = FileUtils.readFileToString(new File(inputfilepath), "UTF-8");

    // Only unescape when the markup itself is entity-encoded. Checking for a raw "</"
    // would match every normal HTML file and decode entities such as &amp; or &lt;
    // inside the content, corrupting otherwise well-formed XHTML.
    String unescaped = stringFromFile;
    if (stringFromFile.contains("&lt;/")) {
        unescaped = StringEscapeUtils.unescapeHtml(stringFromFile);
    }

    // Register a font mapping for "Century Gothic".
    RFonts rfonts = Context.getWmlObjectFactory().createRFonts();
    rfonts.setAscii("Century Gothic");
    XHTMLImporterImpl.addFontMapping("Century Gothic", rfonts);

    // Create an empty docx package and attach a numbering part with the default numbering.
    WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();
    NumberingDefinitionsPart ndp = new NumberingDefinitionsPart();
    wordMLPackage.getMainDocumentPart().addTargetPart(ndp);
    ndp.unmarshalDefaultNumbering();

    // Use ONE importer for both configuration and conversion, so that the formatting
    // options below actually apply to the conversion. It is created after the numbering
    // part is attached, matching the importer that performed the conversion originally.
    XHTMLImporterImpl importer = new XHTMLImporterImpl(wordMLPackage);
    importer.setTableFormatting(FormattingOption.IGNORE_CLASS);
    importer.setParagraphFormatting(FormattingOption.IGNORE_CLASS);
    importer.setHyperlinkStyle("Hyperlink");

    // Convert the XHTML and append the result to the empty document.
    wordMLPackage.getMainDocumentPart().getContent().addAll(
            importer.convert(unescaped, baseURL));
    wordMLPackage.save(new java.io.File("C:\\Users\\Administrator\\Desktop\\test.docx"));
}
html转doc
/**
 * Writes the HTML produced by {@code getHtml()} into a POIFS container as an entry
 * named {@code "WordDocument"} and saves the container as {@code test.doc} on the desktop.
 *
 * <p>NOTE(review): the entry holds raw UTF-8 HTML bytes; presumably this relies on Word
 * accepting HTML content inside the container — confirm with the target Word version.
 *
 * @throws IOException if encoding the HTML or writing the output file fails
 */
public void test() throws IOException {
    // For testing, the HTML is assembled in code rather than read from a file.
    String html = getHtml();
    byte[] htmlBytes = html.getBytes("utf-8");

    // Wrap the bytes in a stream, as createDocument takes its content from a stream.
    ByteArrayInputStream bais = new ByteArrayInputStream(htmlBytes);
    POIFSFileSystem poifs = new POIFSFileSystem();
    DirectoryEntry directory = poifs.getRoot();
    directory.createDocument("WordDocument", bais);

    // Close the output stream even when writing fails, so the file handle is not leaked.
    FileOutputStream out = new FileOutputStream("C:\\Users\\Administrator\\Desktop\\test.doc");
    try {
        poifs.writeFilesystem(out);
    } finally {
        out.close();
    }
}
/**
 * Builds the sample XHTML document used for the conversion test.
 *
 * <p>The document contains an ordered list (bold, italic, underlined and struck-through
 * items plus an image), an empty paragraph, and a two-by-two table. The fragments are
 * concatenated without any separator.
 *
 * @return the complete XHTML document as a single string
 */
public String getHtml() {
    // Fragments in document order; they are joined as-is, with nothing in between.
    String[] fragments = {
        // Document type, root element and head.
        "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">",
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>",
        "</head><body>",
        // Ordered list.
        "<ol style='list-style-type: decimal;' class=' list-paddingleft-2'>",
        "<li><p><strong>111</strong></p></li>",
        "<li><p><em>2222</em></p></li>",
        "<li><p><span style='text-decoration: underline;'>33333</span></p></li>",
        "<li><p><span style='text-decoration: line-through; border: medium none;'>444444</span></p></li>",
        "<li><img src='https://csdnimg.cn/pubfooter/images/csdn_cs_qr.png'></img></li>",
        "</ol>",
        "<p><br/><span style='text-decoration: line-through; border: medium none;'></span></p>",
        // Table with two rows of two cells.
        "<table><tbody>",
        "<tr class='firstRow'>",
        "<td style='word-break: break-all;' width='402' valign='top'><span style='color: #ff0000'>table1<br/></span></td>",
        "<td style='word-break: break-all;' width='402' valign='top'><p><span style='color: #ff0000'>table2</span></p></td>",
        "</tr>",
        "<tr>",
        "<td style='word-break: break-all;' class='selectTdClass' width='402' valign='top'><strong>table3<br/></strong></td>",
        "<td style='word-break: break-all;' class='selectTdClass' width='402' valign='top'><p><strong>table4</strong></p></td>",
        "</tr>",
        "</tbody></table>",
        // Trailing empty paragraph and closing tags.
        "<p><br/></p>",
        "</body></html>"
    };

    StringBuilder document = new StringBuilder();
    for (String fragment : fragments) {
        document.append(fragment);
    }
    return document.toString();
}