如何使用 Apache POI 将 altChunk 元素添加到 XWPFDocument


我想使用 Apache POI 将 HTML 作为 altChunk 添加到 DOCX 文件中。我知道 docx4j 可以用更简单的 API 完成此操作,但出于技术原因,我需要使用 Apache POI。

使用 CT 类对 xml 进行低级操作有点棘手。我可以使用以下代码创建 altChunk:

import java.io.File;
import java.io.FileOutputStream;

import javax.xml.namespace.QName;

import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.xmlbeans.impl.values.XmlComplexContentImpl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTBodyImpl;

public class AltChunkTest {
    public static void main(String[] args) throws Exception  {
        XWPFDocument doc = new XWPFDocument();
        doc.createParagraph().createRun().setText("AltChunk below:");
        QName ALTCHUNK =  new QName ( "http://schemas.openxmlformats.org/wordprocessingml/2006/main" ,  "altChunk" ) ; 
        CTDocument1 ctDoc = doc.getDocument() ; 
        CTBodyImpl ctBody =  (CTBodyImpl) ctDoc. getBody(); 
        XmlComplexContentImpl xcci =  ( XmlComplexContentImpl ) ctBody.get_store().add_element_user(ALTCHUNK); 
        // what's need to now add "<b>Hello World!</b>"
        FileOutputStream out = new FileOutputStream(new File("test.docx"));

但是我现在如何将 html 内容添加到“xcci”呢?

在用于 Word 的 Office Open XML(*.docx)中,altChunk 提供了一种使用纯 HTML 来描述文档部分内容的方式。关于 altChunk 有两点需要注意:


第一:它仅用于导入内容。如果您使用 Word 打开文档并保存,新保存的文档将不再包含替代格式内容部分,也不包含引用它的 altChunk 标记。Word 会将所有导入的内容保存为默认的 Office Open XML 元素。

第二:除 Word 之外,大多数能够读取 *.docx 的应用程序根本不会读取 altChunk 内容。例如,LibreOffice 或 OpenOffice Writer 不会读取 altChunk 内容,Apache POI 在打开 *.docx 文件时也不会读取 altChunk 内容。

altChunk 在 *.docx ZIP 文件结构中是如何实现的?

*.docx ZIP 文件中包含 /word/*.html 文件。这些文件在 /word/document.xml 中通过 Id 引用,例如 <w:altChunk r:id="htmlDoc1"/>。Id 与 /word/*.html 文件之间的关系在 /word/_rels/document.xml.rels 中给出,例如 <Relationship Id="htmlDoc1" Target="htmlDoc1.html" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk"/>。

所以我们首先需要为 /word/*.html 文件准备 POIXMLDocumentPart,并需要 POIXMLRelation 来表示 Id 与 /word/*.html 文件之间的关系。以下代码提供了一个扩展 POIXMLDocumentPart 的包装类,用于表示 *.docx ZIP 存档中的 /word/htmlDoc#.html 文件,并提供了操作 HTML 的方法。代码还提供了一个方法,用于在 *.docx ZIP 存档中创建 /word/htmlDoc#.html 文件并建立与它的关系。


import java.io.*;

import org.apache.poi.*;
import org.apache.poi.ooxml.*;
import org.apache.poi.openxml4j.opc.*;

import org.apache.poi.xwpf.usermodel.*;

public class CreateWordWithHTMLaltChunk {

 //a method for creating the htmlDoc /word/htmlDoc#.html in the *.docx ZIP archive  
 //String id will be htmlDoc#.
 private static MyXWPFHtmlDocument createHtmlDoc(XWPFDocument document, String id) throws Exception {
  OPCPackage oPCPackage = document.getPackage();
  PackagePartName partName = PackagingURIHelper.createPartName("/word/" + id + ".html");
  PackagePart part = oPCPackage.createPart(partName, "text/html");
  MyXWPFHtmlDocument myXWPFHtmlDocument = new MyXWPFHtmlDocument(part, id);
  document.addRelation(myXWPFHtmlDocument.getId(), new XWPFHtmlRelation(), myXWPFHtmlDocument);
  return myXWPFHtmlDocument;

 public static void main(String[] args) throws Exception {

  XWPFDocument document = new XWPFDocument();
  XWPFParagraph paragraph;
  XWPFRun run;
  MyXWPFHtmlDocument myXWPFHtmlDocument;

  paragraph = document.createParagraph();
  run = paragraph.createRun();
  run.setText("Default paragraph followed by first HTML chunk.");

  myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc1");
   "<body><p>Simple <b>HTML</b> <i>formatted</i> <u>text</u></p></body>"));

  paragraph = document.createParagraph();
  run = paragraph.createRun();
  run.setText("Default paragraph followed by second HTML chunk.");

  myXWPFHtmlDocument = createHtmlDoc(document, "htmlDoc2");
   "<body>" +
   "<caption>A table></caption>" +
   "<tr><th>Name</th><th>Date</th><th>Amount</th></tr>" +
   "<tr><td>John Doe</td><td>2018-12-01</td><td>1,234.56</td></tr>" +
   "</table>" +

  FileOutputStream out = new FileOutputStream("CreateWordWithHTMLaltChunk.docx");


 //a wrapper class for the  htmlDoc /word/htmlDoc#.html in the *.docx ZIP archive
 //provides methods for manipulating the HTML
 //TODO: We should *not* using String methods for manipulating HTML!
 private static class MyXWPFHtmlDocument extends POIXMLDocumentPart {

  private String html;
  private String id;

  private MyXWPFHtmlDocument(PackagePart part, String id) throws Exception {
   this.html = "<!DOCTYPE html><html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"><style></style><title>HTML import</title></head><body></body>";
   this.id = id;

  private String getId() {
   return id;

  private String getHtml() {
   return html;

  private void setHtml(String html) {
   this.html = html;

  protected void commit() throws IOException {
   PackagePart part = getPackagePart();
   OutputStream out = part.getOutputStream();
   Writer writer = new OutputStreamWriter(out, "UTF-8");


 //the XWPFRelation for /word/htmlDoc#.html
 private final static class XWPFHtmlRelation extends POIXMLRelation {
  private XWPFHtmlRelation() {

注意:由于使用了 altChunk,此代码需要包含所有模式的完整 jar 文件 ooxml-schemas-*.jar,如 Apache POI 常见问题解答 N10025 中所述。


