综合类管理系统不管是自研还是外包项目,都会被客户或者产品经理要求实现 word 导出、excel 导出、pdf 导出等功能。其实 pdf 导出有很多种方式,我实现过的就有两种,接下来说说其中的一种:当你已经实现了 word 导出,或者有明确的要求说要把 word 文件转化为 pdf 文件的时候,可以看看下面实现的这种 word 文件(doc、docx)转 pdf 的方式。
项目中jar包引用情况
<!-- Maven fragment: version properties and the dependencies pulled in for the doc/docx to pdf utility. -->
<properties>
<java.version>1.8</java.version>
<!-- Single version shared by all four org.apache.poi artifacts declared below. -->
<poi.version>3.17</poi.version>
</properties>
<dependencies>
<!-- Apache POI core. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>${poi.version}</version>
</dependency>
<!-- POI OOXML support; the utility class reads .docx files through org.apache.poi.xwpf.usermodel.XWPFDocument. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>${poi.version}</version>
</dependency>
<!-- POI scratchpad; the utility class reads .doc files through org.apache.poi.hwpf (HWPFDocument, WordToHtmlConverter). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${poi.version}</version>
</dependency>
<!-- xdocreport docx to pdf converter; the utility class imports PdfConverter and PdfOptions from fr.opensagres.poi.xwpf.converter.pdf. -->
<!-- The transitive poi-ooxml is excluded, presumably so that the ${poi.version} artifact declared above is the one on the classpath. -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>fr.opensagres.poi.xwpf.converter.pdf-gae</artifactId>
<version>2.0.1</version>
<exclusions>
<exclusion>
<artifactId>poi-ooxml</artifactId>
<groupId>org.apache.poi</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- NOTE(review): the next two artifacts use the older org.apache.poi.xwpf.converter naming at 1.0.6, next to the 2.0.1 -->
<!-- fr.opensagres.poi.xwpf.converter artifact above. The utility class imports nothing from org.apache.poi.xwpf.converter; -->
<!-- confirm these are still needed and do not conflict on the classpath. -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.core</artifactId>
<version>1.0.6</version>
<exclusions>
<exclusion>
<artifactId>poi-ooxml</artifactId>
<groupId>org.apache.poi</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- No version is given for the next two; presumably managed by a Spring Boot parent or BOM outside this fragment (TODO confirm). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<!-- Lombok; the utility class imports lombok.extern.slf4j.Slf4j for its logger. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<!-- iText 5; the html to pdf step uses com.itextpdf.text.Document, PageSize and PdfWriter. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.13</version>
</dependency>
<!-- iText XML Worker; the html to pdf step calls XMLWorkerHelper.parseXHtml. -->
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>5.5.11</version>
</dependency>
<!-- Presumably supplies the CJK font resources behind the "STSong-Light" / "UniGB-UCS2-H" font requested by the html to pdf step (TODO confirm). -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
<version>5.2.0</version>
</dependency>
<!-- commons-lang3; the utility class uses org.apache.commons.lang3.StringUtils. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.9</version>
</dependency>
<!-- jsoup; used to clean up the intermediate HTML before it is handed to the pdf writer. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
</dependencies>
以上就是doc、docx转pdf涉及到的包。
转换工具代码
package com.ttos.utils;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.FontProvider;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.lowagie.text.DocumentException;
import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;
import fr.opensagres.poi.xwpf.converter.pdf.PdfConverter;
import fr.opensagres.poi.xwpf.converter.pdf.PdfOptions;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.select.Elements;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
public class Word2PdfUtils {
/**
* docx2pdf
*
* @param wordFilePath wordFilePath
* @param pdfFilePath pdfFilePath
* @throws Exception Exception
*/
public static void docx2pdf(String wordFilePath, String pdfFilePath) throws Exception {
if (Objects.isNull(wordFilePath)) {
log.error("docx的word文件路径不能为空");
return;
}
docx2Pdf(new File(wordFilePath), pdfFilePath);
}
/**
* docx2Pdf
*
* @param wordFile wordFile
* @param pdfFilePath pdfFilePath
* @throws Exception Exception
*/
public static void docx2Pdf(File wordFile, String pdfFilePath) throws Exception {
if (Objects.isNull(pdfFilePath)) {
log.error("pdf转化后的pdf目标文件路径不能为空");
return;
}
docx2Pdf(wordFile, new File(pdfFilePath));
}
/**
* doc2Pdf
*
* @param docFilePath docFilePath
* @param pdfFIlePath pdfFIlePath
* @param imgTmpPath imgTmpPath
*/
public static void doc2Pdf(String docFilePath, String pdfFIlePath,String imgTmpPath) throws Exception {
final String html = word2Html(docFilePath, imgTmpPath);
final String formatHtml = formatHtml(html);
html2Pdf(formatHtml, pdfFIlePath);
}
/**
* doc2Pdf
*
* @param docFIle docFIle
* @param pdfFIlePath pdfFIlePath
* @param imgTmpPath imgTmpPath
* @throws Exception Exception
*/
public static void doc2Pdf(File docFIle,String pdfFIlePath, String imgTmpPath) throws Exception {
final String html = word2Html(docFIle, imgTmpPath);
final String formatHtml = formatHtml(html);
html2Pdf(formatHtml, pdfFIlePath);
}
/**
* docx2Pdf
*
* @param wordFile wordFile
* @param pdfFile pdfFile
* @throws Exception Exception
*/
public static void docx2Pdf(File wordFile, File pdfFile) throws Exception {
if (Objects.isNull(wordFile) || !wordFile.exists()) {
log.error("docx的word文件不存在,请核实");
return;
}
if (Objects.isNull(pdfFile)) {
log.error("转化后的pdf目标文件不能为空");
return;
}
if (!pdfFile.exists()) {
log.info("转化后的pdf目标文件不存在,创建pdf目标文件");
if (!pdfFile.createNewFile()) {
log.error("转化后的pdf目标文件创建失败");
return;
}
}
try (final FileInputStream inputStream = new FileInputStream(wordFile);
final FileOutputStream outputStream = new FileOutputStream(pdfFile)) {
XWPFDocument xwpfDocument = new XWPFDocument(inputStream);
PdfOptions pdfOptions = PdfOptions.create();
pdfOptions.fontProvider((familyName, encoding, size, style, color) -> {
try {
final String path = Objects.requireNonNull(ClassLoader.getSystemClassLoader().getResource("0.ttf")).getFile();
final BaseFont bfChn = BaseFont.createFont(path, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
final Font fontChn = new Font(bfChn, size, style, color);
if (!Objects.isNull(familyName)) {
fontChn.setFamily(familyName);
}
return fontChn;
} catch (DocumentException | IOException e) {
log.error("pdf文件内容样式设置失败");
return null;
}
});
PdfConverter.getInstance().convert(xwpfDocument, outputStream, pdfOptions);
}
}
/**
* word2Html
*
* @param wordPath wordPath
* @param imageTempPath imageTempPath
* @return html
* @throws Exception Exception
*/
public static String word2Html(String wordPath, String imageTempPath) throws Exception {
if (Objects.isNull(wordPath)) {
log.error("doc文件路径: {} ,不能为空", wordPath);
return null;
}
return word2Html(new File(wordPath), imageTempPath);
}
/**
* word2Html
*
* @param wordFile wordFile
* @param imgTmpPath imgTmpPath
* @return html
* @throws Exception Exception
*/
public static String word2Html(File wordFile, String imgTmpPath) throws Exception {
if (Objects.isNull(imgTmpPath)) {
log.error("doc文件内容图片缓存文件路径: {} ,不能为空", imgTmpPath);
return null;
}
return word2Html(wordFile, new File(imgTmpPath));
}
/**
* word2Html
*
* @param wordFile wordFile
* @param imgTmpPath imgTmpPath
* @return html
* @throws Exception Exception
*/
public static String word2Html(File wordFile, File imgTmpPath) throws Exception {
if (Objects.isNull(wordFile) || !wordFile.exists()) {
log.error("doc的word文件不存在,请核实");
return null;
}
if (Objects.isNull(imgTmpPath)) {
log.error("doc文件内容图片缓存文件路径不能为空");
return null;
}
if (!imgTmpPath.exists()) {
log.info("doc文件内容图片缓存文件路径不存在,创建目标文件路径");
if (!imgTmpPath.createNewFile()) {
log.error("doc文件内容图片缓存文件路径创建失败");
return null;
}
}
try (final FileInputStream docInputStream = new FileInputStream(wordFile);
final ByteArrayOutputStream byteArrayStream = new ByteArrayOutputStream()) {
final HWPFDocument hwpfDocument = new HWPFDocument(docInputStream);
final Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
final WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
wordToHtmlConverter.setPicturesManager((bts, pictureType, picName, width, height) -> {
String tmpPicFilePath = imgTmpPath.getAbsolutePath() + File.separator + picName;
final File tmpPicFile = new File(tmpPicFilePath);
try (final FileOutputStream picOutputStream = new FileOutputStream(tmpPicFile)) {
picOutputStream.write(bts);
} catch (IOException e) {
e.printStackTrace();
}
return tmpPicFilePath;
});
wordToHtmlConverter.processDocument(hwpfDocument);
final Document htmlDocument = wordToHtmlConverter.getDocument();
final DOMSource domSource = new DOMSource(htmlDocument);
final StreamResult streamResult = new StreamResult(byteArrayStream);
final Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.METHOD, "html");
transformer.transform(domSource, streamResult);
return byteArrayStream.toString(StandardCharsets.UTF_8.name());
}
}
/**
* formatHtml 格式化html,补全内容html标签
*
* @param html html
* @return formatHtml
*/
private static String formatHtml(String html) {
final org.jsoup.nodes.Document document = Jsoup.parse(html);
final String style = document.attr("style");
if (StringUtils.isNotEmpty(style) && style.indexOf("width") > 0) {
document.attr("style", "");
}
final Elements divs = document.select("div");
for (Element div : divs) {
final String divStyle = div.attr("style");
if (StringUtils.isNotEmpty(divStyle) && divStyle.indexOf("width") > 0) {
div.attr("style", "");
}
}
document.outputSettings().syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
document.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
return document.html();
}
/**
* html2Pdf
*
* @param html html
* @param pdfFilePath pdfFilePath
*/
public static void html2Pdf(String html, String pdfFilePath) throws Exception {
if (Objects.isNull(pdfFilePath)) {
log.error("转化后的pdf文件路径不能为空");
return;
}
html2Pdf(html, new File(pdfFilePath));
}
public static void html2Pdf(String html, File pdfFile) throws Exception {
if (Objects.isNull(html)) {
log.error("需转化的html内容不能为空");
return;
}
if (Objects.isNull(pdfFile)) {
log.error("需转化的pdf文件不能为空");
return;
}
if (!pdfFile.exists()) {
log.error("需转化的pdf文件 不存在,创建文件");
if (!pdfFile.createNewFile()) {
log.error("需转化的pdf文件创建失败");
return;
}
}
try (final FileOutputStream fileOutputStream = new FileOutputStream(pdfFile);
final ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(html.getBytes())) {
final com.itextpdf.text.Document document = new com.itextpdf.text.Document(PageSize.A4);
final PdfWriter pdfWriter = PdfWriter.getInstance(document, fileOutputStream);
document.open();
XMLWorkerHelper.getInstance().parseXHtml(pdfWriter, document, byteArrayInputStream, StandardCharsets.UTF_8,
new FontProvider() {
@Override
public boolean isRegistered(String s) {
return false;
}
@Override
public com.itextpdf.text.Font getFont(String s, String s1, boolean b, float v, int i, BaseColor baseColor) {
try {
final com.itextpdf.text.pdf.BaseFont baseFont = com.itextpdf.text.pdf.BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", com.itextpdf.text.pdf.BaseFont.EMBEDDED);
final com.itextpdf.text.Font font = new com.itextpdf.text.Font(baseFont, v, i, baseColor);
font.setColor(baseColor);
return font;
} catch (IOException | com.itextpdf.text.DocumentException e) {
log.error("pdf内容设置格式出现错误");
}
return null;
}
});
document.close();
}
}
public static void main(String[] args) throws Exception {
docx2pdf("G:\\00_com\\123.docx", "G:\\00_com\\1234.pdf");
doc2Pdf("G:\\123.doc", "G:\\1234.pdf","G:\\pic\\");
}
}
以上功能代码亲测有效,可以拷进项目直接用。