一、 概述
最近做了一个 Word 文档格式的报告输出需求,涉及较多 Word 文档内容与格式操作,例如插入段落、插入表格、删除段落、表格内容填充、表格合并单元格、段落复制、内容占位符替换、PDF 转图片后插入文档等。我把这些操作整理成了一个工具类,在此分享。
二、代码
2.1 引入相关依赖
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.35</version>
</dependency>
<!-- Apache POI for Docx -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.4.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.xmlbeans</groupId>
<artifactId>xmlbeans</artifactId>
<version>5.3.0</version>
</dependency>
<!-- PDF Renderer for PDF to Image -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.27</version>
</dependency>
2.2 代码
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.lang.Dict;
import lombok.SneakyThrows;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.springframework.util.ObjectUtils;
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class DocxUtil {
// 用于匹配占位符 ${key}
private final static Pattern pattern = Pattern.compile("\\$\\{[^}]+\\}");
/**
 * Builds an {@link XWPFDocument} from the given stream.
 *
 * @param inputStream stream that supplies the .docx content
 * @return the document created from the stream
 */
@SneakyThrows
public static XWPFDocument getXWPFDocument(InputStream inputStream) {
    // Checked exceptions from the constructor are rethrown unchecked via @SneakyThrows.
    final XWPFDocument parsed = new XWPFDocument(inputStream);
    return parsed;
}
/**
 * Writes the document to the given stream and then closes the stream.
 *
 * <p>The stream is closed even when writing fails, so a failed export does
 * not leak the underlying file handle or socket.
 *
 * @param document     document to serialize
 * @param outputStream destination stream; always closed by this method
 */
@SneakyThrows
public static void writeOut(XWPFDocument document, OutputStream outputStream) {
    try {
        document.write(outputStream);
    } finally {
        // Close in finally so the stream is released on the failure path too.
        IoUtil.close(outputStream);
    }
}
/**
 * Replaces the first body paragraph containing {@code key} with a new table.
 *
 * <p>The table is inserted at the position of the placeholder paragraph, and
 * the placeholder paragraph is then removed from the document body.
 *
 * <p>Note from the original author: always set column widths on the returned
 * table (see {@code setColumnWidth}); otherwise merged cells come out broken
 * when LibreOffice converts the document to PDF.
 *
 * @param document document to modify
 * @param key      placeholder text identifying the target paragraph
 * @return the inserted table, or {@code null} when no paragraph contains {@code key}
 */
public static XWPFTable insertTbl(XWPFDocument document, String key) {
    // Locate the placeholder paragraph.
    XWPFParagraph targetParagraph = getXwpfParagraphByKey(document, key);
    if (targetParagraph == null) {
        return null;
    }
    XWPFTable xwpfTable;
    // The cursor points at the placeholder paragraph; release it once the table is inserted.
    try (XmlCursor xmlCursor = targetParagraph.getCTP().newCursor()) {
        xwpfTable = document.insertNewTbl(xmlCursor);
    }
    // Drop the placeholder paragraph now that the table has taken its place.
    delXWPFParagraph(document, targetParagraph);
    return xwpfTable;
}
/**
 * Removes a paragraph from the document body.
 *
 * @param document        document that owns the paragraph
 * @param targetParagraph paragraph to remove
 */
public static void delXWPFParagraph(XWPFDocument document, XWPFParagraph targetParagraph) {
    // Resolve the paragraph to its position first, then remove the body element at that position.
    final int bodyPosition = document.getPosOfParagraph(targetParagraph);
    document.removeBodyElement(bodyPosition);
}
/**
 * Replaces {@code ${key}} placeholders in the document's paragraphs and,
 * optionally, in the paragraphs of its table cells.
 *
 * @param document       document to process
 * @param replacements   placeholder-to-value mapping, e.g. {"${name}":"小明","${age}":"18"}
 * @param isReplaceTable whether the cells of the document's tables are processed as well
 */
public static void documentReplacePlaceholders(XWPFDocument document, Dict replacements, boolean isReplaceTable) {
    // Paragraphs returned by the document itself.
    replaceXWPFParagraph(replacements, document.getParagraphs());
    if (!isReplaceTable) {
        return;
    }
    // Paragraphs inside every cell of every table returned by the document.
    for (XWPFTable table : document.getTables()) {
        for (XWPFTableRow tableRow : table.getRows()) {
            for (XWPFTableCell tableCell : tableRow.getTableCells()) {
                replaceXWPFParagraph(replacements, tableCell.getParagraphs());
            }
        }
    }
}
/**
 * Writes the header titles into the first row of the table.
 *
 * <p>Each title is centered, not bold, and uses the font "华文楷体". A cell
 * is created whenever the first row has no cell at the required index.
 *
 * @param table    table whose first row receives the titles
 * @param fontSize font size applied to every title
 * @param head     titles in column order, e.g. ["序号","名字","年龄"]; nothing happens when empty
 */
public static void setTableHead(XWPFTable table, int fontSize, String... head) {
    if (ObjectUtils.isEmpty(head)) {
        return;
    }
    final XWPFTableRow headerRow = table.getRow(0);
    int column = 0;
    for (String title : head) {
        XWPFTableCell headerCell = headerRow.getCell(column);
        if (headerCell == null) {
            // The row is shorter than the header list: append a cell.
            headerCell = headerRow.createCell();
        }
        setCellText(headerCell, title, fontSize, false, ParagraphAlignment.CENTER, "华文楷体");
        column++;
    }
}
/**
 * Fills the table body, one table row per data entry, starting at row index 1
 * (row 0 is left for the header).
 *
 * <p>Missing rows and cells are created on demand. Cell text uses the font
 * "宋体"; columns listed in {@code colIndex} are centered, all others are
 * left-aligned. A missing value (or a {@code null} row map) yields an empty cell.
 *
 * @param table          table to fill
 * @param fontSize       font size applied to every cell
 * @param heads          map keys in column order, e.g. {"no","name","age"};
 *                       {@code null} is treated as "no columns"
 * @param contextMapList row data, e.g. [{"no":"1","name":"小明","age":"18"}];
 *                       nothing happens when empty
 * @param colIndex       indexes of the columns to center; {@code null} means none
 */
public static void setTableContext(XWPFTable table, int fontSize, List<String> heads, List<Map<String, Object>> contextMapList, List<Integer> colIndex) {
    if (CollUtil.isEmpty(contextMapList)) {
        return;
    }
    // Tolerate a missing key list instead of failing with a NullPointerException.
    final int columnCount = heads == null ? 0 : heads.size();
    for (int i = 1; i <= contextMapList.size(); i++) {
        // Data entry i-1 goes into table row i because row 0 holds the header.
        Map<String, Object> map = contextMapList.get(i - 1);
        XWPFTableRow tableRow = table.getRow(i);
        if (tableRow == null) {
            tableRow = table.createRow();
        }
        for (int j = 0; j < columnCount; j++) {
            XWPFTableCell cell = tableRow.getCell(j);
            if (cell == null) {
                cell = tableRow.createCell();
            }
            Object obj = map == null ? null : map.get(heads.get(j));
            String text = obj == null ? "" : obj.toString();
            // A null colIndex simply means that no column is centered.
            boolean centered = colIndex != null && colIndex.contains(j);
            ParagraphAlignment alignment = centered ? ParagraphAlignment.CENTER : ParagraphAlignment.LEFT;
            DocxUtil.setCellText(cell, text, fontSize, false, alignment, "宋体");
        }
    }
}
/**
 * Vertically merges the cells of one column across a range of rows.
 *
 * @param table    table containing the cells
 * @param col      index of the column to merge
 * @param rowStart index of the first row of the merge (inclusive)
 * @param rowEnd   index of the last row of the merge (inclusive)
 */
public static void mergeCellV(XWPFTable table, int col, int rowStart, int rowEnd) {
    // Collect the column's cell from each row in the range, top to bottom.
    final List<XWPFTableCell> columnCells = new ArrayList<>();
    int rowIdx = rowStart;
    while (rowIdx <= rowEnd) {
        columnCells.add(table.getRow(rowIdx).getCell(col));
        rowIdx++;
    }
    // Delegate the actual merge to the varargs overload.
    mergeCell(columnCells.toArray(new XWPFTableCell[0]));
}
/**
 * Vertically merges the given cells.
 *
 * <p>The first cell is marked as the start of the merge ({@code RESTART}) and
 * every following cell as its continuation ({@code CONTINUE}). An existing
 * {@code vMerge} element is reused, so merging the same cell more than once
 * does not add duplicate elements to the cell properties.
 *
 * @param cells cells to merge, ordered top to bottom; fewer than two cells is a no-op
 */
public static void mergeCell(XWPFTableCell... cells) {
    if (ObjectUtils.isEmpty(cells) || cells.length < 2) {
        return;
    }
    for (int i = 0; i < cells.length; i++) {
        XWPFTableCell cell = cells[i];
        CTTcPr ctTcPr = cell.getCTTc().getTcPr();
        if (ctTcPr == null) {
            ctTcPr = cell.getCTTc().addNewTcPr();
        }
        // Reuse the merge marker if the cell already has one.
        CTVMerge vMerge = ctTcPr.isSetVMerge() ? ctTcPr.getVMerge() : ctTcPr.addNewVMerge();
        vMerge.setVal(i == 0 ? STMerge.RESTART : STMerge.CONTINUE);
    }
}
/**
 * Sets the width of one column by writing the width into that column's cell
 * in every row of the table.
 *
 * <p>Existing cell properties and width elements are reused rather than
 * re-added, so properties set earlier (for example a vertical merge) stay in
 * the same {@code tcPr} element and calling this method repeatedly does not
 * create duplicate elements.
 *
 * @param table    table to modify
 * @param colIndex index of the column
 * @param width    width value, written with width type {@code DXA}
 */
public static void setColumnWidth(XWPFTable table, int colIndex, int width) {
    for (int i = 0; i < table.getNumberOfRows(); i++) {
        XWPFTableRow row = table.getRow(i);
        XWPFTableCell cell = row.getCell(colIndex);
        if (cell == null) {
            // This row has no cell in the requested column.
            continue;
        }
        CTTc cttc = cell.getCTTc();
        // Get-or-create, matching the other cell-property helpers in this class.
        CTTcPr tcpr = cttc.getTcPr();
        if (tcpr == null) {
            tcpr = cttc.addNewTcPr();
        }
        CTTblWidth tblWidth = tcpr.isSetTcW() ? tcpr.getTcW() : tcpr.addNewTcW();
        tblWidth.setType(STTblWidth.DXA);
        tblWidth.setW(BigInteger.valueOf(width));
    }
}
/**
 * Sets the vertical alignment of a table cell.
 *
 * <p>An existing {@code vAlign} element is updated in place, so the method
 * can be called repeatedly on the same cell without adding duplicates.
 *
 * @param cell      cell to modify
 * @param enumValue vertical alignment; {@code null} falls back to {@code STVerticalJc.CENTER}
 */
public static void setAlignment(XWPFTableCell cell, org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalJc.Enum enumValue) {
    if (enumValue == null) {
        enumValue = STVerticalJc.CENTER;
    }
    CTTcPr ctTcPrA = cell.getCTTc().getTcPr();
    if (ctTcPrA == null) {
        ctTcPrA = cell.getCTTc().addNewTcPr();
    }
    // Reuse the alignment element if the cell already has one.
    CTVerticalJc verticalAlignmentA1 = ctTcPrA.isSetVAlign() ? ctTcPrA.getVAlign() : ctTcPrA.addNewVAlign();
    verticalAlignmentA1.setVal(enumValue);
}
/**
 * Replaces the first paragraph of a table cell with a single formatted run.
 *
 * <p>The paragraph at index 0 is removed, a new paragraph holding one run
 * with the given text and font settings is appended, and the cell is
 * vertically centered.
 *
 * @param cell       cell to write into
 * @param text       text of the new run
 * @param fontSize   font size of the run
 * @param isBold     whether the run is bold
 * @param alignment  horizontal alignment of the new paragraph
 * @param fontFamily font family of the run
 */
public static void setCellText(XWPFTableCell cell, String text, int fontSize, boolean isBold, ParagraphAlignment alignment, String fontFamily) {
    // Drop the paragraph currently at index 0 before adding the new content.
    cell.removeParagraph(0);
    final XWPFParagraph content = cell.addParagraph();
    final XWPFRun styledRun = content.createRun();
    // Text first, then the font attributes supplied by the caller.
    styledRun.setText(text);
    styledRun.setFontFamily(fontFamily);
    styledRun.setFontSize(fontSize);
    styledRun.setBold(isBold);
    // Horizontal alignment belongs to the paragraph, vertical alignment to the cell.
    content.setAlignment(alignment);
    setAlignment(cell, STVerticalJc.CENTER);
}
/**
 * Applies placeholder replacement ({@code ${key}}) to each paragraph in the list.
 *
 * @param replacements placeholder-to-value mapping, e.g. {"${name}":"小明","${age}":"18"}
 * @param paragraphs   paragraphs to process; an empty or null list is ignored
 */
private static void replaceXWPFParagraph(Dict replacements, List<XWPFParagraph> paragraphs) {
    if (CollUtil.isNotEmpty(paragraphs)) {
        for (XWPFParagraph current : paragraphs) {
            documentReplacePlaceholders(current, replacements);
        }
    }
}
/**
 * Replaces {@code ${key}} placeholders inside one paragraph.
 *
 * <p>A placeholder may be split over several runs. Run texts are therefore
 * accumulated from the run where a {@code $} first appears until at least one
 * complete placeholder is present. The replaced text is then written into the
 * first accumulated run and the remaining accumulated runs are emptied.
 * Placeholders without an entry in {@code replacements} are left unchanged.
 * Replacement values are inserted verbatim and are not scanned again.
 *
 * @param paragraph    paragraph to process
 * @param replacements placeholder-to-value mapping, e.g. {"${name}":"小明","${age}":"18"};
 *                     {@code null} leaves the paragraph untouched
 */
public static void documentReplacePlaceholders(XWPFParagraph paragraph, Dict replacements) {
    List<XWPFRun> paragraphRuns = paragraph.getRuns();
    if (CollUtil.isEmpty(paragraphRuns) || replacements == null) {
        return;
    }
    // Text gathered from runs startIndex..i while a placeholder may still be incomplete.
    StringBuilder text = new StringBuilder();
    int startIndex = 0;
    for (int i = 0; i < paragraphRuns.size(); i++) {
        String runText = paragraphRuns.get(i).getText(0);
        if (runText == null) {
            // A text-less run cannot start a placeholder; keep the start on a run that has text.
            if (text.length() == 0) {
                startIndex = i + 1;
            }
            continue;
        }
        text.append(runText);
        if (text.indexOf("$") < 0) {
            // No placeholder can start in what has been gathered so far: restart after this run.
            startIndex = i + 1;
            text.setLength(0);
            continue;
        }
        // Match against an immutable snapshot. Rewriting the text a Matcher is still
        // scanning makes it skip later placeholders or read past the end of the text.
        String pending = text.toString();
        Matcher matcher = pattern.matcher(pending);
        if (!matcher.find()) {
            // "$" seen but no complete placeholder yet: keep accumulating the next runs.
            continue;
        }
        StringBuilder replaced = new StringBuilder();
        int copiedUpTo = 0;
        do {
            String key = matcher.group();
            Object obj = replacements.get(key);
            replaced.append(pending, copiedUpTo, matcher.start());
            // Unknown placeholders stay in the text as they were.
            replaced.append(obj == null ? key : obj.toString());
            copiedUpTo = matcher.end();
        } while (matcher.find());
        replaced.append(pending, copiedUpTo, pending.length());
        // The whole text moves into the first accumulated run; the others are emptied.
        for (int j = startIndex + 1; j <= i; j++) {
            paragraphRuns.get(j).setText("", 0);
        }
        paragraphRuns.get(startIndex).setText(replaced.toString(), 0);
        startIndex = i + 1;
        text.setLength(0);
    }
}
/**
 * Copies the style and run content of one paragraph into another.
 *
 * <p>Note from the original author: in testing, when two adjacent paragraphs
 * were each produced by copying from different source paragraphs, the
 * paragraph formatting was lost after converting to PDF with LibreOffice.
 * Setting the styles directly instead of copying avoided the problem.
 *
 * @param sourceParagraph paragraph to copy from
 * @param targetParagraph paragraph that receives the style and the new runs
 */
public static void copyParagraph(XWPFParagraph sourceParagraph, XWPFParagraph targetParagraph) {
    // Paragraph-level formatting first.
    copyParagraphStyle(sourceParagraph, targetParagraph);
    // Then one new target run per source run, carrying its style and first text element.
    for (XWPFRun original : sourceParagraph.getRuns()) {
        final XWPFRun duplicate = targetParagraph.createRun();
        copyRunStyle(original, duplicate);
        final String firstText = original.getText(0);
        if (firstText != null) {
            duplicate.setText(firstText);
        }
    }
}
/**
 * Copies indentation, spacing, line-spacing rule and alignment from one
 * paragraph to another.
 *
 * <p>POI's indentation and spacing getters return -1 when the property is
 * not set on the source paragraph. Those values are skipped so the sentinel
 * is not written into the target as a real measurement.
 *
 * @param source paragraph to read from
 * @param target paragraph to write to
 */
private static void copyParagraphStyle(XWPFParagraph source, XWPFParagraph target) {
    // First-line indent (per the original note: two characters correspond to 643).
    int firstLine = source.getIndentationFirstLine();
    if (firstLine != -1) {
        target.setIndentationFirstLine(firstLine);
    }
    // Spacing after the paragraph.
    int spacingAfter = source.getSpacingAfter();
    if (spacingAfter != -1) {
        target.setSpacingAfter(spacingAfter);
    }
    // Spacing before the paragraph.
    int spacingBefore = source.getSpacingBefore();
    if (spacingBefore != -1) {
        target.setSpacingBefore(spacingBefore);
    }
    // Left indent.
    int indentLeft = source.getIndentationLeft();
    if (indentLeft != -1) {
        target.setIndentationLeft(indentLeft);
    }
    // Right indent.
    int indentRight = source.getIndentationRight();
    if (indentRight != -1) {
        target.setIndentationRight(indentRight);
    }
    // Line-spacing rule and horizontal alignment.
    target.setSpacingLineRule(source.getSpacingLineRule());
    target.setAlignment(source.getAlignment());
}
/**
 * Copies character formatting (font, size, bold, italic, colour, underline,
 * vertical position) from one run to another.
 *
 * <p>Properties that are not set on the source run are skipped. In
 * particular, {@code getFontSizeAsDouble()} returns {@code null} for a run
 * without an explicit size, and passing that to {@code setFontSize(double)}
 * would fail with a {@link NullPointerException} on unboxing.
 *
 * @param source run to read from
 * @param target run to write to
 */
private static void copyRunStyle(XWPFRun source, XWPFRun target) {
    // Font family, only when the source run defines one.
    String fontFamily = source.getFontFamily();
    if (fontFamily != null) {
        target.setFontFamily(fontFamily);
    }
    // Font size (per the original note: size "三号" corresponds to 16.0); null means unset.
    Double fontSize = source.getFontSizeAsDouble();
    if (fontSize != null) {
        target.setFontSize(fontSize.doubleValue());
    }
    target.setBold(source.isBold());
    target.setItalic(source.isItalic());
    // Colour, only when the source run defines one.
    String color = source.getColor();
    if (color != null) {
        target.setColor(color);
    }
    target.setUnderline(source.getUnderline());
    // Raised/lowered text position; -1 is what POI reports for an unset position.
    int textPosition = source.getTextPosition();
    if (textPosition != -1) {
        target.setTextPosition(textPosition);
    }
}
/**
 * Replaces the first body paragraph containing {@code key} with a new,
 * empty paragraph.
 *
 * <p>The new paragraph is inserted at the position of the placeholder
 * paragraph, and the placeholder paragraph is then removed from the body.
 *
 * @param document document to modify
 * @param key      placeholder text identifying the target paragraph
 * @return the inserted paragraph, or {@code null} when no paragraph contains {@code key}
 */
public static XWPFParagraph insertParagraph(XWPFDocument document, String key) {
    // Locate the placeholder paragraph.
    XWPFParagraph targetParagraph = getXwpfParagraphByKey(document, key);
    if (targetParagraph == null) {
        return null;
    }
    XWPFParagraph xwpfParagraph;
    // The cursor points at the placeholder paragraph; release it once the insert is done.
    try (XmlCursor xmlCursor = targetParagraph.getCTP().newCursor()) {
        xwpfParagraph = document.insertNewParagraph(xmlCursor);
    }
    // Drop the placeholder paragraph now that the new one has taken its place.
    DocxUtil.delXWPFParagraph(document, targetParagraph);
    return xwpfParagraph;
}
/**
 * Finds the first paragraph returned by the document whose text contains the key.
 *
 * @param document document to search
 * @param key      placeholder text to look for
 * @return the first matching paragraph, or {@code null} when none matches
 */
public static XWPFParagraph getXwpfParagraphByKey(XWPFDocument document, String key) {
    return document.getParagraphs().stream()
            .filter(candidate -> {
                // A paragraph with null text never matches.
                final String content = candidate.getText();
                return content != null && content.contains(key);
            })
            .findFirst()
            .orElse(null);
}
/**
 * Inserts a new, empty paragraph at the position of the given paragraph.
 *
 * <p>Unlike the overload that takes a placeholder key, the given paragraph
 * is kept in the document.
 *
 * @param document        document to modify
 * @param targetParagraph paragraph that marks the insert position; may be {@code null}
 * @return the inserted paragraph, or {@code null} when {@code targetParagraph} is {@code null}
 */
public static XWPFParagraph insertParagraph(XWPFDocument document, XWPFParagraph targetParagraph) {
    if (targetParagraph == null) {
        return null;
    }
    // The cursor points at the target paragraph; release it once the insert is done.
    try (XmlCursor xmlCursor = targetParagraph.getCTP().newCursor()) {
        return document.insertNewParagraph(xmlCursor);
    }
}
/**
 * Renders every page of a PDF file to a PNG image in the output directory.
 *
 * <p>Files are named {@code <n>.png}, where {@code n} is taken from
 * {@code atomicInteger} and incremented per page. Landscape pages are rotated
 * to portrait. The PDF document is closed when rendering finishes or fails.
 *
 * @param pdfFilePath   path of the PDF file
 * @param outputDir     directory that receives the images; created if missing
 * @param atomicInteger counter supplying the image file names
 */
@SneakyThrows
public static void convertPdfToImages(String pdfFilePath, String outputDir, AtomicInteger atomicInteger) {
    // try-with-resources releases the document even when rendering or writing throws.
    try (PDDocument document = PDDocument.load(new File(pdfFilePath))) {
        convertPdfToImages(outputDir, atomicInteger, document);
    }
}
/**
 * Renders every page of a PDF read from a stream to a PNG image in the
 * output directory.
 *
 * <p>Files are named {@code <n>.png}, where {@code n} is taken from
 * {@code atomicInteger} and incremented per page. Landscape pages are rotated
 * to portrait. The PDF document is closed when rendering finishes or fails;
 * this method does not itself close {@code inputStream}.
 *
 * @param inputStream   stream supplying the PDF content
 * @param outputDir     directory that receives the images; created if missing
 * @param atomicInteger counter supplying the image file names
 */
@SneakyThrows
public static void convertPdfToImages(InputStream inputStream, String outputDir, AtomicInteger atomicInteger) {
    try (PDDocument document = PDDocument.load(inputStream)) {
        convertPdfToImages(outputDir, atomicInteger, document);
    }
}
/**
 * Renders each page of an already loaded PDF at 100 DPI and writes it as a
 * PNG file. Closing the document is left to the caller.
 *
 * @param outputDir     directory that receives the images; created if missing
 * @param atomicInteger counter supplying the image file names
 * @param document      loaded PDF document
 * @throws IOException when the output directory cannot be created or a page
 *                     cannot be rendered or written
 */
private static void convertPdfToImages(String outputDir, AtomicInteger atomicInteger, PDDocument document) throws IOException {
    PDFRenderer pdfRenderer = new PDFRenderer(document);
    // Make sure the output directory exists; fail early with a clear message if it cannot be created.
    File outputDirFile = new File(outputDir);
    if (!outputDirFile.exists() && !outputDirFile.mkdirs() && !outputDirFile.isDirectory()) {
        throw new IOException("Cannot create output directory: " + outputDir);
    }
    final int pageCount = document.getNumberOfPages();
    for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
        // Render the page as an RGB image at 100 DPI.
        BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 100, ImageType.RGB);
        // Turn landscape pages into portrait orientation.
        image = rotateImage(image);
        // The counter value becomes the file name, so images keep page order across calls.
        File outputImageFile = new File(outputDir, atomicInteger.getAndAdd(1) + ".png");
        ImageIO.write(image, "png", outputImageFile);
    }
}
/**
 * Rotates a landscape image by 90 degrees so that it becomes portrait.
 *
 * <p>Images that are not wider than they are tall are returned unchanged
 * (the same instance).
 *
 * @param image source image
 * @return a new image with width and height swapped, or {@code image} itself
 *         when it is not landscape
 */
private static BufferedImage rotateImage(BufferedImage image) {
    final int srcWidth = image.getWidth();
    final int srcHeight = image.getHeight();
    // Portrait and square images need no rotation.
    if (srcWidth <= srcHeight) {
        return image;
    }
    // The target canvas has the dimensions swapped and keeps the source image type.
    final BufferedImage portrait = new BufferedImage(srcHeight, srcWidth, image.getType());
    // Shift right by the source height, then make a quarter turn, so the
    // rotated pixels land inside the canvas.
    final AffineTransform quarterTurn = AffineTransform.getTranslateInstance(srcHeight, 0);
    quarterTurn.quadrantRotate(1);
    final Graphics2D canvas = portrait.createGraphics();
    canvas.setTransform(quarterTurn);
    canvas.drawImage(image, 0, 0, null);
    canvas.dispose();
    return portrait;
}
/**
 * Appends every image in a directory to the end of the document, one
 * centered paragraph per image.
 *
 * <p>Images are ordered by the integer value of their file name without
 * extension (e.g. {@code 0.png}, {@code 1.png}, ...), so every entry in the
 * directory must have a numeric name. Each image is inserted as a PNG at
 * 420 x 594 points, and its input stream is closed once the picture has been
 * added.
 *
 * @param document document that receives the images
 * @param imageDir directory containing the images
 */
@SneakyThrows
public static void appendImagesToDocx(XWPFDocument document, String imageDir) {
    List<File> images = ListUtil.toList(FileUtil.ls(imageDir));
    if (CollUtil.isEmpty(images)) {
        return;
    }
    // Numeric ordering, so that "10.png" comes after "9.png".
    images.sort(Comparator.comparingInt(o -> Integer.parseInt(FileUtil.getPrefix(o.getName()))));
    for (File imageFile : images) {
        // One centered paragraph with a single run per image.
        XWPFParagraph paragraph = document.createParagraph();
        paragraph.setAlignment(ParagraphAlignment.CENTER);
        XWPFRun run = paragraph.createRun();
        String imagePath = imageFile.getPath();
        // Close the image stream after the picture is added to avoid leaking file handles.
        try (InputStream imageStream = FileUtil.getInputStream(imageFile)) {
            run.addPicture(imageStream, XWPFDocument.PICTURE_TYPE_PNG, imagePath,
                    Units.toEMU(420), Units.toEMU(594));
        }
    }
}
}