Word 文档内容处理 Java 工具类

一、 概述

最近做了一个 Word 文档格式的报告输出需求,涉及较多 Word 文档内容与格式的操作,例如插入段落、插入表格、删除段落、表格内容填充、表格合并单元格、段落复制、内容占位符替换、PDF 转图片后插入文档等。我把用到的方法整理成了一个工具类,在此分享。

二、代码

2.1 引入相关依赖

        <dependency>
          <groupId>cn.hutool</groupId>
          <artifactId>hutool-all</artifactId>
          <version>5.8.35</version>
        </dependency>
        <!-- Apache POI for Docx -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>5.4.0</version>
        </dependency>
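        <!-- POI 5.x 起 ooxml-schemas/poi-ooxml-schemas 已更名;完整 schema 请使用与 poi-ooxml 同版本的 poi-ooxml-full,不要混用 4.1.2 -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-full</artifactId>
            <version>5.4.0</version>
        </dependency>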
        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>5.3.0</version>
        </dependency>
        <!-- PDF Renderer for PDF to Image -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.27</version>
        </dependency>

2.2 代码

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.io.IoUtil;
import cn.hutool.core.lang.Dict;
import lombok.SneakyThrows;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.springframework.util.ObjectUtils;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DocxUtil {

    // Matches placeholders of the form ${key}: "${", one or more characters other than "}", then "}".
    private static final Pattern pattern = Pattern.compile("\\$\\{[^}]+\\}");

    /**
     * Opens a Word document from a stream.
     *
     * <p>Checked exceptions raised by the {@link XWPFDocument} constructor are rethrown
     * as-is through Lombok's {@code @SneakyThrows}.
     *
     * @param inputStream stream supplying the document content
     * @return the document built from {@code inputStream}
     */
    @SneakyThrows
    public static XWPFDocument getXWPFDocument(InputStream inputStream) {
        XWPFDocument document = new XWPFDocument(inputStream);
        return document;
    }

    /**
     * Writes the document to the given stream and then closes the stream.
     *
     * <p>The stream is closed even when writing fails, so a failed write no longer
     * leaks it. Checked exceptions from {@code write} are rethrown through
     * Lombok's {@code @SneakyThrows}.
     *
     * @param document     document to serialize
     * @param outputStream destination stream; always closed by this method
     */
    @SneakyThrows
    public static void writeOut(XWPFDocument document, OutputStream outputStream) {
        try {
            document.write(outputStream);
        } finally {
            IoUtil.close(outputStream);
        }
    }

    /**
     * Replaces the first body paragraph containing {@code key} with a new table.
     *
     * <p>The table is inserted at the cursor position of the placeholder paragraph and
     * the placeholder paragraph is then removed, so the table takes its place.
     *
     * <p>Note from the original author: always set column widths on the returned table
     * (e.g. {@code DocxUtil.setColumnWidth(table, 0, 1000)}); otherwise merged cells
     * come out misaligned when LibreOffice converts the document to PDF.
     *
     * @param document document to modify
     * @param key      placeholder text identifying the target paragraph
     * @return the inserted table, or {@code null} when no paragraph contains {@code key}
     */
    public static XWPFTable insertTbl(XWPFDocument document, String key) {
        XWPFParagraph targetParagraph = getXwpfParagraphByKey(document, key);
        if (targetParagraph == null) {
            return null;
        }

        XWPFTable xwpfTable;
        // Release the cursor as soon as the insertion is done instead of leaving it open.
        try (XmlCursor xmlCursor = targetParagraph.getCTP().newCursor()) {
            xwpfTable = document.insertNewTbl(xmlCursor);
        }

        // The placeholder paragraph is no longer needed once the table is in place.
        delXWPFParagraph(document, targetParagraph);
        return xwpfTable;
    }


    /**
     * Removes a paragraph from the document body.
     *
     * <p>The paragraph's position is looked up with {@code getPosOfParagraph} and that
     * position is passed to {@code removeBodyElement}.
     *
     * @param document        document to modify
     * @param targetParagraph paragraph to remove
     */
    public static void delXWPFParagraph(XWPFDocument document, XWPFParagraph targetParagraph) {
        int position = document.getPosOfParagraph(targetParagraph);
        document.removeBodyElement(position);
    }

    /**
     * Replaces {@code ${key}} placeholders in the document's paragraphs and, optionally,
     * in the paragraphs of its table cells.
     *
     * @param document       document to modify
     * @param replacements   replacement values keyed by the full placeholder text,
     *                       e.g. {@code {"${name}":"Tom","${age}":"18"}}
     * @param isReplaceTable whether table cells are processed as well
     */
    public static void documentReplacePlaceholders(XWPFDocument document, Dict replacements, boolean isReplaceTable) {
        // Paragraphs returned by the document itself are always processed.
        replaceXWPFParagraph(replacements, document.getParagraphs());

        if (!isReplaceTable) {
            return;
        }

        // Walk every cell of every table returned by the document.
        for (XWPFTable table : document.getTables()) {
            for (XWPFTableRow tableRow : table.getRows()) {
                for (XWPFTableCell tableCell : tableRow.getTableCells()) {
                    replaceXWPFParagraph(replacements, tableCell.getParagraphs());
                }
            }
        }
    }

    /**
     * Writes the header titles into the first row of a table.
     *
     * <p>Missing cells are created on demand, and the first row itself is created when
     * the table has none. Each title is written centered, non-bold, in the
     * "华文楷体" font.
     *
     * @param table    table to fill
     * @param fontSize font size of the header text
     * @param head     header titles in column order, e.g. {@code ["No.","Name","Age"]};
     *                 nothing happens when it is null or empty
     */
    public static void setTableHead(XWPFTable table, int fontSize, String... head) {
        if (ObjectUtils.isEmpty(head)) {
            return;
        }

        XWPFTableRow headerRow = table.getRow(0);
        if (headerRow == null) {
            // Same create-on-demand policy that setTableContext applies to data rows.
            headerRow = table.createRow();
        }

        for (int i = 0; i < head.length; i++) {
            XWPFTableCell cell = headerRow.getCell(i);
            if (cell == null) {
                cell = headerRow.createCell();
            }
            setCellText(cell, head[i], fontSize, false, ParagraphAlignment.CENTER, "华文楷体");
        }
    }

    /**
     * Fills the data rows of a table, starting at row index 1 (row 0 is left for the header).
     *
     * <p>Rows and cells that do not exist yet are created. Missing or null values are
     * written as an empty string. Text is written non-bold in the "宋体" font.
     *
     * @param table          table to fill
     * @param fontSize       font size of the cell text
     * @param heads          map keys in column order, e.g. {@code ["no","name","age"]}
     * @param contextMapList one map per data row, e.g. {@code [{"no":"1","name":"Tom","age":"18"}]};
     *                       nothing happens when it is null or empty
     * @param colIndex       indexes of the columns to center; all other columns are
     *                       left-aligned. {@code null} means no column is centered
     */
    public static void setTableContext(XWPFTable table, int fontSize, List<String> heads, List<Map<String, Object>> contextMapList, List<Integer> colIndex) {
        if (CollUtil.isEmpty(contextMapList)) {
            return;
        }

        for (int i = 0; i < contextMapList.size(); i++) {
            Map<String, Object> rowData = contextMapList.get(i);
            // Data row i lives at table row i + 1 because row 0 holds the header.
            XWPFTableRow tableRow = table.getRow(i + 1);
            if (tableRow == null) {
                tableRow = table.createRow();
            }

            for (int j = 0; j < heads.size(); j++) {
                XWPFTableCell cell = tableRow.getCell(j);
                if (cell == null) {
                    cell = tableRow.createCell();
                }

                // A null row map is treated like a map without the key.
                Object value = rowData == null ? null : rowData.get(heads.get(j));
                String text = value == null ? "" : value.toString();

                boolean centered = colIndex != null && colIndex.contains(j);
                ParagraphAlignment alignment = centered ? ParagraphAlignment.CENTER : ParagraphAlignment.LEFT;
                DocxUtil.setCellText(cell, text, fontSize, false, alignment, "宋体");
            }
        }
    }

    /**
     * Vertically merges the cells of one column across a range of rows.
     *
     * <p>Collects the cell at {@code col} from every row in {@code [rowStart, rowEnd]}
     * and hands them to {@link #mergeCell(XWPFTableCell...)}.
     *
     * @param table    table containing the cells
     * @param col      column index
     * @param rowStart first row index (inclusive)
     * @param rowEnd   last row index (inclusive)
     */
    public static void mergeCellV(XWPFTable table, int col, int rowStart, int rowEnd) {
        List<XWPFTableCell> columnCells = new ArrayList<>();
        int rowIndex = rowStart;
        while (rowIndex <= rowEnd) {
            XWPFTableRow tableRow = table.getRow(rowIndex);
            columnCells.add(tableRow.getCell(col));
            rowIndex++;
        }
        XWPFTableCell[] cellArray = columnCells.toArray(new XWPFTableCell[0]);
        mergeCell(cellArray);
    }

    /**
     * Vertically merges the given cells.
     *
     * <p>The first cell is marked as the start of the merge ({@code RESTART}) and every
     * following cell as its continuation ({@code CONTINUE}). An existing vertical-merge
     * setting on a cell is overwritten instead of a second one being appended, so the
     * method can be called again on cells that were merged before.
     *
     * @param cells cells to merge, in top-to-bottom order; nothing happens when fewer
     *              than two cells are given
     */
    public static void mergeCell(XWPFTableCell... cells) {
        if (cells == null || cells.length < 2) {
            return;
        }

        for (int i = 0; i < cells.length; i++) {
            CTTc ctTc = cells[i].getCTTc();

            CTTcPr tcPr = ctTc.getTcPr();
            if (tcPr == null) {
                tcPr = ctTc.addNewTcPr();
            }

            // Reuse the existing element: addNewVMerge() would always append another one.
            CTVMerge vMerge = tcPr.getVMerge();
            if (vMerge == null) {
                vMerge = tcPr.addNewVMerge();
            }

            vMerge.setVal(i == 0 ? STMerge.RESTART : STMerge.CONTINUE);
        }
    }

    /**
     * Sets the width of one column by setting the width of that column's cell in every row.
     *
     * <p>Rows that have no cell at {@code colIndex} are skipped. Existing cell properties
     * are reused: the previous code always added a new properties element, which left
     * cells that already had properties (for example after {@code setCellText}) with
     * two of them.
     *
     * @param table    table to modify
     * @param colIndex column index
     * @param width    width in DXA units (twentieths of a point)
     */
    public static void setColumnWidth(XWPFTable table, int colIndex, int width) {
        int rowCount = table.getNumberOfRows();
        for (int i = 0; i < rowCount; i++) {
            XWPFTableCell cell = table.getRow(i).getCell(colIndex);
            if (cell == null) {
                continue;
            }

            CTTc ctTc = cell.getCTTc();
            CTTcPr tcPr = ctTc.getTcPr();
            if (tcPr == null) {
                tcPr = ctTc.addNewTcPr();
            }

            CTTblWidth cellWidth = tcPr.getTcW();
            if (cellWidth == null) {
                cellWidth = tcPr.addNewTcW();
            }

            cellWidth.setType(STTblWidth.DXA);
            cellWidth.setW(BigInteger.valueOf(width));
        }
    }

    /**
     * Sets the vertical alignment of a table cell.
     *
     * <p>An existing vertical-alignment setting is overwritten rather than a second one
     * being appended, so repeated calls on the same cell are safe.
     *
     * @param cell      cell to modify
     * @param enumValue vertical alignment; {@code null} is treated as {@code STVerticalJc.CENTER}
     */
    public static void setAlignment(XWPFTableCell cell,
                                     org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalJc.Enum enumValue
    ) {
        STVerticalJc.Enum effectiveValue = enumValue == null ? STVerticalJc.CENTER : enumValue;

        CTTcPr tcPr = cell.getCTTc().getTcPr();
        if (tcPr == null) {
            tcPr = cell.getCTTc().addNewTcPr();
        }

        // Reuse the existing element: addNewVAlign() would always append another one.
        CTVerticalJc verticalJc = tcPr.getVAlign();
        if (verticalJc == null) {
            verticalJc = tcPr.addNewVAlign();
        }
        verticalJc.setVal(effectiveValue);
    }

    /**
     * Writes text into a table cell with the given font settings.
     *
     * <p>The cell's first paragraph (if any) is removed and a new paragraph with a single
     * run is appended. The cell is always vertically centered.
     *
     * @param cell       cell to fill
     * @param text       text to write; {@code null} is written as an empty string, matching
     *                   how {@code setTableContext} treats missing values
     * @param fontSize   font size
     * @param isBold     whether the text is bold
     * @param alignment  horizontal alignment of the paragraph
     * @param fontFamily font family name
     */
    public static void setCellText(XWPFTableCell cell, String text, int fontSize, boolean isBold, ParagraphAlignment alignment, String fontFamily) {
        // Drop the first paragraph so the new text does not follow an empty line;
        // guarded so a cell without paragraphs does not fail on index 0.
        if (!cell.getParagraphs().isEmpty()) {
            cell.removeParagraph(0);
        }

        XWPFParagraph paragraph = cell.addParagraph();
        XWPFRun run = paragraph.createRun();
        run.setText(text == null ? "" : text);
        run.setFontFamily(fontFamily);
        run.setFontSize(fontSize);
        run.setBold(isBold);

        paragraph.setAlignment(alignment);
        setAlignment(cell, STVerticalJc.CENTER);
    }

    /**
     * Applies placeholder replacement to every paragraph in a list.
     *
     * @param replacements replacement values keyed by the full placeholder text,
     *                     e.g. {@code {"${name}":"Tom","${age}":"18"}}
     * @param paragraphs   paragraphs to process; nothing happens when null or empty
     */
    private static void replaceXWPFParagraph(Dict replacements, List<XWPFParagraph> paragraphs) {
        if (!CollUtil.isEmpty(paragraphs)) {
            for (XWPFParagraph current : paragraphs) {
                documentReplacePlaceholders(current, replacements);
            }
        }
    }

    /**
     * Replaces {@code ${key}} placeholders in a single paragraph.
     *
     * <p>Word may split one placeholder across several runs, so run texts are accumulated
     * until the accumulated text contains at least one complete placeholder. The replaced
     * text is then written into the first run that contributed to it and the other
     * contributing runs are emptied. Placeholders without an entry in
     * {@code replacements} are left unchanged.
     *
     * <p>Only the first text node of each run ({@code getText(0)}) is considered.
     *
     * @param paragraph    paragraph to modify
     * @param replacements replacement values keyed by the full placeholder text,
     *                     e.g. {@code {"${name}":"Tom","${age}":"18"}}; nothing happens
     *                     when it is {@code null}
     */
    public static void documentReplacePlaceholders(XWPFParagraph paragraph, Dict replacements) {
        List<XWPFRun> paragraphRuns = paragraph.getRuns();
        if (CollUtil.isEmpty(paragraphRuns) || replacements == null) {
            return;
        }

        StringBuilder pending = new StringBuilder();
        int startIndex = 0;
        for (int i = 0; i < paragraphRuns.size(); i++) {
            String runText = paragraphRuns.get(i).getText(0);
            if (runText == null) {
                continue;
            }

            // The run that opens a new accumulation receives the replaced text later,
            // so it must be a run that actually carries text.
            if (pending.length() == 0) {
                startIndex = i;
            }
            pending.append(runText);

            String value = pending.toString();
            if (!value.contains("$")) {
                // No placeholder can start here; drop the accumulated text.
                pending.setLength(0);
                continue;
            }

            Matcher matcher = pattern.matcher(value);
            if (!matcher.find()) {
                // A "$" without a complete placeholder: it may continue in the next run.
                continue;
            }

            // Replace on an immutable snapshot in one pass. Mutating the buffer while
            // a Matcher was iterating over it skipped placeholders or ran out of bounds
            // whenever a replacement changed the text length.
            String replaced = matcher.replaceAll(match -> {
                String key = match.group();
                Object replacement = replacements.get(key);
                return Matcher.quoteReplacement(replacement == null ? key : replacement.toString());
            });

            paragraphRuns.get(startIndex).setText(replaced, 0);
            for (int j = startIndex + 1; j <= i; j++) {
                XWPFRun merged = paragraphRuns.get(j);
                // Runs without text did not contribute anything and are left untouched.
                if (merged.getText(0) != null) {
                    merged.setText("", 0);
                }
            }

            pending.setLength(0);
        }
    }


    /**
     * Copies the style and the text runs of one paragraph into another.
     *
     * <p>Note from the original author: when two adjacent paragraphs are each filled by
     * copying from different source paragraphs, the paragraph formatting was observed to
     * get lost once LibreOffice converts the document to PDF; setting the styles directly
     * instead of copying avoids that.
     *
     * @param sourceParagraph paragraph to copy from
     * @param targetParagraph paragraph to copy into; new runs are appended to it
     */
    public static void copyParagraph(XWPFParagraph sourceParagraph, XWPFParagraph targetParagraph) {
        // Paragraph-level formatting first.
        copyParagraphStyle(sourceParagraph, targetParagraph);

        // Then one new target run per source run, carrying its style and first text node.
        for (XWPFRun originalRun : sourceParagraph.getRuns()) {
            XWPFRun copiedRun = targetParagraph.createRun();
            copyRunStyle(originalRun, copiedRun);
            if (originalRun.getText(0) == null) {
                continue;
            }
            copiedRun.setText(originalRun.getText(0));
        }
    }

    /**
     * Copies paragraph-level formatting (indentation, spacing before/after, line-spacing
     * rule and alignment) from {@code source} to {@code target}.
     *
     * <p>POI's indentation and spacing getters return {@code -1} when the property is not
     * set on the paragraph. Those values are skipped: writing {@code -1} into the target
     * would turn "not set" into an explicit, bogus value instead of leaving the target's
     * default in place.
     *
     * @param source paragraph to read from
     * @param target paragraph to write to
     */
    private static void copyParagraphStyle(XWPFParagraph source, XWPFParagraph target) {
        int firstLine = source.getIndentationFirstLine(); // e.g. 643 for a two-character first-line indent
        if (firstLine != -1) {
            target.setIndentationFirstLine(firstLine);
        }

        int spacingAfter = source.getSpacingAfter();
        if (spacingAfter != -1) {
            target.setSpacingAfter(spacingAfter);
        }

        int spacingBefore = source.getSpacingBefore();
        if (spacingBefore != -1) {
            target.setSpacingBefore(spacingBefore);
        }

        int indentLeft = source.getIndentationLeft();
        if (indentLeft != -1) {
            target.setIndentationLeft(indentLeft);
        }

        int indentRight = source.getIndentationRight();
        if (indentRight != -1) {
            target.setIndentationRight(indentRight);
        }

        target.setSpacingLineRule(source.getSpacingLineRule());
        target.setAlignment(source.getAlignment());
    }

    /**
     * Copies run-level formatting (font family, font size, bold, italic, color, underline
     * and text position) from {@code source} to {@code target}.
     *
     * <p>Properties the source run does not define are skipped. In particular
     * {@code getFontSizeAsDouble()} returns {@code null} for a run without an explicit
     * size, which previously caused a {@code NullPointerException} when unboxed.
     *
     * @param source run to read from
     * @param target run to write to
     */
    private static void copyRunStyle(XWPFRun source, XWPFRun target) {
        String fontFamily = source.getFontFamily();
        if (fontFamily != null) {
            target.setFontFamily(fontFamily);
        }

        Double fontSize = source.getFontSizeAsDouble(); // e.g. 16.0
        if (fontSize != null) {
            target.setFontSize(fontSize.doubleValue());
        }

        target.setBold(source.isBold());
        target.setItalic(source.isItalic());

        String color = source.getColor();
        if (color != null) {
            target.setColor(color);
        }

        target.setUnderline(source.getUnderline());

        // -1 is POI's "not set" value for the text position (superscript/subscript offset).
        int textPosition = source.getTextPosition();
        if (textPosition != -1) {
            target.setTextPosition(textPosition);
        }
    }

    /**
     * Replaces the first body paragraph containing {@code key} with a new, empty paragraph.
     *
     * <p>The new paragraph is inserted at the cursor position of the placeholder paragraph
     * and the placeholder paragraph is then removed.
     *
     * @param document document to modify
     * @param key      placeholder text identifying the target paragraph
     * @return the inserted paragraph, or {@code null} when no paragraph contains {@code key}
     */
    public static XWPFParagraph insertParagraph(XWPFDocument document, String key) {
        XWPFParagraph targetParagraph = getXwpfParagraphByKey(document, key);
        if (targetParagraph == null) {
            return null;
        }

        XWPFParagraph xwpfParagraph;
        // Release the cursor as soon as the insertion is done instead of leaving it open.
        try (XmlCursor xmlCursor = targetParagraph.getCTP().newCursor()) {
            xwpfParagraph = document.insertNewParagraph(xmlCursor);
        }

        // The placeholder paragraph is no longer needed once the new one is in place.
        DocxUtil.delXWPFParagraph(document, targetParagraph);
        return xwpfParagraph;
    }

    /**
     * Finds the first paragraph returned by {@code document.getParagraphs()} whose text
     * contains {@code key}.
     *
     * @param document document to search
     * @param key      placeholder text to look for
     * @return the first matching paragraph, or {@code null} when none matches
     */
    public static XWPFParagraph getXwpfParagraphByKey(XWPFDocument document, String key) {
        return document.getParagraphs().stream()
                .filter(candidate -> {
                    String content = candidate.getText();
                    return content != null && content.contains(key);
                })
                .findFirst()
                .orElse(null);
    }

    /**
     * Inserts a new, empty paragraph at the cursor position of an existing paragraph.
     *
     * <p>Unlike {@link #insertParagraph(XWPFDocument, String)}, the existing paragraph is kept.
     *
     * @param document        document to modify
     * @param targetParagraph paragraph marking the insert position
     * @return the inserted paragraph, or {@code null} when {@code targetParagraph} is {@code null}
     */
    public static XWPFParagraph insertParagraph(XWPFDocument document, XWPFParagraph targetParagraph) {
        if (targetParagraph == null) {
            return null;
        }

        // Release the cursor as soon as the insertion is done instead of leaving it open.
        try (XmlCursor xmlCursor = targetParagraph.getCTP().newCursor()) {
            return document.insertNewParagraph(xmlCursor);
        }
    }

    /**
     * Renders every page of a PDF file to a PNG image in {@code outputDir}.
     *
     * <p>Each page is written as {@code <n>.png}, where {@code n} is the next value taken
     * from {@code atomicInteger}. The PDF document is closed when rendering finishes or
     * fails. Checked exceptions are rethrown through Lombok's {@code @SneakyThrows}.
     *
     * @param pdfFilePath   path of the PDF file
     * @param outputDir     directory the images are written to
     * @param atomicInteger counter supplying the image file names; advanced once per page
     */
    @SneakyThrows
    public static void convertPdfToImages(String pdfFilePath, String outputDir, AtomicInteger atomicInteger) {
        // try-with-resources guarantees the document is released even if rendering throws.
        try (PDDocument document = PDDocument.load(new File(pdfFilePath))) {
            convertPdfToImages(outputDir, atomicInteger, document);
        }
    }

    /**
     * Renders every page of a PDF read from a stream to a PNG image in {@code outputDir}.
     *
     * <p>Each page is written as {@code <n>.png}, where {@code n} is the next value taken
     * from {@code atomicInteger}. The PDF document is closed when rendering finishes or
     * fails; {@code inputStream} itself is not closed by this method. Checked exceptions
     * are rethrown through Lombok's {@code @SneakyThrows}.
     *
     * @param inputStream   stream supplying the PDF content
     * @param outputDir     directory the images are written to
     * @param atomicInteger counter supplying the image file names; advanced once per page
     */
    @SneakyThrows
    public static void convertPdfToImages(InputStream inputStream, String outputDir, AtomicInteger atomicInteger) {
        // try-with-resources guarantees the document is released even if rendering throws.
        try (PDDocument document = PDDocument.load(inputStream)) {
            convertPdfToImages(outputDir, atomicInteger, document);
        }
    }

    /**
     * Renders every page of an already loaded PDF to a PNG image and closes the document.
     *
     * <p>Pages are rendered at 100 DPI in RGB, passed through {@code rotateImage}, and
     * written as {@code <n>.png}, where {@code n} is the next value taken from
     * {@code atomicInteger}. The document is closed in all cases, including failures.
     *
     * @param outputDir     directory the images are written to; created when missing
     * @param atomicInteger counter supplying the image file names; advanced once per page
     * @param document      loaded PDF document; closed by this method
     * @throws IOException when the output directory cannot be created or rendering/writing fails
     */
    private static void convertPdfToImages(String outputDir, AtomicInteger atomicInteger, PDDocument document) throws IOException {
        try {
            // Fail with a clear message instead of ignoring a failed mkdirs().
            File outputDirFile = new File(outputDir);
            if (!outputDirFile.isDirectory() && !outputDirFile.mkdirs() && !outputDirFile.isDirectory()) {
                throw new IOException("Cannot create output directory: " + outputDir);
            }

            PDFRenderer pdfRenderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 100, ImageType.RGB);

                // Landscape pages are turned upright; portrait pages pass through unchanged.
                image = rotateImage(image);

                File outputImageFile = new File(outputDir, atomicInteger.getAndIncrement() + ".png");
                ImageIO.write(image, "png", outputImageFile);
            }
        } finally {
            // Previously skipped whenever rendering or writing threw.
            IoUtil.close(document);
        }
    }

    /**
     * Rotates a landscape image by 90 degrees so that it becomes portrait.
     *
     * <p>Images whose width is not greater than their height are returned unchanged.
     *
     * @param image source image
     * @return a new rotated image with width and height swapped, or {@code image} itself
     *         when it is not landscape
     */
    private static BufferedImage rotateImage(BufferedImage image) {
        final int sourceWidth = image.getWidth();
        final int sourceHeight = image.getHeight();

        // Portrait and square images need no rotation.
        if (sourceWidth <= sourceHeight) {
            return image;
        }

        // The canvas has the source dimensions swapped and the same pixel type.
        BufferedImage portrait = new BufferedImage(sourceHeight, sourceWidth, image.getType());

        // Shift right by the source height, then rotate a quarter turn, so the
        // rotated pixels land inside the new canvas.
        AffineTransform quarterTurn = AffineTransform.getTranslateInstance(sourceHeight, 0);
        quarterTurn.rotate(Math.toRadians(90));

        Graphics2D graphics = portrait.createGraphics();
        graphics.setTransform(quarterTurn);
        graphics.drawImage(image, 0, 0, null);
        graphics.dispose();

        return portrait;
    }

    /**
     * Appends every image of a directory to the end of a document, one centered paragraph
     * per image.
     *
     * <p>Files are ordered by the integer value of their name without extension
     * (e.g. {@code 0.png, 1.png, 2.png}), so every file name in the directory must be
     * numeric. Images are inserted as PNG at 420 x 594 points. Each image stream is closed
     * after its picture has been added.
     *
     * @param document document to append to
     * @param imageDir directory containing the images
     */
    @SneakyThrows
    public static void appendImagesToDocx(XWPFDocument document, String imageDir) {
        List<File> images = ListUtil.toList(FileUtil.ls(imageDir));
        if (CollUtil.isEmpty(images)) {
            return;
        }

        images.sort(Comparator.comparingInt(o -> Integer.parseInt(FileUtil.getPrefix(o.getName()))));

        for (File imageFile : images) {
            XWPFParagraph paragraph = document.createParagraph();
            paragraph.setAlignment(ParagraphAlignment.CENTER);
            XWPFRun run = paragraph.createRun();

            // try-with-resources: the stream used to be left open for every image.
            try (InputStream imageStream = FileUtil.getInputStream(imageFile)) {
                run.addPicture(imageStream, XWPFDocument.PICTURE_TYPE_PNG, imageFile.getPath(),
                        Units.toEMU(420), Units.toEMU(594));
            }
        }
    }
}

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容