文章概览
- 引入POI类库及注意事项
- 多个Word文档合并
- 替换文档中的占位符,包含段落占位符、表格占位符
- 富文本插入到Word及注意事项
- 给Word生成水印
- 传送门
- 鸣谢
引入POI类库及注意事项
Java操作Word用到的工具类库基于POI 4.1.0版本。POI官方API文档是英文的,可以使用Google自带的全文翻译功能阅读,很方便。注意文章中操作的Word都是docx后缀的,即Word2007及以上版本的格式;如果需要操作Word2003版本(doc后缀)的文档,还需自行转换。
后续将更新从Excel读取表格数据写入到Word、从另一个Word读取模板表格到当前Word等功能。项目代码中每一个功能都提供了test类,你只需要拉下代码、修改文件目录即可执行,一步到位。
下面开始进入主题,文章中只贴关键代码,全部代码请通过传送门去GitHub拉取,如果感觉对你有帮助请在GitHub上点亮你尊贵的小星星,码砖不易,转载请说明出处,谢谢。
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the wordtools project (Word/docx utilities). -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.corey</groupId>
<artifactId>wordtools</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Apache POI dependencies (begin); all four artifacts are pinned to the same version, 4.1.0 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.0</version>
</dependency>
<!-- Apache POI dependencies (end) -->
<!-- "out net" group (begin): docx4j artifacts and jsoup -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-ImportXHTML</artifactId>
<version>3.3.6</version>
</dependency>
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j-export-fo</artifactId>
<version>3.3.6</version>
</dependency>
<!-- jsoup: HTML parser -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.2</version>
</dependency>
<!-- "out net" group (end) -->
<!-- https://mvnrepository.com/artifact/org.springframework/spring-core -->
<!-- Only Spring's utility classes are used -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
<version>5.2.1.RELEASE</version>
</dependency>
<!-- Apache Commons IO -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/javax.servlet/javax.servlet-api -->
<!-- Scope "provided": needed at compile time, expected to be supplied by the runtime environment -->
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>4.0.1</version>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
多个Word文档合并
POI合并文档的基本思路:Word(docx)文档的内容本质上是xml,先把各个文档xml的xmlns去重合并,添加固定的格式标签,再把各个xml里面的元素拼接到一起,组成一个新的xml,最后输出为一个新的Word。更多代码请查看项目的magerword目录。
package magerword;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.springframework.util.ObjectUtils;
import java.io.*;
import java.util.*;
/**
 * Merges several .docx files into a single document.
 *
 * <p>The merge works on the XML text of each document body: the namespace
 * declarations of all bodies are de-duplicated, the child elements of all bodies
 * are concatenated, and the result replaces the body of the first document, which
 * is then written out. Only the body XML is concatenated here; nothing else from
 * the second and later documents is copied by this class.
 *
 * @author corey
 */
public class MagerUtil {

    /** Output file used by {@link #mergeDoc(String...)}; replace with a path valid on your machine. */
    private static final String DEFAULT_OUTPUT_PATH = "/Users/corey/Desktop/temp/wordtools/合并文档3.docx";

    /** Marker that starts every namespace declaration in the body's start tag. */
    private static final String XMLNS = "xmlns";

    /**
     * Merges the given Word files, in order, and writes the result to the default output path.
     *
     * @param filepaths paths of the .docx files to merge; the first one provides the target document
     * @throws Exception if a source file cannot be opened or parsed, or the result cannot be written
     */
    public static void mergeDoc(String... filepaths) throws Exception {
        mergeDocTo(DEFAULT_OUTPUT_PATH, filepaths);
    }

    /**
     * Merges the given Word files, in order, and writes the result to {@code outputPath}.
     *
     * <p>Nothing is written when {@code filepaths} is null or empty. A source file that
     * cannot be opened aborts the merge with the original exception instead of being
     * swallowed and surfacing later as a {@link NullPointerException}.
     *
     * @param outputPath path of the merged .docx file to create
     * @param filepaths  paths of the .docx files to merge; the first one provides the target document
     * @throws Exception if a source file cannot be opened or parsed, or the result cannot be written
     */
    public static void mergeDocTo(String outputPath, String... filepaths) throws Exception {
        if (filepaths == null || filepaths.length == 0) {
            return;
        }
        List<CTBody> ctBodyList = new ArrayList<>();
        List<XWPFDocument> srcDocuments = new ArrayList<>();
        try {
            for (String filepath : filepaths) {
                XWPFDocument srcDocument;
                // The input stream is only needed while the package is being opened.
                try (InputStream in = new FileInputStream(filepath)) {
                    srcDocument = new XWPFDocument(OPCPackage.open(in));
                }
                srcDocuments.add(srcDocument);
                ctBodyList.add(srcDocument.getDocument().getBody());
            }
            appendBody(ctBodyList);
            // Open the destination last so that a failed merge does not leave an empty file behind.
            try (OutputStream dest = new FileOutputStream(outputPath)) {
                srcDocuments.get(0).write(dest);
            }
        } finally {
            for (XWPFDocument document : srcDocuments) {
                closeQuietly(document);
            }
        }
    }

    /**
     * Concatenates the content of all bodies and stores the result in the first body.
     *
     * @param ctBodyList bodies to merge, in output order; must contain at least one element
     * @throws Exception if the concatenated XML cannot be parsed back into a body
     */
    private static void appendBody(List<CTBody> ctBodyList) throws Exception {
        // Namespace declarations collected from every body's start tag.
        StringBuilder allXmlns = new StringBuilder();
        // Child elements of every body, in order.
        StringBuilder allElements = new StringBuilder();
        for (CTBody body : ctBodyList) {
            String xml = body.xmlText();
            int tagEnd = xml.indexOf('>');
            if (tagEnd < 0) {
                continue;
            }
            // An empty body is serialized as a self-closing tag and has no content or end tag.
            boolean selfClosing = tagEnd > 0 && xml.charAt(tagEnd - 1) == '/';
            int attrEnd = selfClosing ? tagEnd - 1 : tagEnd;
            int xmlnsStart = xml.indexOf(XMLNS);
            if (xmlnsStart >= 0 && xmlnsStart < attrEnd) {
                // The trailing space keeps declarations of consecutive bodies apart.
                allXmlns.append(xml, xmlnsStart, attrEnd).append(' ');
            }
            int closeStart = xml.lastIndexOf("</");
            if (!selfClosing && closeStart > tagEnd) {
                allElements.append(xml, tagEnd + 1, closeStart);
            }
        }
        String fragmentHead = distinctXmlns(allXmlns.toString());
        CTBody mergedBody = CTBody.Factory.parse(fragmentHead + allElements + "</xml-fragment>");
        ctBodyList.get(0).set(mergedBody);
    }

    /**
     * Builds an {@code <xml-fragment ...>} start tag containing each namespace declaration
     * found in {@code prefix} exactly once.
     *
     * <p>Every declaration is kept, including the last one in {@code prefix}.
     *
     * @param prefix concatenated attribute text of one or more start tags; may be null
     * @return the start tag with the de-duplicated declarations
     */
    public static String distinctXmlns(String prefix) {
        Set<String> declarations = new LinkedHashSet<>();
        if (prefix != null) {
            int start = prefix.indexOf(XMLNS);
            while (start >= 0) {
                int next = prefix.indexOf(XMLNS, start + 1);
                // The final declaration has no successor and runs to the end of the text.
                String declaration = next < 0 ? prefix.substring(start) : prefix.substring(start, next);
                declarations.add(declaration.trim());
                start = next;
            }
        }
        String xmlHead = "<xml-fragment ";
        StringBuilder sb = new StringBuilder(xmlHead);
        for (Map.Entry<String, String> entry : distinctXmlns(declarations).entrySet()) {
            sb.append(" ");
            sb.append(entry.getKey());
            sb.append("=");
            sb.append(entry.getValue());
        }
        sb.append(">");
        return sb.toString();
    }

    /**
     * Splits {@code name=value} declarations into a map keyed by name.
     *
     * <p>The same prefix may be bound to different addresses in different documents;
     * in that case the declaration seen last wins. Entries that are not strings or
     * contain no {@code '='} are ignored.
     *
     * @param set declarations of the form {@code xmlns:prefix="uri"}; may be null
     * @return declaration name mapped to its (still quoted) value, in encounter order
     */
    public static Map<String, String> distinctXmlns(Set<?> set) {
        Map<String, String> map = new LinkedHashMap<>();
        if (set == null) {
            return map;
        }
        for (Object item : set) {
            if (!(item instanceof String)) {
                continue;
            }
            String xmlns = (String) item;
            int separator = xmlns.indexOf('=');
            if (separator < 0) {
                continue;
            }
            map.put(xmlns.substring(0, separator).trim(), xmlns.substring(separator + 1).trim());
        }
        return map;
    }

    /**
     * Closes the given streams, skipping nulls; a failure to close is printed and does not
     * prevent the remaining streams from being closed.
     *
     * @param inputStream streams to close; may be null or contain nulls
     */
    public static void closeStream(InputStream... inputStream) {
        if (inputStream == null) {
            return;
        }
        for (InputStream stream : inputStream) {
            closeQuietly(stream);
        }
    }

    /** Closes one resource, printing (not propagating) any failure. */
    private static void closeQuietly(Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
<a name='3'></a>替换文档中的占位符,包含段落占位符、表格占位符
替换占位符的思路,首先需要遍历文档中所有的段落和表格,再去一个个匹配占位符与你需要替换的参数,Word中段落是XWPFParagraph对象,表格是XWPFTable对象。更多代码请查看项目的replacemark目录。
package replacemark;
import org.apache.poi.xwpf.usermodel.*;
import org.springframework.util.StringUtils;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Replaces placeholders in the paragraphs and tables of a Word document.
 *
 * @author corey
 * @version 1.0
 */
public class ReplaceUtil {

    /** Matches a <code>${...}</code> placeholder; compiled once because it is used for every table. */
    private static final Pattern PLACEHOLDER_PATTERN =
            Pattern.compile("\\$\\{(.+?)\\}", Pattern.CASE_INSENSITIVE);

    /**
     * Replaces placeholders in every non-empty paragraph returned by the document's
     * paragraph iterator.
     *
     * @param doc    document to process
     * @param params replacements: key = placeholder text, value = actual value
     */
    public static void replaceInPara(XWPFDocument doc, Map<String, Object> params) {
        if (doc == null || params == null || params.isEmpty()) {
            return;
        }
        Iterator<XWPFParagraph> iterator = doc.getParagraphsIterator();
        while (iterator.hasNext()) {
            XWPFParagraph para = iterator.next();
            String text = para.getParagraphText();
            if (text != null && !text.isEmpty()) {
                replaceInPara(para, params);
            }
        }
    }

    /**
     * Replaces placeholders in a single paragraph.
     *
     * <p>Only entries whose value is a {@link String} are applied. When at least one
     * placeholder was replaced, all runs of the paragraph are removed and a single new
     * run holding the replaced text is created, so the formatting of the removed runs
     * is lost.
     *
     * @param para   paragraph to process
     * @param params replacements: key = placeholder text, value = actual value
     */
    public static void replaceInPara(XWPFParagraph para, Map<String, Object> params) {
        if (para == null || params == null || params.isEmpty()) {
            return;
        }
        String sourceText = para.getParagraphText();
        if (sourceText == null || sourceText.isEmpty()) {
            return;
        }
        boolean replaced = false;
        for (Map.Entry<String, Object> entry : params.entrySet()) {
            String key = entry.getKey();
            Object value = entry.getValue();
            // An empty key would match everywhere and insert the value between all characters.
            if (key == null || key.isEmpty() || !(value instanceof String)) {
                continue;
            }
            if (sourceText.contains(key)) {
                sourceText = sourceText.replace(key, (String) value);
                replaced = true;
            }
        }
        if (!replaced) {
            return;
        }
        // Remove from the back, starting at the last valid index, so positions stay stable.
        for (int i = para.getRuns().size() - 1; i >= 0; i--) {
            para.removeRun(i);
        }
        para.createRun().setText(sourceText);
    }

    /**
     * Replaces placeholders in the tables returned by the document's table iterator.
     *
     * <p>A table is processed only when it has more than one row and its text contains
     * at least one <code>${...}</code> placeholder.
     *
     * @param doc    document to process
     * @param params replacements: key = placeholder text, value = actual value
     */
    public static void replaceTable(XWPFDocument doc, Map<String, Object> params) {
        if (doc == null || params == null || params.isEmpty()) {
            return;
        }
        Iterator<XWPFTable> iterator = doc.getTablesIterator();
        while (iterator.hasNext()) {
            XWPFTable table = iterator.next();
            if (table.getRows().size() <= 1 || !matcher(table.getText()).find()) {
                continue;
            }
            for (XWPFTableRow row : table.getRows()) {
                for (XWPFTableCell cell : row.getTableCells()) {
                    for (XWPFParagraph para : cell.getParagraphs()) {
                        replaceInPara(para, params);
                    }
                }
            }
        }
    }

    /**
     * Creates a matcher that looks for <code>${...}</code> placeholders in {@code str}.
     *
     * @param str text to search
     * @return matcher over {@code str}
     */
    private static Matcher matcher(String str) {
        return PLACEHOLDER_PATTERN.matcher(str);
    }
}
富文本转Word及注意事项
富文本转成Word的思路,富文本本身就是一段HTML字符串,可以直接把这段字符串当做一个段落写入到Word中,但这样会丢失HTML样式,所以需要将识别到的HTML标签替换成Word标签,这也是难点所在,所以需要设计一个大而全的样式替换工具,目前笔者的项目中只做H1\H2\H3\段落\表格\img的src是url的图片转换(base64流放在富文本中太大了,不易识别),再提一句这些替换的工具可以设计为责任链模式,笔者也还没有这样做。更多代码在项目的insertword目录。
package insertword;
import org.apache.poi.util.Units;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.xmlbeans.XmlCursor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.util.ObjectUtils;
import org.springframework.util.StringUtils;
import java.io.*;
/**
 * Writes rich text (an HTML string) into a Word document.
 *
 * @author corey
 * @version 1.0
 */
public class HtmlUtil {

    /**
     * Tags every element of the parsed HTML whose tag name is known to {@code ElementEnum}
     * with the {@code CommonConStant.COMMONATTR} attribute, so that it can be selected later.
     *
     * @param document parsed HTML; must not be null
     * @throws NullPointerException if {@code document} is null
     */
    public static void addElement(Document document) {
        if (document == null) {
            throw new NullPointerException("不允许为空的对象添加元素");
        }
        for (Element element : document.getAllElements()) {
            String attrName = ElementEnum.getValueByCode(element.tag().getName());
            if (attrName != null && !attrName.isEmpty()) {
                element.attr(CommonConStant.COMMONATTR, attrName);
            }
        }
    }

    /**
     * Writes rich text into a Word document at the position of {@code paragraph}.
     *
     * <p>Rich text has too many styles to enumerate; H1, H2, H3, paragraphs, images and
     * tables are handled, every other tagged element is skipped. Processing is best
     * effort: the first failure is printed and stops the conversion of the remaining
     * elements.
     *
     * @param ritchText rich text (HTML) content
     * @param doc       document that receives the content; needed to insert paragraphs and tables
     * @param paragraph paragraph whose position is used as the insertion point
     */
    public static void resolveHtml(String ritchText, XWPFDocument doc, XWPFParagraph paragraph) {
        // NOTE(review): the second argument of parseBodyFragment is the base URI, not a charset.
        Document document = Jsoup.parseBodyFragment(ritchText, "UTF-8");
        try {
            HtmlUtil.addElement(document);
            Elements elements = document.select("[" + CommonConStant.COMMONATTR + "]");
            for (Element em : elements) {
                XmlCursor xmlCursor = paragraph.getCTP().newCursor();
                try {
                    writeElement(em, doc, xmlCursor);
                } finally {
                    // A cursor is created per element; release it once the element is written.
                    xmlCursor.dispose();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Inserts the Word counterpart of one tagged HTML element at the cursor. */
    private static void writeElement(Element em, XWPFDocument doc, XmlCursor xmlCursor) throws Exception {
        switch (em.attr(CommonConStant.COMMONATTR)) {
            case "imgurl": {
                // The src value is opened as a local file path.
                String url = em.attr("src");
                try (InputStream inputStream = new FileInputStream(url)) {
                    XWPFParagraph imageParagraph = doc.insertNewParagraph(xmlCursor);
                    ParagraphStyleUtil.setImageCenter(imageParagraph);
                    imageParagraph.createRun().addPicture(inputStream, XWPFDocument.PICTURE_TYPE_PNG,
                            "图片.jpeg", Units.toEMU(200), Units.toEMU(200));
                }
                break;
            }
            case "table": {
                XWPFTable xwpfTable = doc.insertNewTbl(xmlCursor);
                addTable(xwpfTable, em);
                // Center the table, then center the cell content.
                ParagraphStyleUtil.setTableLocation(xwpfTable, "center");
                ParagraphStyleUtil.setCellLocation(xwpfTable, "CENTER", "center");
                break;
            }
            case "h1":
                ParagraphStyleUtil.setTitle(insertTextRun(doc, xmlCursor, em.text()), TitleFontEnum.H1.getTitle());
                break;
            case "h2":
                ParagraphStyleUtil.setTitle(insertTextRun(doc, xmlCursor, em.text()), TitleFontEnum.H2.getTitle());
                break;
            case "h3":
                ParagraphStyleUtil.setTitle(insertTextRun(doc, xmlCursor, em.text()), TitleFontEnum.H3.getTitle());
                break;
            case "paragraph":
                // The leading space is the paragraph indentation used by the original layout.
                insertTextRun(doc, xmlCursor, " " + em.text());
                break;
            case "title":
            case "subtitle":
            case "imgbase64":
            default:
                // Recognized but not converted.
                break;
        }
    }

    /** Inserts a new paragraph at the cursor and returns its single run holding {@code text}. */
    private static XWPFRun insertTextRun(XWPFDocument doc, XmlCursor xmlCursor, String text) {
        XWPFRun run = doc.insertNewParagraph(xmlCursor).createRun();
        run.setText(text);
        return run;
    }

    /**
     * Reads a text file line by line.
     *
     * <p>Every line, including the first, is preceded by the system line separator.
     * The file is decoded by {@link FileReader}. A read failure is printed and the
     * content read so far is returned.
     *
     * @param file file to read
     * @return content of the file
     */
    public static String txt2String(File file) {
        StringBuilder result = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                result.append(System.lineSeparator()).append(line);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Copies every {@code tr} of the HTML table into the Word table.
     *
     * @param xwpfTable target table; its existing first row receives the first HTML row
     * @param table     HTML table element
     */
    private static void addTable(XWPFTable xwpfTable, Element table) {
        int rownum = 0;
        for (Element tr : table.getElementsByTag("tr")) {
            addTableTr(xwpfTable, tr, rownum);
            rownum++;
        }
    }

    /**
     * Copies the cells of one HTML row into the Word table.
     *
     * <p>{@code th} cells are used when the row has any, otherwise {@code td} cells.
     * Row 0 is written into the table's existing first row; every other row is appended.
     * Missing cells are created, so a row may have more cells than the first row.
     *
     * @param xwpfTable target table
     * @param tr        HTML row element
     * @param rownum    zero-based index of {@code tr} within the HTML table
     */
    private static void addTableTr(XWPFTable xwpfTable, Element tr, int rownum) {
        Elements headerCells = tr.getElementsByTag("th");
        Elements tds = headerCells.isEmpty() ? tr.getElementsByTag("td") : headerCells;
        if (tds.isEmpty()) {
            return;
        }
        XWPFTableRow row = rownum == 0 ? xwpfTable.getRow(0) : xwpfTable.createRow();
        for (int i = 0; i < tds.size(); i++) {
            cellAt(row, i).setText(tds.get(i).text());
        }
    }

    /** Returns the cell at {@code index}, appending cells to the row until it exists. */
    private static XWPFTableCell cellAt(XWPFTableRow row, int index) {
        for (int count = row.getTableCells().size(); count <= index; count++) {
            row.addNewTableCell();
        }
        return row.getCell(index);
    }

    /**
     * Closes the given resources, skipping nulls; a failure to close is printed and does
     * not prevent the remaining resources from being closed.
     *
     * @param closeables resources to close; may be null or contain nulls
     */
    public static void closeStream(Closeable... closeables) {
        if (closeables == null) {
            return;
        }
        for (Closeable closeable : closeables) {
            if (closeable == null) {
                continue;
            }
            try {
                closeable.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
给Word生成水印
Word添加水印的思路,利用XWPFHeader对象创建页眉,给页眉添加文字,设置字体、大小、颜色、旋转角度即可。代码在项目的insertword目录
package insertword;
import com.microsoft.schemas.office.office.CTLock;
import com.microsoft.schemas.vml.*;
import org.apache.poi.wp.usermodel.HeaderFooterType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFHeader;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import java.util.stream.Stream;
/**
 * Adds a text watermark to a docx document by drawing rotated text shapes into the
 * default page header.
 *
 * @author corey
 * @version 1.0
 */
public class WatermarkUtil {
    /** Font of the watermark text. */
    private static final String FONT_NAME = "宋体";
    /** Font size written into the text path style. */
    private static final String FONT_SIZE = "0.2pt";
    /** Fill color of the watermark text. */
    private static final String FONT_COLOR = "#d0d0d0";
    /** Average width of one character in pt; the shape width is character count times this value. */
    private static final int WIDTH_PER_WORD = 10;
    /** Rotation angle of the text. */
    private static final String STYLE_ROTATION = "45";
    /** Top margin used when a single watermark line is added on its own. */
    private static final int DEFAULT_STYLE_TOP = 0;

    /**
     * Covers the whole page with watermark lines.
     *
     * <p>Adds 25 lines whose top margin runs from -500 to 1900 in steps of 100.
     *
     * @param doc        docx document to process
     * @param customText watermark text
     */
    public static void waterMarkDocXDocument(XWPFDocument doc, String customText) {
        for (int lineIndex = -5; lineIndex < 20; lineIndex++) {
            addWatermarkLine(doc, customText, 100 * lineIndex);
        }
    }

    /**
     * Adds a single watermark line with a top margin of 0.
     *
     * <p>The position no longer depends on earlier calls to
     * {@link #waterMarkDocXDocument(XWPFDocument, String)}.
     *
     * @param doc        docx document to process
     * @param customText watermark text
     */
    public static void waterMarkDocXDocument_0(XWPFDocument doc, String customText) {
        addWatermarkLine(doc, customText, DEFAULT_STYLE_TOP);
    }

    /**
     * Adds one line of repeated watermark text to the first paragraph of the default header.
     *
     * @param doc        docx document to process
     * @param customText watermark text
     * @param styleTop   top margin of the line, written into the shape style
     */
    private static void addWatermarkLine(XWPFDocument doc, String customText, int styleTop) {
        // One line is the text followed by 8 spaces, repeated 10 times.
        String lineText = repeatString(customText + repeatString(" ", 8), 10);

        // An existing DEFAULT header is reused.
        XWPFHeader header = doc.createHeader(HeaderFooterType.DEFAULT);
        if (header.getParagraphs().isEmpty()) {
            header.createParagraph();
        }
        CTP ctp = header.getParagraphArray(0).getCTP();

        // Copy the revision ids of the first body paragraph when the body has one.
        if (doc.getDocument().getBody().sizeOfPArray() > 0) {
            CTP firstBodyParagraph = doc.getDocument().getBody().getPArray(0);
            byte[] rsidr = firstBodyParagraph.getRsidR();
            byte[] rsidrdefault = firstBodyParagraph.getRsidRDefault();
            if (rsidr != null) {
                ctp.setRsidP(rsidr);
            }
            if (rsidrdefault != null) {
                ctp.setRsidRDefault(rsidrdefault);
            }
        }

        // The header paragraph is shared by all lines: reuse its properties instead of
        // adding another pPr / pStyle element for every line.
        CTPPr ppr = ctp.isSetPPr() ? ctp.getPPr() : ctp.addNewPPr();
        if (ppr.isSetPStyle()) {
            ppr.getPStyle().setVal("Header");
        } else {
            ppr.addNewPStyle().setVal("Header");
        }

        // Each line gets its own run holding a VML group with the text shape.
        CTR ctr = ctp.addNewR();
        CTRPr ctrpr = ctr.addNewRPr();
        ctrpr.addNewNoProof();
        CTGroup group = CTGroup.Factory.newInstance();
        CTShapetype shapetype = group.addNewShapetype();
        CTTextPath shapeTypeTextPath = shapetype.addNewTextpath();
        shapeTypeTextPath.setOn(STTrueFalse.T);
        shapeTypeTextPath.setFitshape(STTrueFalse.T);
        CTLock lock = shapetype.addNewLock();
        lock.setExt(STExt.VIEW);
        CTShape shape = group.addNewShape();
        shape.setId("PowerPlusWaterMarkObject");
        shape.setSpid("_x0000_s102");
        shape.setType("#_x0000_t136");
        // Rotation, position and size of the shape.
        shape.setStyle(getShapeStyle(lineText, styleTop));
        shape.setFillcolor(FONT_COLOR);
        // No outline, so the text is drawn filled.
        shape.setStroked(STTrueFalse.FALSE);
        // Path along which the text is drawn, with its font and size.
        CTTextPath shapeTextPath = shape.addNewTextpath();
        shapeTextPath.setStyle("font-family:" + FONT_NAME + ";font-size:" + FONT_SIZE);
        shapeTextPath.setString(lineText);
        CTPicture pict = ctr.addNewPict();
        pict.set(group);
    }

    /**
     * Builds the style string of the text shape.
     *
     * @param customText text drawn by the shape; its length determines the shape width
     * @param styleTop   top margin of the shape
     * @return style string for the shape
     */
    private static String getShapeStyle(String customText, int styleTop) {
        StringBuilder sb = new StringBuilder();
        sb.append("position: ").append("absolute");
        // Width = number of characters * average character width.
        sb.append(";width: ").append(customText.length() * WIDTH_PER_WORD).append("pt");
        sb.append(";height: ").append("20pt");
        sb.append(";z-index: ").append("-251654144");
        sb.append(";mso-wrap-edited: ").append("f");
        // The vertical offset of a line must be set with margin-top; top does not work here.
        sb.append(";margin-top: ").append(styleTop);
        sb.append(";mso-position-horizontal-relative: ").append("page");
        sb.append(";mso-position-vertical-relative: ").append("page");
        // NOTE(review): "left" is an unusual value for a vertical position; kept as in the original.
        sb.append(";mso-position-vertical: ").append("left");
        sb.append(";mso-position-horizontal: ").append("center");
        sb.append(";rotation: ").append(STYLE_ROTATION);
        return sb.toString();
    }

    /**
     * Repeats {@code pattern} {@code repeats} times.
     *
     * @param pattern text to repeat
     * @param repeats number of repetitions
     * @return the repeated text
     */
    private static String repeatString(String pattern, int repeats) {
        StringBuilder buffer = new StringBuilder(pattern.length() * repeats);
        for (int i = 0; i < repeats; i++) {
            buffer.append(pattern);
        }
        return buffer.toString();
    }
}
传送门
鸣谢
感谢项目中同事对Word操作提出的改善意见,让本代码得以顺利交付运行。感谢所有提供了源代码的博主。
感谢各位猿佬百忙之中抽空阅读、点赞、收藏,记得帮忙在GitHub上点亮你尊贵的小星星哦。