Java 如何将已上传保存的 Word 文档转换为 HTML 进行在线预览

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.InputStream;

import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.apache.poi.xwpf.converter.core.BasicURIResolver;

import org.apache.poi.xwpf.converter.core.FileImageExtractor;

import org.apache.poi.xwpf.converter.core.FileURIResolver;

import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;

import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;

import org.apache.poi.xwpf.usermodel.XWPFDocument;

import org.w3c.dom.Document;

/**

* word 转换成html 2017-2-27

*/

public class WordToHtml {

/**

* 将word2003转换为html文件 2017-2-27

* @param wordPath word文件路径

* @param wordName word文件名称无后缀

* @param suffix  word文件后缀

* @throws IOException

* @throws TransformerException

* @throws ParserConfigurationException

*/

public String Word2003ToHtml(String wordPath,String wordName,String suffix) throws IOException, TransformerException, ParserConfigurationException {

String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;

String htmlName = wordName + ".html";

final String imagePath = htmlPath + "image" + File.separator;

//判断html文件是否存在

File htmlFile = new File(htmlPath + htmlName);

if(htmlFile.exists()){

return htmlFile.getAbsolutePath();

}

//原word文档

final String file = wordPath + File.separator + wordName + suffix;

InputStream input = new FileInputStream(new File(file));

HWPFDocument wordDocument = new HWPFDocument(input);

WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

//设置图片存放的位置

wordToHtmlConverter.setPicturesManager(new PicturesManager() {

public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {

File imgPath = new File(imagePath);

if(!imgPath.exists()){//图片目录不存在则创建

imgPath.mkdirs();

}

File file = new File(imagePath + suggestedName);

try {

OutputStream os = new FileOutputStream(file);

os.write(content);

os.close();

} catch (FileNotFoundException e) {

e.printStackTrace();

} catch (IOException e) {

e.printStackTrace();

}

//图片在html文件上的路径 相对路径

return "image/" + suggestedName;

}

});

//解析word文档

wordToHtmlConverter.processDocument(wordDocument);

Document htmlDocument = wordToHtmlConverter.getDocument();

//生成html文件上级文件夹

File folder = new File(htmlPath);

if(!folder.exists()){

folder.mkdirs();

}

//生成html文件地址

OutputStream outStream = new FileOutputStream(htmlFile);

DOMSource domSource = new DOMSource(htmlDocument);

StreamResult streamResult = new StreamResult(outStream);

TransformerFactory factory = TransformerFactory.newInstance();

Transformer serializer = factory.newTransformer();

serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");

serializer.setOutputProperty(OutputKeys.INDENT, "yes");

serializer.setOutputProperty(OutputKeys.METHOD, "html");

serializer.transform(domSource, streamResult);

outStream.close();

return htmlFile.getAbsolutePath();

}

/**

* 2007版本word转换成html 2017-2-27

* @param wordPath word文件路径

* @param wordName word文件名称无后缀

* @param suffix  word文件后缀

* @return

* @throws IOException

*/

public String Word2007ToHtml(String wordPath,String wordName,String suffix) throws IOException {

String htmlPath = wordPath + File.separator + wordName + "_show" + File.separator;

String htmlName = wordName + ".html";

String imagePath = htmlPath + "image" + File.separator;

//判断html文件是否存在

File htmlFile = new File(htmlPath + htmlName);

if(htmlFile.exists()){

return htmlFile.getAbsolutePath();

}

//word文件

File wordFile = new File(wordPath + File.separator + wordName + suffix);

// 1) 加载word文档生成 XWPFDocument对象

InputStream in = new FileInputStream(wordFile);

XWPFDocument document = new XWPFDocument(in);

// 2) 解析 XHTML配置 (这里设置IURIResolver来设置图片存放的目录)

File imgFolder = new File(imagePath);

XHTMLOptions options = XHTMLOptions.create();

options.setExtractor(new FileImageExtractor(imgFolder));

//html中图片的路径 相对路径

options.URIResolver(new BasicURIResolver("image"));

options.setIgnoreStylesIfUnused(false);

options.setFragment(true);

// 3) 将 XWPFDocument转换成XHTML

//生成html文件上级文件夹

File folder = new File(htmlPath);

if(!folder.exists()){

folder.mkdirs();

}

OutputStream out = new FileOutputStream(htmlFile);

XHTMLConverter.getInstance().convert(document, out, options);

return htmlFile.getAbsolutePath();

}

}

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • Spring Cloud为开发人员提供了快速构建分布式系统中一些常见模式的工具(例如配置管理,服务发现,断路器,智...
    卡卡罗2017阅读 134,860评论 18 139
  • 1. Java基础部分 基础部分的顺序:基本语法,类相关的语法,内部类的语法,继承相关的语法,异常的语法,线程的语...
    子非鱼_t_阅读 31,742评论 18 399
  • ActiveMQ 即时通讯服务 浅析http://www.cnblogs.com/hoojo/p/active_m...
    bboymonk阅读 1,513评论 0 11
  • 1、 朋友的一个客户是即将毕业的大四男生,大家一起吹牛聊天时,他失落的说,最近真的很迷茫。 大家打趣说,小鲜肉,你...
    千盐万语阅读 3,361评论 55 109
  • 启动页是图片过渡两秒,componentWillUnmount()需要清除定时器
    董董董董董董董董董大笨蛋阅读 1,208评论 0 4