poi操作word(doc)转html 包含公式图片,

接到一个需求：用word文档做批量导入。我当时用了两大类方法：

第一：jacob技术，确实挺好用，不管是docx和doc互相转换还是转html都可以，docx的公式也能解决。但是有一个致命的问题：只能在Windows上使用，Linux的直接可以略过了。因为只能在Windows上使用的缘故，这里只大致写了一些。需要jacob包和一个dll文件，
dll文件放在system32这个目录下面，这也是为什么只能在Windows上使用的原因。大家可以去官网下载，也可以留下邮箱，我发给你。

import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;

/**
 * Converts Word documents to HTML by driving a "Word.Application" COM object through JACOB.
 *
 * @author shihao
 */
public class JacobUtil {

    /** Format code handed to the document's {@code SaveAs} call by {@link #wordToHtml}. */
    public static final int WORD_HTML = 8;

    /** Not used in this class; presumably the plain-text save format code - TODO confirm. */
    public static final int WORD_TXT = 7;

    /** Not used in this class; presumably the Excel HTML save format code - TODO confirm. */
    public static final int EXCEL_HTML = 44;

    /**
     * Opens a Word file and saves it again as HTML.
     *
     * <p>Any exception raised during the conversion is printed to the standard error stream and
     * otherwise ignored; the Word application is always asked to quit afterwards.
     *
     * @param docfile  full path of the Word file to open
     * @param htmlfile path where the converted HTML is stored
     */
    public void wordToHtml(String docfile, String htmlfile) {
        // Start Word.
        ActiveXComponent word = new ActiveXComponent("Word.Application");
        try {
            // Keep the Word window hidden.
            word.setProperty("Visible", new Variant(false));

            // Fetch the Documents collection.
            Variant documentsProperty = word.getProperty("Documents");
            Dispatch documents = documentsProperty.toDispatch();

            // Open the source file.
            // NOTE(review): the two flags are presumably ConfirmConversions=false and
            // ReadOnly=true - confirm against the Word object model.
            Object[] openArguments = { docfile, new Variant(false), new Variant(true) };
            Variant openResult =
                    Dispatch.invoke(documents, "Open", Dispatch.Method, openArguments, new int[1]);
            Dispatch opened = openResult.toDispatch();

            // Save under the new name in HTML format.
            Object[] saveArguments = { htmlfile, new Variant(WORD_HTML) };
            Dispatch.invoke(opened, "SaveAs", Dispatch.Method, saveArguments, new int[1]);

            // Close the document, passing false as the only argument.
            Dispatch.call(opened, "Close", new Variant(false));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            word.invoke("Quit", new Variant[] {});
        }
    }
}

第二：poi操作。poi读写不同版本的word用到的类是不一样的，我当时要处理docx和doc两种格式。

docx版本使用XWPFDocument

import com.inxedu.wxos.util.UploadPropertyUtil;
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.svg.SvgGdiException;
import net.arnx.wmf2svg.gdi.wmf.WmfParseException;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.nodes.Document;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMath;

import java.io.*;
import java.util.List;
import java.util.UUID;
import java.util.zip.GZIPOutputStream;

/**
 * Converts a .docx file to an XHTML fragment with Apache POI's {@code XHTMLConverter}.
 *
 * @author shihao
 */
public class Word {

    /**
     * Converts the given .docx file to an XHTML fragment stored next to it.
     *
     * <p>The output file gets a random, UUID-based name and is written into the directory that
     * contains the source file; the same directory is given to the converter as the image
     * extraction folder and as the base for image URIs. (The original derivation of the output
     * folder from a project property was commented out, which left {@code path} and
     * {@code htmlName} undefined.)
     *
     * @param paths path of the .docx file to convert
     * @return the path of the generated HTML file, or one of the literal messages
     *         {@code "Sorry File does not Exists!"} / {@code "Enter only MS Office 2007+ files"}
     *         when the input is missing or does not have a .docx/.DOCX extension
     * @throws IOException if the source cannot be read or the output cannot be written
     * @throws WmfParseException declared for compatibility with existing callers
     * @throws SvgGdiException declared for compatibility with existing callers
     */
    public String html(String paths) throws IOException, WmfParseException, SvgGdiException {
        File source = new File(paths);
        if (!source.exists()) {
            System.out.println("Sorry File does not Exists!");
            return "Sorry File does not Exists!";
        }

        String sourceName = source.getName();
        if (!(sourceName.endsWith(".docx") || sourceName.endsWith(".DOCX"))) {
            System.out.println("Enter only MS Office 2007+ files");
            return "Enter only MS Office 2007+ files";
        }

        // Output folder: the directory holding the source file, with a trailing separator so
        // that plain concatenation with the file name yields a valid path.
        String path = source.getAbsoluteFile().getParentFile().getPath() + File.separator;
        String htmlName = UUID.randomUUID().toString().replaceAll("-", "") + ".html";
        File imageFolderFile = new File(path);

        // try-with-resources closes both streams even when the conversion fails.
        try (InputStream in = new FileInputStream(source);
             OutputStream out = new FileOutputStream(new File(path + htmlName))) {
            // 1) Load the Word document into an XWPFDocument.
            XWPFDocument document = new XWPFDocument(in);

            // 2) Configure the conversion; the URI resolver and the extractor both point at
            //    the folder where images are stored.
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);

            // 3) Convert the XWPFDocument to XHTML.
            XHTMLConverter.getInstance().convert(document, out, options);
        }
        return path + htmlName;
    }
}

这个方法虽然可以将文字和图片转成html（准确地说是xhtml，下面会说怎么读取xhtml），我也能解析出来，但是客户那边又说了：公式也要能读取。找了很多资料，docx的公式没找到好的处理方法，只好又换方法。下面先贴出读取xhtml的方法，如果乱码请注意编码。

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;

/**
 * Reads an HTML/XHTML file and re-serializes it through jsoup using XML syntax.
 *
 * @author shihao
 */
public class XhtmltoHtml {

    /**
     * Parses the given markup with jsoup and returns it serialized with XML syntax and the
     * XHTML escape mode.
     *
     * @param html markup to parse
     * @return the re-serialized document
     */
    public String html2xhtml(String html) {
        Document doc = Jsoup.parse(html);

        doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml).escapeMode(Entities.EscapeMode.xhtml);

        return doc.html();
    }

    /**
     * Reads the file at {@code path} as UTF-8, prints it, converts it with
     * {@link #html2xhtml(String)}, prints the result and returns it.
     *
     * @param path path of the file to read
     * @return the converted markup
     * @throws IOException if the file cannot be opened or read
     */
    public String html(String path) throws IOException {
        File file = new File(path);

        // available() is only an estimate and a single read() may return fewer bytes than
        // requested, so size the buffer from the file length and loop until it is full or the
        // stream ends.
        byte[] buff = new byte[(int) file.length()];
        int filled = 0;
        try (FileInputStream input = new FileInputStream(file)) {
            while (filled < buff.length) {
                int count = input.read(buff, filled, buff.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
        }

        String html = new String(buff, 0, filled, "utf-8");

        System.out.println("============html===================");

        System.out.println(html);

        String xhtml = html2xhtml(html);

        System.out.println("============xhtml===================");

        System.out.println(xhtml);
        return xhtml;
    }

}

没办法，客户就是上帝，换吧。这次是读取doc文档的，话不多说，直接上代码。

import com.inxedu.wxos.util.UploadPropertyUtil;

/**
 * Command-line driver that converts a .doc file to HTML (or XML) using {@code ODocument},
 * {@code OTable}, {@code OImage} and {@code OPrint}.
 */
public class MainTest {
    public static UploadPropertyUtil propertyUtil = UploadPropertyUtil.getInstance("application-project");

    /**
     * Converts a .doc file and writes {@code batch.html} into the output directory.
     *
     * @param args optional: {@code args[0]} is the .doc file to read and {@code args[1]} the
     *             output directory; the original hard-coded desktop paths are used for any
     *             argument that is not supplied
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        String filePath = args.length > 0 ? args[0] : "C:\\Users\\MACHENIKE\\Desktop\\Batch.doc";
        String path = args.length > 1 ? args[1] : "C:\\Users\\MACHENIKE\\Desktop\\batt";

        // The writers below open files inside this directory, so make sure it exists first.
        // The return value is ignored on purpose: it is false when the directory already exists.
        new java.io.File(path).mkdirs();

        ODocument odoc = new ODocument(filePath);
        writeHtml(odoc, path);
        System.out.println("OK!");
    }

    /**
     * Reads the document text, expands the table placeholders to HTML tables, substitutes the
     * picture markers with {@code <img>} tags and writes the result to {@code batch.html}
     * inside {@code path}.
     *
     * @param doc  loaded document
     * @param path output directory (also receives the extracted images)
     */
    public static void writeHtml(ODocument doc, String path) {
        OTable otable = new OTable(doc.getDocument());
        String htmlData = otable.replaceHtmlTable(doc.readDoc());
        OImage oimage = new OImage(doc.getDocument(), path);
        htmlData = oimage.replaceImg(htmlData);
        OPrint oprint = new OPrint();
        oprint.printHtml(htmlData, path, "batch.html");
    }

    /**
     * Same pipeline as {@link #writeHtml(ODocument, String)} but with the XML table rendering
     * and {@code OPrint.printXml} as the writer.
     *
     * @param doc  loaded document
     * @param path output directory (also receives the extracted images)
     */
    public static void writeXml(ODocument doc, String path) {
        OTable otable = new OTable(doc.getDocument());
        String xmlData = otable.replaceXmlTable(doc.readDoc());
        OImage oimage = new OImage(doc.getDocument(), path);
        xmlData = oimage.replaceImg(xmlData);
        OPrint oprint = new OPrint();
        oprint.printXml(xmlData, path);
    }

}

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;

import java.io.File;
import java.io.FileInputStream;
import java.util.LinkedList;
import java.util.List;

/**
 * Loads a binary Word (.doc) file with POI's {@link HWPFDocument} and exposes its text with
 * field runs and table rows replaced by placeholder markers.
 */
public class ODocument {
    private HWPFDocument doc;
    private String path;
    // Per-character font size and colour collected by readDoc(); not read anywhere in this class.
    private List<Integer> fontSize;
    private List<Integer> color;

    /**
     * Loads the document at {@code path}.
     *
     * @param path path of the .doc file
     * @throws IllegalStateException if the file cannot be opened or parsed
     */
    public ODocument(String path) {
        this.doc = null;
        fontSize = new LinkedList<>();
        color = new LinkedList<>();
        this.path = path;
        this.loadDoc();
    }

    /**
     * Opens the file and parses it into {@link #doc}. Previously a failure was only printed and
     * {@code doc} stayed {@code null}, which surfaced later as an unrelated
     * {@code NullPointerException}; now the failure is reported immediately with its cause.
     */
    private void loadDoc() {
        // try-with-resources closes the stream even when parsing fails.
        try (FileInputStream in = new FileInputStream(new File(this.path))) {
            this.doc = new HWPFDocument(in);
        } catch (Exception e) {
            throw new IllegalStateException("Cannot load Word document: " + this.path, e);
        }
    }

    /**
     * @return the parsed POI document
     */
    public HWPFDocument getDocument() {
        return this.doc;
    }

    /**
     * Walks the document one character position at a time, concatenates the text of the
     * character run found at each position, prints the raw text and returns it after
     * {@link #DataPretreatment(String)}.
     *
     * <p>The loop stops one position before {@code characterLength()}, exactly as the original
     * did. NOTE(review): presumably this skips a trailing paragraph mark - confirm.
     *
     * @return the pre-processed document text
     */
    public String readDoc() {
        // Start from empty lists so a second call does not append duplicates.
        fontSize.clear();
        color.clear();

        StringBuilder raw = new StringBuilder();
        int length = doc.characterLength();
        for (int i = 0; i < length - 1; i++) {
            Range r = new Range(i, i + 1, doc);
            CharacterRun cr = r.getCharacterRun(0);
            raw.append(cr.text());
            color.add(cr.getColor());
            fontSize.add(cr.getFontSize());
        }
        String data = raw.toString();
        System.out.println(data);
        return DataPretreatment(data);
    }

    /**
     * Replaces control-character delimited spans with placeholders.
     *
     * <ul>
     *   <li>Each run of spans that start with U+0013, end with U+0015 and contain 1 to 30
     *       characters becomes a single U+0002. NOTE(review): these look like Word field
     *       begin/end marks - confirm.</li>
     *   <li>Each stretch from a word boundary up to the last U+0007 on a line becomes the
     *       literal {@code @TABLE@}.</li>
     * </ul>
     *
     * @param Data raw document text
     * @return text with the placeholders substituted
     */
    private String DataPretreatment(String Data) {
        Data = Data.replaceAll("(\u0013.{1,30}\u0015)+", "\u0002");
        Data = Data.replaceAll("\\b.+\u0007", "@TABLE@");
        return Data;
    }

}

import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import org.w3c.dom.Document;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.GZIPOutputStream;

/**
 * Extracts the pictures of a {@link HWPFDocument} into a directory, converts WMF pictures to
 * SVG and substitutes the picture markers in the document text with {@code <img>} tags.
 */
public class OImage {

    /** File names (relative to {@link #ProjectPath}) of all pictures, in document order. */
    public List<String> ImgPath;
    /** {@code width@height} of every non-WMF picture. */
    public List<String> ImgSize;
    /** File names of the SVG files generated from WMF pictures. */
    public List<String> wmfPath;
    /** {@code width@height} of every WMF picture. */
    public List<String> wmfSize;
    private HWPFDocument doc;
    /** Directory the pictures are written to. */
    public String ProjectPath;
    /** Platform file separator. */
    public String sp;

    /**
     * Extracts all pictures of {@code document} into {@code path} right away.
     *
     * @param document parsed Word document
     * @param path     target directory for the picture files
     */
    public OImage(HWPFDocument document, String path) {
        this.ProjectPath = path;
        this.sp = File.separator;
        this.doc = document;
        ImgPath = new LinkedList<>();
        ImgSize = new LinkedList<>();
        wmfPath = new LinkedList<>();
        wmfSize = new LinkedList<>();
        this.readImg();
    }

    /**
     * Writes every picture as {@code articleImg<index>.<suffix>}, converts WMF pictures to
     * SVG, and records names and sizes in the public lists. A picture that cannot be written
     * is reported on the standard error stream and still recorded, so the list positions keep
     * matching the picture markers in the text.
     */
    private void readImg() {
        int id = 0;
        PicturesTable pTable = doc.getPicturesTable();
        List<Picture> pictures = pTable.getAllPictures();
        for (Picture img : pictures) {
            String name = "articleImg" + id;
            String suggested = img.suggestFullFileName();
            String suffix = suggested.substring(suggested.lastIndexOf(".") + 1);
            String target = ProjectPath + sp + name + "." + suffix;

            // try-with-resources closes the file even when writing fails.
            try (OutputStream out = new FileOutputStream(new File(target))) {
                img.writeImageContent(out);
            } catch (Exception e) {
                System.err.println("Failed to write image " + target + ": " + e.getMessage());
            }

            String size = img.getWidth() + "@" + img.getHeight();
            if (suffix.equals("wmf")) {
                convert(target, ProjectPath + sp + name + ".svg");
                ImgPath.add(name + ".svg");
                wmfPath.add(name + ".svg");
                wmfSize.add(size);
            } else {
                ImgPath.add(name + "." + suffix);
                ImgSize.add(size);
            }
            id++;
        }
    }

    /**
     * Prefixes {@code data} with {@code <p>} and replaces the U+0001 markers, first to last,
     * with {@code <img>} tags pointing at the extracted pictures.
     *
     * @param data document text containing one U+0001 per picture
     * @return the text with the markers substituted
     */
    public String replaceImg(String data) {
        System.out.println("图片路径：" + ImgPath);
        String res = "<p>" + data;
        for (String path : ImgPath) {
            String tag = "\n<img class='image' src='" + path + "'>";
            // Quote the replacement so a '$' or '\' in a file name is inserted literally.
            res = res.replaceFirst("\u0001", java.util.regex.Matcher.quoteReplacement(tag));
        }
        return res;
    }

    /**
     * Converts a WMF file to SVG (gzip-compressed when {@code dest} ends with {@code .svgz}).
     * Failures are printed and otherwise ignored.
     *
     * @param file path of the WMF file
     * @param dest path of the SVG file to create
     */
    public void convert(String file, String dest) {
        try (InputStream in = new FileInputStream(new File(file))) {
            WmfParser parser = new WmfParser();
            final SvgGdi gdi = new SvgGdi(false);
            parser.parse(in, gdi);
            Document doc = gdi.getDocument();

            OutputStream raw = new FileOutputStream(dest);
            OutputStream out;
            try {
                out = dest.endsWith(".svgz") ? new GZIPOutputStream(raw) : raw;
            } catch (IOException e) {
                // The wrapper could not be created; release the file before reporting.
                raw.close();
                throw e;
            }
            // output() closes the stream in all cases.
            output(doc, out);
        } catch (Exception e) {
            System.out.println("edn?????" + e.getMessage());
        }
    }

    /**
     * Serializes {@code doc} as indented UTF-8 XML with the SVG 1.0 doctype and closes
     * {@code out}. The second transformation into an unused in-memory buffer was removed.
     *
     * @param doc DOM document to serialize
     * @param out destination stream; always closed by this method
     * @throws Exception if the transformation or the stream fails
     */
    public void output(Document doc, OutputStream out) throws Exception {
        try {
            TransformerFactory factory = TransformerFactory.newInstance();
            Transformer transformer = factory.newTransformer();
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
            transformer.transform(new DOMSource(doc), new StreamResult(out));
            out.flush();
        } finally {
            out.close();
        }
    }

}

import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;

/**
 * Writes converted document text to disk, either as a small HTML page or as an XML file of
 * question records.
 */
public class OPrint {

    // Public fields kept for compatibility; none of them is read by the methods below.
    public String Data = "";
    public String xmlData = "";
    public String htmlData = "";
    public String path;
    /** Platform file separator used when building output paths. */
    public String sp;

    public OPrint() {
        this.sp = File.separator;
    }

    /**
     * Writes {@code Data} as the body of an HTML page.
     *
     * <p>Every CR or LF in the text becomes {@code </p><p>}. The page is written as UTF-8 and
     * now starts with the {@code <html>} tag that matches the closing {@code </html>} the
     * method already emitted. Failures are reported on the standard error stream.
     *
     * @param Data     text to embed
     * @param path     output directory
     * @param fileName name of the HTML file to create inside {@code path}
     */
    public void printHtml(String Data, String path, String fileName) {
        String str = Data.replaceAll("\\r|\\n", "</p><p>");
        System.out.println(str);
        String css1 = "../../../aStyle.css";
        String css2 = "../../../../aStyle.css";
        // try-with-resources closes the file even when a write fails.
        try (FileOutputStream raw = new FileOutputStream(path + sp + fileName);
             OutputStreamWriter f = new OutputStreamWriter(raw, "utf-8")) {
            f.append("<!DOCTYPE html>");
            f.append("<html>");
            f.append("<head>");
            f.append("<meta charset='utf8'>");
            f.append("<title>word to html</title>");
            f.append("<link rel='stylesheet' href='" + css1 + "'>");
            f.append("<link rel='stylesheet' href='" + css2 + "'>");
            f.append("</head>");
            f.append("<body>\n<div>");
            f.append(str);
            f.append("</div>\n</body>");
            f.append("</html>");
        } catch (Exception e) {
            System.err.println("Failed to write " + path + sp + fileName + ": " + e.getMessage());
        }
    }

    /**
     * Splits {@code xml} into records and writes them to {@code XMLData.xml} inside
     * {@code path}.
     *
     * @param xml  document text
     * @param path output directory
     */
    public void printXml(String xml, String path) {
        String[][] xmlData = dataToXML(xml);
        writeToXML(xmlData, path);
    }

    /**
     * Splits the text into records of four parts: subject, table, choices and answer.
     *
     * <p>U+0005 and U+0007 are removed first. The text is then split on runs of "whitespace,
     * digit, one more character". NOTE(review): the escape in the pattern literal is turned
     * into a plain '.' by the compiler, so that third position matches any character, not only
     * a literal dot - confirm whether a literal dot was intended.
     *
     * <p>Within a record, U+0003 marks the start of the table part, {@code "A."} the start of
     * the choices and {@code ":"} the start of the answer; a part that is absent stays
     * {@code null}.
     *
     * @param str document text
     * @return one row per record: {@code [subject, table, choices, answer]}
     */
    private String[][] dataToXML(String str) {
        str = str.replaceAll("\u0005", "");
        str = str.replaceAll("\u0007", "");
        String[] data = str.split("(\\s\\d\u002e)+");
        // The first element is skipped unless it starts with '1'. It is empty when the text
        // begins with a separator (or is empty), which used to make charAt(0) throw.
        int i = 1;
        if (!data[0].isEmpty() && data[0].charAt(0) == '1') {
            i = 0;
        }
        String[][] subject = new String[data.length - i][4];
        int index = 0;
        for (; i < data.length; i++) {
            String record = data[i];
            System.out.println("data:" + record);
            int tab = record.indexOf("\u0003");
            int ch = record.indexOf("A.");
            int an = record.indexOf(":");

            // Subject: everything before the first marker that is present.
            if (tab != -1) subject[index][0] = record.substring(0, tab);
            else if (ch != -1) subject[index][0] = record.substring(0, ch);
            else if (an != -1) subject[index][0] = record.substring(0, an);
            else subject[index][0] = record;

            // Table: from the table marker to the next marker that is present.
            if (tab != -1 && ch != -1) subject[index][1] = record.substring(tab, ch);
            else if (tab != -1 && an != -1) subject[index][1] = record.substring(tab, an);
            else if (tab != -1) subject[index][1] = record.substring(tab, record.length());

            // Choices: from "A." to the answer marker, or to the end.
            if (ch != -1 && an != -1) subject[index][2] = record.substring(ch, an);
            else if (ch != -1) subject[index][2] = record.substring(ch, record.length());

            // Answer: from ":" to the end.
            if (an != -1) subject[index][3] = record.substring(an, record.length());
            index++;
        }
        return subject;
    }

    /**
     * Writes the records to {@code XMLData.xml} inside {@code path}.
     *
     * <p>The stream is now opened as UTF-8 to match the XML declaration, is closed in all
     * cases, and a failure to open it is reported instead of causing a
     * {@code NullPointerException}. Element names (including {@code Anser}) are unchanged.
     *
     * @param xml  records as produced by {@link #dataToXML(String)}
     * @param path output directory
     */
    private void writeToXML(String[][] xml, String path) {
        try (FileOutputStream f = new FileOutputStream(path + sp + "XMLData.xml");
             PrintStream ps = new PrintStream(f, false, "UTF-8")) {
            ps.println("<?xml version='1.0' encoding='UTF-8'?>");
            ps.println("<Word>");
            for (int i = 0; i < xml.length; i++) {
                ps.println("\t<Data>");
                ps.println("\t\t<Subject>");
                ps.println("\t\t\t" + xml[i][0].trim());
                ps.println("\t\t</Subject>");

                if (xml[i][1] != null) ps.println("\t\t\t" + xml[i][1].trim());
                if (xml[i][2] != null) {
                    ps.println("\t\t<Choose>");
                    ps.println("\t\t\t" + xml[i][2].trim());
                    ps.println("\t\t</Choose>");
                }
                if (xml[i][3] != null) {
                    ps.println("\t\t<Anser>");
                    ps.println("\t\t\t" + xml[i][3].trim());
                    ps.println("\t\t</Anser>");
                }
                ps.println("\t</Data>\n\n");
            }
            ps.println("</Word>");
        } catch (Exception e) {
            System.err.println("Failed to write " + path + sp + "XMLData.xml: " + e.getMessage());
        }
    }

}

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;

import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Collects the text of every table in a {@link HWPFDocument} and expands {@code @TABLE@}
 * placeholders in document text into HTML or XML table markup.
 */
public class OTable {

    // One encoded string per table: cells end with "@TD@", rows end with "@TR@".
    private List<String> tableData;
    private HWPFDocument doc;

    /**
     * Reads all tables of {@code document} right away.
     *
     * @param document parsed Word document
     */
    public OTable(HWPFDocument document) {
        tableData = new LinkedList<>();
        this.doc = document;
        readTableMsg();
    }

    /** Iterates over every table in the document range and encodes it with readTable. */
    private void readTableMsg() {
        Range range = doc.getRange();
        TableIterator tab = new TableIterator(range);
        while (tab.hasNext()) {
            Table table = tab.next();
            readTable(table);
        }
    }

    /**
     * Encodes one table as a flat string and appends it to the collected table data.
     *
     * <p>Each paragraph of each cell is followed by U+0005 and {@code @TD@}; each row is
     * followed by {@code @TR@}. Runs of spans delimited by U+0013 and U+0015 (1 to 30
     * characters inside) are then collapsed to a single U+0002.
     *
     * @param tab table to encode
     * @return the encoded table
     */
    public String readTable(Table tab) {
        StringBuilder encoded = new StringBuilder();
        for (int i = 0; i < tab.numRows(); i++) {
            TableRow tr = tab.getRow(i);

            for (int j = 0; j < tr.numCells(); j++) {
                TableCell td = tr.getCell(j);

                for (int k = 0; k < td.numParagraphs(); k++) {
                    Paragraph paragraph = td.getParagraph(k);
                    encoded.append(paragraph.text()).append("\u0005").append("@TD@");
                }
            }
            encoded.append("@TR@");
        }
        String res = encoded.toString().replaceAll("(\u0013.{1,30}\u0015)+", "\u0002");
        tableData.add(res);
        return res;
    }

    /**
     * Returns the data of every table in the document; each table is one element of the list.
     *
     * @return the encoded tables, in the order they were read
     */
    public List<String> getTableData() {
        return tableData;
    }

    /**
     * Prefixes every {@code @TABLE@} placeholder with U+0003 and then replaces the
     * placeholders, first to last, with the XML rendering of the collected tables.
     *
     * <p>Each step now continues from the previous result; before, every iteration restarted
     * from the unmodified input, so the U+0003 prefix and all earlier substitutions were lost.
     *
     * @param xmlData document text containing {@code @TABLE@} placeholders
     * @return the text with as many placeholders expanded as there are tables
     */
    public String replaceXmlTable(String xmlData) {
        String xml = xmlData.replaceAll("@TABLE@", "\u0003@TABLE@");
        // The matcher runs over the text as it was before any substitution, so it simply
        // counts the placeholders that were present.
        Matcher mm = Pattern.compile("@TABLE@").matcher(xml);
        int index = 0;
        while (mm.find() && index < tableData.size()) {
            // Quote the replacement: table text may contain '$' or '\'.
            String replacement = Matcher.quoteReplacement(xmlTable(tableData.get(index++)));
            xml = xml.replaceFirst("@TABLE@", replacement);
        }
        return xml;
    }

    /**
     * Replaces the {@code @TABLE@} placeholders, first to last, with the HTML rendering of the
     * collected tables.
     *
     * @param htmlData document text containing {@code @TABLE@} placeholders
     * @return the text with as many placeholders expanded as there are tables
     */
    public String replaceHtmlTable(String htmlData) {
        Matcher mm = Pattern.compile("@TABLE@").matcher(htmlData);
        int index = 0;
        while (mm.find() && index < tableData.size()) {
            // Quote the replacement: table text may contain '$' or '\'.
            String replacement = Matcher.quoteReplacement(htmlTable(tableData.get(index++)));
            htmlData = htmlData.replaceFirst("@TABLE@", replacement);
        }
        return htmlData;
    }

    /**
     * Renders an encoded table as {@code <table border='1'>} markup on a single line.
     *
     * @param table encoded table as produced by {@link #readTable(Table)}
     * @return the HTML markup
     */
    public String htmlTable(String table) {
        StringBuilder res = new StringBuilder("<table border='1'>");
        for (String row : table.split("@TR@")) {
            res.append("<tr>");
            for (String cell : row.split("@TD@")) {
                res.append("<td>").append(cell).append("</td>");
            }
            res.append("</tr>");
        }
        res.append("</table>");
        return res.toString();
    }

    /**
     * Renders an encoded table as tab-indented, multi-line table markup.
     *
     * @param table encoded table as produced by {@link #readTable(Table)}
     * @return the XML markup
     */
    public String xmlTable(String table) {
        StringBuilder res = new StringBuilder("\t\t<table border='1'>\n");
        for (String row : table.split("@TR@")) {
            res.append("\t\t\t<tr>\n");
            for (String cell : row.split("@TD@")) {
                res.append("\t\t\t\t<td>").append(cell).append(" </td>\n");
            }
            res.append("\t\t\t</tr>\n");
        }
        res.append("\t\t</table>\n");
        return res.toString();
    }

}

这个可以读取图片、公式和一些表格，但是还有一个问题：docx转doc以后，docx版本的公式不能转成doc的公式3.0，会自动转成图片，这边读取的时候转换出来的图片会有两张。总的来说，docx公式转doc后读取会有重复，这样定位图片时位置会出错。客户不愿意了：不让我用docx，还不让我用他们的公式吗？不行，还得改。下面是最终解决方案：

import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPOutputStream;

/**
 * Converts a binary Word (.doc) file to HTML with POI's {@link WordToHtmlConverter}, saving
 * the embedded pictures next to the output and turning WMF pictures into SVG.
 *
 * @author shihao
 * @date 2020-03-24 14:21:30
 */
public class Word2007ToHtml {

    /**
     * Converts {@code path + file} and writes the result to {@code 1.html} inside
     * {@code path}.
     *
     * <p>{@code path} is concatenated directly with file names, so it has to end with a file
     * separator. Pictures are stored in {@code path} under their suggested file names; every
     * WMF picture is additionally converted to an SVG file and references to the WMF name in
     * the generated HTML are replaced by the SVG name.
     *
     * @param path directory containing the source file, ending with a separator
     * @param file name of the .doc file inside {@code path}
     * @throws Throwable if reading, converting or writing fails
     */
    public void wordDoc(String path, String file) throws Throwable {
        List<String> wmfPath = new ArrayList<>();

        HWPFDocument wordDocument;
        // The stream is only needed while the document is being parsed.
        try (InputStream input = new FileInputStream(path + file)) {
            wordDocument = new HWPFDocument(input);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        // Reference each picture in the HTML by its suggested name. NOTE(review): this is
        // assumed to match Picture.suggestFullFileName() used when saving below - confirm.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                                      String suggestedName, float widthInches, float heightInches) {
                return suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        System.out.println(pics.size());
        for (Picture pic : pics) {
            String pictureName = pic.suggestFullFileName();
            // try-with-resources closes each picture file; they were never closed before.
            try (OutputStream pictureOut = new FileOutputStream(path + pictureName)) {
                pic.writeImageContent(pictureOut);
                String suffix = pictureName.substring(pictureName.lastIndexOf(".") + 1);
                if (suffix.equals("wmf")) {
                    wmfPath.add(pictureName);
                }
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        DOMSource domSource = new DOMSource(htmlDocument);
        StreamResult streamResult = new StreamResult(outStream);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer = tf.newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(domSource, streamResult);
        outStream.close();

        // The serializer wrote UTF-8 bytes; decode them as UTF-8 rather than with the platform
        // default charset, which garbled non-ASCII text on systems that default to another one.
        String content = outStream.toString("UTF-8");
        System.out.println(content);

        for (String wmf : wmfPath) {
            String svg = wmf.substring(0, wmf.lastIndexOf(".")) + ".svg";
            convert(path + wmf, path + svg);
            content = content.replace(wmf, svg);
        }
        FileUtils.writeStringToFile(new File(path, "1.html"), content, "UTF-8");
    }

    /**
     * Converts a WMF file to SVG (gzip-compressed when {@code dest} ends with {@code .svgz}).
     * Failures are printed and otherwise ignored.
     *
     * @param file path of the WMF file
     * @param dest path of the SVG file to create
     */
    public void convert(String file, String dest) {
        try (InputStream in = new FileInputStream(new File(file))) {
            WmfParser parser = new WmfParser();
            final SvgGdi gdi = new SvgGdi(false);
            parser.parse(in, gdi);
            Document doc = gdi.getDocument();

            OutputStream raw = new FileOutputStream(dest);
            OutputStream out;
            try {
                out = dest.endsWith(".svgz") ? new GZIPOutputStream(raw) : raw;
            } catch (IOException e) {
                // The wrapper could not be created; release the file before reporting.
                raw.close();
                throw e;
            }
            // output() closes the stream in all cases.
            output(doc, out);
        } catch (Exception e) {
            System.out.println("edn?????" + e.getMessage());
        }
    }

    /**
     * Serializes {@code doc} as indented UTF-8 XML with the SVG 1.0 doctype and closes
     * {@code out}. The second transformation into an unused in-memory buffer was removed.
     *
     * @param doc DOM document to serialize
     * @param out destination stream; always closed by this method
     * @throws Exception if the transformation or the stream fails
     */
    public void output(Document doc, OutputStream out) throws Exception {
        try {
            TransformerFactory factory = TransformerFactory.newInstance();
            Transformer transformer = factory.newTransformer();
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
            transformer.transform(new DOMSource(doc), new StreamResult(out));
            out.flush();
        } finally {
            out.close();
        }
    }
}

这个方案处理公式、图片、定位都很ok。输出的也是xhtml，用上方的读取类读取就好了，乱码请注意编码。最后我再贴一下读取html的代码：

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;

/**
 * Reads a text file into a single string.
 *
 * @author shihao
 */
public class ReadHtml {

    /**
     * Reads the file at {@code url} as UTF-8 and returns its lines joined without line
     * separators. The content is also printed to the standard output stream.
     *
     * <p>When the path does not denote an existing regular file the result is the empty
     * string. When reading fails, the error is printed and whatever was read up to that point
     * is returned.
     *
     * @param url path of the file to read
     * @return the concatenated lines of the file
     */
    public String read(String url) {
        StringBuilder collected = new StringBuilder();
        try {
            File f = new File(url);
            // isFile() is false for paths that do not exist, so no separate exists() check.
            if (f.isFile()) {
                // try-with-resources closes the reader even when reading fails part-way.
                try (BufferedReader reader =
                             new BufferedReader(new InputStreamReader(new FileInputStream(f), "utf8"))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        collected.append(line);
                    }
                }
            }
        } catch (Exception e) {
            System.out.println("读取文件内容操作出错");
            e.printStackTrace();
        }
        String fileContent = collected.toString();
        System.out.println("读取：" + fileContent);
        return fileContent;
    }
}

重要的事情说三遍：注意编码——文件编码和解析时使用的编码。

poi操作word(doc)转html 包含公式图片,

poi操作word(doc)转html 包含公式图片,

相关阅读更多精彩内容

友情链接更多精彩内容