接到一个需求:用 Word 文档做批量导入。我前后用了两大类方法。
第一:jacob 技术。确实挺好用,不管是 docx 和 doc 互相转换还是转 html 都可以,docx 的公式也能处理;但是有一个致命的问题:只能在 Windows 上使用,Linux 可以直接略过了。因为只能在 Windows 上使用,这里只大致写了一些。需要 jacob 的 jar 包和一个 dll 文件,
dll 文件放在 system32 这个目录下面,这也是为什么只能在 Windows 上使用的原因。大家可以去官网下载,也可以留下邮箱,我发给你。
import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
/**
 * Converts Word documents by automating a local "Word.Application" COM
 * object through the JACOB bridge.
 *
 * @author shihao
 */
public class JacobUtil {
    /** Format code handed to Word's {@code SaveAs} when producing HTML. */
    public static final int WORD_HTML = 8;
    /** Format code for text output; not used inside this class. */
    public static final int WORD_TXT = 7;
    /** Format code for Excel HTML output; not used inside this class. */
    public static final int EXCEL_HTML = 44;

    /**
     * Converts a Word file to HTML.
     *
     * <p>Failures are printed via {@code printStackTrace()} and not rethrown,
     * so callers cannot detect a failed conversion from the return path.
     *
     * @param docfile  full path of the Word file to open
     * @param htmlfile path where the converted HTML is stored
     */
    public void wordToHtml(String docfile, String htmlfile) {
        // Starts Word.
        ActiveXComponent app = new ActiveXComponent("Word.Application");
        try {
            // Keep the Word window hidden.
            app.setProperty("Visible", new Variant(false));
            // Obtain the Documents collection.
            Dispatch docs = app.getProperty("Documents").toDispatch();
            // Open the file. NOTE(review): the two Variants are presumably
            // ConfirmConversions=false and ReadOnly=true - confirm against
            // the Documents.Open parameter order.
            Dispatch doc = Dispatch.invoke(
                    docs,
                    "Open",
                    Dispatch.Method,
                    new Object[] { docfile, new Variant(false), new Variant(true) },
                    new int[1]).toDispatch();
            try {
                // Save under the new name in HTML format.
                Dispatch.invoke(doc, "SaveAs", Dispatch.Method,
                        new Object[] { htmlfile, new Variant(WORD_HTML) }, new int[1]);
            } finally {
                // Close the document even when SaveAs fails, so that Quit is
                // never issued while a document is still open.
                Dispatch.call(doc, "Close", new Variant(false));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            app.invoke("Quit", new Variant[] {});
        }
    }
}
第二:poi操作 poi读写word不同版本是不一样的,我当时是docx和doc
docx版本 使用XWPFDocument
import com.inxedu.wxos.util.UploadPropertyUtil;
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.svg.SvgGdiException;
import net.arnx.wmf2svg.gdi.wmf.WmfParseException;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.*;
import org.jsoup.nodes.Document;
import org.openxmlformats.schemas.officeDocument.x2006.math.CTOMath;
import java.io.*;
import java.util.List;
import java.util.UUID;
import java.util.zip.GZIPOutputStream;
/**
 * Converts a .docx file into an XHTML fragment with the XWPF XHTML converter.
 *
 * @author shihao
 */
public class Word {

    /**
     * Converts the given .docx file to an XHTML fragment file.
     *
     * <p>The HTML file gets a random UUID-based name and is written next to
     * the source document; extracted images are stored in that same
     * directory.
     *
     * @param paths path of the .docx file to convert
     * @return the path of the generated HTML file, or one of the literal
     *         messages {@code "Sorry File does not Exists!"} /
     *         {@code "Enter only MS Office 2007+ files"} when the input is
     *         missing or does not end in .docx/.DOCX
     * @throws IOException if the document cannot be read or the HTML cannot
     *         be written
     * @throws WmfParseException declared for interface compatibility
     * @throws SvgGdiException declared for interface compatibility
     */
    public String html(String paths) throws IOException, WmfParseException, SvgGdiException {
        File f = new File(paths);
        if (!f.exists()) {
            System.out.println("Sorry File does not Exists!");
            return "Sorry File does not Exists!";
        }
        String sourceName = f.getName();
        if (!(sourceName.endsWith(".docx") || sourceName.endsWith(".DOCX"))) {
            System.out.println("Enter only MS Office 2007+ files");
            return "Enter only MS Office 2007+ files";
        }

        // The original referenced undefined `path` / `htmlName` variables;
        // derive both here so the method is self-contained.
        File imageFolderFile = f.getAbsoluteFile().getParentFile();
        String htmlName = UUID.randomUUID().toString().replaceAll("-", "") + ".html";
        File htmlFile = new File(imageFolderFile, htmlName);

        try (InputStream in = new FileInputStream(f);
             OutputStream out = new FileOutputStream(htmlFile)) {
            // 1) Load the Word file into an XWPFDocument.
            XWPFDocument document = new XWPFDocument(in);
            // 2) Configure the converter; the URI resolver and the extractor
            //    both point at the directory that receives the images.
            XHTMLOptions options = XHTMLOptions.create().URIResolver(new FileURIResolver(imageFolderFile));
            options.setExtractor(new FileImageExtractor(imageFolderFile));
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);
            // 3) Convert the XWPFDocument to XHTML.
            XHTMLConverter.getInstance().convert(document, out, options);
        }
        return htmlFile.getPath();
    }
}
这个方法可以将文字和图片转成 html(准确地说是 xhtml,下面会说明如何读取 xhtml),我也能解析出来。但是客户那边又说了:公式也要能读取。找了很多资料,docx 的公式没找到好的处理方法,只好又换方法。下面先贴出读取 xhtml 的方法,如果乱码请注意编码。
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
 * Reads an HTML file and re-serialises it as XHTML using jsoup.
 *
 * @author shihao
 */
public class XhtmltoHtml {

    /**
     * Parses the given HTML and serialises it with XML syntax and the XHTML
     * entity escape mode.
     *
     * @param html HTML markup to normalise
     * @return the document re-serialised by jsoup
     */
    public String html2xhtml(String html) {
        Document doc = Jsoup.parse(html);
        doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml).escapeMode(Entities.EscapeMode.xhtml);
        return doc.html();
    }

    /**
     * Reads the file at {@code path} as UTF-8, prints it, converts it with
     * {@link #html2xhtml(String)}, prints the result and returns it.
     *
     * @param path path of the HTML file to read
     * @return the XHTML form of the file content
     * @throws IOException if the file cannot be opened or read
     */
    public String html(String path) throws IOException {
        // available() + a single read() is not guaranteed to return the whole
        // file, so read until end-of-stream instead.
        java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        try (FileInputStream input = new FileInputStream(new File(path))) {
            byte[] chunk = new byte[8192];
            int count;
            while ((count = input.read(chunk)) != -1) {
                buffer.write(chunk, 0, count);
            }
        }
        String html = buffer.toString("utf-8");
        System.out.println("============html===================");
        System.out.println(html);
        String xhtml = html2xhtml(html);
        System.out.println("============xhtml===================");
        System.out.println(xhtml);
        return xhtml;
    }
}
没办法客户就是上帝,换吧,这次是读取doc文档的,话不多说直接上码
import com.inxedu.wxos.util.UploadPropertyUtil;
/**
 * Demo driver: loads a .doc file and writes it out as HTML (or XML) together
 * with its extracted images.
 */
public class MainTest {
    public static UploadPropertyUtil propertyUtil = UploadPropertyUtil.getInstance("application-project");

    /**
     * Converts the hard-coded sample document to HTML and prints "OK!".
     *
     * @param args unused
     * @throws Exception declared for any failure during conversion
     */
    public static void main(String[] args) throws Exception {
        final String sourceDoc = "C:\\Users\\MACHENIKE\\Desktop\\Batch.doc";
        final String outputDir = "C:\\Users\\MACHENIKE\\Desktop\\batt";
        final ODocument loaded = new ODocument(sourceDoc);
        // writeXml(loaded, outputDir) is the alternative XML output path.
        writeHtml(loaded, outputDir);
        System.out.println("OK!");
    }

    /**
     * Replaces table and image placeholders in the document text with HTML
     * markup and writes the result to {@code batch.html} under {@code path}.
     *
     * @param doc  loaded document
     * @param path output directory for the HTML file and images
     */
    public static void writeHtml(ODocument doc, String path) {
        final OTable tables = new OTable(doc.getDocument());
        final String withTables = tables.replaceHtmlTable(doc.readDoc());
        final OImage images = new OImage(doc.getDocument(), path);
        final String withImages = images.replaceImg(withTables);
        final OPrint printer = new OPrint();
        printer.printHtml(withImages, path, "batch.html");
    }

    /**
     * Replaces table and image placeholders in the document text with XML
     * markup and hands the result to {@code OPrint.printXml}.
     *
     * @param doc  loaded document
     * @param path output directory for the XML file and images
     */
    public static void writeXml(ODocument doc, String path) {
        final OTable tables = new OTable(doc.getDocument());
        final String withTables = tables.replaceXmlTable(doc.readDoc());
        final OImage images = new OImage(doc.getDocument(), path);
        final String withImages = images.replaceImg(withTables);
        final OPrint printer = new OPrint();
        printer.printXml(withImages, path);
    }
}
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;
import java.io.File;
import java.io.FileInputStream;
import java.util.LinkedList;
import java.util.List;
/**
 * Loads a binary .doc file with HWPF and exposes its text with embedded
 * fields and table rows collapsed into placeholder markers.
 */
public class ODocument {
    private HWPFDocument doc;
    private String path;
    // Per-character font size and colour collected by readDoc().
    private List<Integer> fontSize;
    private List<Integer> color;

    /**
     * Loads the document at {@code path}. A load failure is reported on
     * standard output and leaves {@link #getDocument()} returning null.
     *
     * @param path path of the .doc file
     */
    public ODocument(String path) {
        this.doc = null;
        this.fontSize = new LinkedList<>();
        this.color = new LinkedList<>();
        this.path = path;
        this.loadDoc();
    }

    /** Opens the file and parses it; the stream is closed in every case. */
    private void loadDoc() {
        try (FileInputStream in = new FileInputStream(new File(this.path))) {
            this.doc = new HWPFDocument(in);
        } catch (Exception e) {
            System.out.println("Failed to load " + this.path + ": " + e);
        }
    }

    /**
     * @return the parsed document, or null when loading failed
     */
    public HWPFDocument getDocument() {
        return this.doc;
    }

    /**
     * Reads the document one character position at a time (all positions
     * except the last), recording each run's colour and font size, prints the
     * raw text and returns it after placeholder substitution.
     *
     * @return the document text with field spans replaced by {@code \u0002}
     *         and table rows replaced by {@code @TABLE@}
     * @throws IllegalStateException if the document could not be loaded
     */
    public String readDoc() {
        if (doc == null) {
            throw new IllegalStateException("Document was not loaded: " + path);
        }
        // Start from a clean slate so repeated calls do not duplicate entries.
        color.clear();
        fontSize.clear();

        StringBuilder raw = new StringBuilder();
        int length = doc.characterLength();
        for (int i = 0; i < length - 1; i++) {
            CharacterRun cr = new Range(i, i + 1, doc).getCharacterRun(0);
            raw.append(cr.text());
            color.add(cr.getColor());
            fontSize.add(cr.getFontSize());
        }
        String text = raw.toString();
        System.out.println(text);
        return DataPretreatment(text);
    }

    /**
     * Collapses control-character spans into placeholders.
     *
     * @param Data raw document text
     * @return the text with every run of {@code \u0013...\u0015} spans (1-30
     *         characters each; presumably Word field begin/end marks) replaced
     *         by {@code \u0002}, and every match of {@code \b.+\u0007}
     *         replaced by {@code @TABLE@}
     */
    private String DataPretreatment(String Data) {
        Data = Data.replaceAll("(\u0013.{1,30}\u0015)+", "\u0002");
        Data = Data.replaceAll("\\b.+\u0007", "@TABLE@");
        return Data;
    }
}
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.usermodel.Picture;
import org.w3c.dom.Document;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.GZIPOutputStream;
/**
 * Extracts every picture of a .doc document into a directory, converts WMF
 * pictures to SVG, and substitutes picture placeholders in the document text
 * with {@code <img>} tags.
 */
public class OImage {
    /** File names of all extracted pictures, in document order (WMF entries use the .svg name). */
    public List<String> ImgPath;
    /** "width@height" for each non-WMF picture. */
    public List<String> ImgSize;
    /** File names of the SVG files produced from WMF pictures. */
    public List<String> wmfPath;
    /** "width@height" for each WMF picture. */
    public List<String> wmfSize;
    private HWPFDocument doc;
    /** Directory that receives the extracted files. */
    public String ProjectPath;
    /** Platform file separator. */
    public String sp;

    /**
     * Extracts all pictures of {@code document} into {@code path}.
     *
     * @param document source document
     * @param path     output directory
     */
    public OImage(HWPFDocument document, String path) {
        this.ProjectPath = path;
        this.sp = File.separator;
        this.doc = document;
        ImgPath = new LinkedList<>();
        ImgSize = new LinkedList<>();
        wmfPath = new LinkedList<>();
        wmfSize = new LinkedList<>();
        this.readImg();
    }

    /**
     * Writes each picture as {@code articleImg<N>.<suffix>} and records its
     * name and size. A picture is recorded even when writing it failed, so
     * that the list stays aligned with the placeholders in the text.
     */
    private void readImg() {
        int id = 0;
        PicturesTable pTable = doc.getPicturesTable();
        List<Picture> pic = pTable.getAllPictures();
        for (Picture img : pic) {
            String name = "articleImg" + id;
            String afileName = img.suggestFullFileName();
            String suffix = afileName.substring(afileName.lastIndexOf(".") + 1);
            String target = ProjectPath + sp + name + "." + suffix;
            try (OutputStream out = new FileOutputStream(new File(target))) {
                img.writeImageContent(out);
            } catch (Exception e) {
                // Previously discarded silently; report it and carry on.
                System.out.println("Failed to write image " + target + ": " + e);
            }
            if (suffix.equals("wmf")) {
                convert(target, ProjectPath + sp + name + ".svg");
                ImgPath.add(name + ".svg");
                wmfPath.add(name + ".svg");
                wmfSize.add(img.getWidth() + "@" + img.getHeight());
            } else {
                ImgPath.add(name + "." + suffix);
                ImgSize.add(img.getWidth() + "@" + img.getHeight());
            }
            id++;
        }
    }

    /**
     * Prefixes {@code data} with {@code <p>} and replaces the first
     * {@code \u0001} placeholder with an {@code <img>} tag for each extracted
     * picture, in order.
     *
     * @param data document text containing {@code \u0001} picture placeholders
     * @return the text with placeholders substituted
     */
    public String replaceImg(String data) {
        System.out.println("图片路径:" + ImgPath);
        String res = "<p>" + data;
        for (String path : ImgPath) {
            String tag = "\n<img class='image' src='" + path + "'>";
            // Quote the replacement so '$' or '\' in a file name is literal.
            res = res.replaceFirst("\u0001", java.util.regex.Matcher.quoteReplacement(tag));
        }
        return res;
    }

    /**
     * Converts a WMF file to SVG (gzip-compressed when {@code dest} ends in
     * ".svgz"). Failures are reported on standard output and not rethrown.
     *
     * @param file path of the WMF source
     * @param dest path of the SVG file to write
     */
    public void convert(String file, String dest) {
        try (InputStream in = new FileInputStream(new File(file))) {
            WmfParser parser = new WmfParser();
            final SvgGdi gdi = new SvgGdi(false);
            parser.parse(in, gdi);
            Document doc = gdi.getDocument();
            OutputStream out = new FileOutputStream(dest);
            if (dest.endsWith(".svgz")) {
                out = new GZIPOutputStream(out);
            }
            output(doc, out);
        } catch (Exception e) {
            System.out.println("WMF to SVG conversion failed for " + file + ": " + e.getMessage());
        }
    }

    /**
     * Serialises {@code doc} as indented UTF-8 XML with the SVG 1.0 doctype
     * and closes {@code out}, also when serialisation fails.
     *
     * @param doc DOM document to serialise
     * @param out destination stream; closed by this method
     * @throws Exception if the transformer cannot be created or fails
     */
    public void output(Document doc, OutputStream out) throws Exception {
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
            transformer.transform(new DOMSource(doc), new StreamResult(out));
            out.flush();
        } finally {
            out.close();
        }
    }
}
import java.io.File;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
/**
 * Writes converted document text to disk, either as a small HTML page or as
 * an XML file of question records.
 */
public class OPrint {
    public String Data = "";
    public String xmlData = "";
    public String htmlData = "";
    public String path;
    /** Platform file separator used when building output paths. */
    public String sp;

    public OPrint() {
        this.sp = File.separator;
    }

    /**
     * Writes {@code Data} as a UTF-8 HTML page to {@code path/fileName}.
     * Every CR or LF in the text becomes a paragraph break
     * ({@code </p><p>}). The converted text is also printed to standard
     * output. I/O failures are reported on standard error and not rethrown.
     *
     * @param Data     body markup to embed
     * @param path     output directory
     * @param fileName name of the HTML file
     */
    public void printHtml(String Data, String path, String fileName) {
        String str = Data.replaceAll("\\r|\\n", "</p><p>");
        System.out.println(str);
        String css1 = "../../../aStyle.css";
        String css2 = "../../../../aStyle.css";
        String target = path + sp + fileName;
        try (OutputStreamWriter f = new OutputStreamWriter(new FileOutputStream(target), "utf-8")) {
            f.append("<!DOCTYPE html>");
            // The opening tag was missing although </html> is written below.
            f.append("<html>");
            f.append("<head>");
            f.append("<meta charset='utf8'>");
            f.append("<title>word to html</title>");
            f.append("<link rel='stylesheet' href='" + css1 + "'>");
            f.append("<link rel='stylesheet' href='" + css2 + "'>");
            f.append("</head>");
            f.append("<body>\n<div>");
            f.append(str);
            f.append("</div>\n</body>");
            f.append("</html>");
        } catch (Exception e) {
            System.err.println("Failed to write " + target + ": " + e);
        }
    }

    /**
     * Splits {@code xml} into question records and writes them to
     * {@code path/XMLData.xml}.
     *
     * @param xml  document text with table/image markup already substituted
     * @param path output directory
     */
    public void printXml(String xml, String path) {
        String[][] xmlData = dataToXML(xml);
        writeToXML(xmlData, path);
    }

    /**
     * Splits the text into records and each record into four parts:
     * [0] subject, [1] table section (from the {@code \u0003} marker),
     * [2] choices (from "A."), [3] answer (from ":"). Absent parts stay null.
     *
     * <p>The first chunk is kept only when it starts with '1'; otherwise it
     * is skipped. The substring calls assume the markers occur in the order
     * table, choices, answer within a record.
     *
     * @param str document text
     * @return one row of four strings per record
     */
    private String[][] dataToXML(String str) {
        str = str.replaceAll("\u0005", "");
        str = str.replaceAll("\u0007", "");
        // NOTE(review): \u002e is a plain '.' here, i.e. a regex wildcard,
        // not a literal period - confirm whether that is intended.
        String[] data = str.split("(\\s\\d\u002e)+");
        if (data.length == 0) {
            // The input consisted only of delimiters.
            return new String[0][4];
        }
        int i = 1;
        // data[0] is empty when the text starts with a delimiter (or is
        // empty); guard before charAt(0).
        if (!data[0].isEmpty() && data[0].charAt(0) == '1') {
            i = 0;
        }
        String[][] subject = new String[data.length - i][4];
        int index = 0;
        for (; i < data.length; i++) {
            String record = data[i];
            System.out.println("data:" + record);
            int tab = record.indexOf("\u0003");
            int ch = record.indexOf("A.");
            int an = record.indexOf(":");

            if (tab != -1) {
                subject[index][0] = record.substring(0, tab);
            } else if (ch != -1) {
                subject[index][0] = record.substring(0, ch);
            } else if (an != -1) {
                subject[index][0] = record.substring(0, an);
            } else {
                subject[index][0] = record;
            }

            if (tab != -1 && ch != -1) {
                subject[index][1] = record.substring(tab, ch);
            } else if (tab != -1 && an != -1) {
                subject[index][1] = record.substring(tab, an);
            } else if (tab != -1) {
                subject[index][1] = record.substring(tab);
            }

            if (ch != -1 && an != -1) {
                subject[index][2] = record.substring(ch, an);
            } else if (ch != -1) {
                subject[index][2] = record.substring(ch);
            }

            if (an != -1) {
                subject[index][3] = record.substring(an);
            }
            index++;
        }
        return subject;
    }

    /**
     * Writes the records to {@code path/XMLData.xml} in UTF-8, matching the
     * encoding named in the XML declaration. The content is written as-is
     * (not XML-escaped) because the table section already contains markup.
     * I/O failures are reported on standard error and not rethrown.
     *
     * @param xml  records produced by {@link #dataToXML(String)}
     * @param path output directory
     */
    private void writeToXML(String[][] xml, String path) {
        String target = path + sp + "XMLData.xml";
        try (PrintStream ps = new PrintStream(new FileOutputStream(target), false, "UTF-8")) {
            ps.println("<?xml version='1.0' encoding='UTF-8'?>");
            ps.println("<Word>");
            for (int i = 0; i < xml.length; i++) {
                ps.println("\t<Data>");
                ps.println("\t\t<Subject>");
                ps.println("\t\t\t" + xml[i][0].trim());
                ps.println("\t\t</Subject>");
                if (xml[i][1] != null) {
                    ps.println("\t\t\t" + xml[i][1].trim());
                }
                if (xml[i][2] != null) {
                    ps.println("\t\t<Choose>");
                    ps.println("\t\t\t" + xml[i][2].trim());
                    ps.println("\t\t</Choose>");
                }
                if (xml[i][3] != null) {
                    ps.println("\t\t<Anser>");
                    ps.println("\t\t\t" + xml[i][3].trim());
                    ps.println("\t\t</Anser>");
                }
                ps.println("\t</Data>\n\n");
            }
            ps.println("</Word>");
        } catch (Exception e) {
            System.err.println("Failed to write " + target + ": " + e);
        }
    }
}
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Collects the text of every table in a .doc document and substitutes
 * {@code @TABLE@} placeholders in the document text with HTML or XML table
 * markup.
 */
public class OTable {
    // One flattened string per table, using @TD@ / @TR@ as cell/row markers.
    private List<String> tableData;
    private HWPFDocument doc;

    /**
     * Reads all tables of {@code document}.
     *
     * @param document source document
     */
    public OTable(HWPFDocument document) {
        tableData = new LinkedList<>();
        this.doc = document;
        readTableMsg();
    }

    /** Iterates over every table in the document range and flattens it. */
    private void readTableMsg() {
        Range range = doc.getRange();
        TableIterator tab = new TableIterator(range);
        while (tab.hasNext()) {
            Table table = tab.next();
            readTable(table);
        }
    }

    /**
     * Flattens a table to a string and appends it to the collected table
     * data. Each paragraph of each cell is followed by {@code \u0005@TD@},
     * each row by {@code @TR@}; runs of {@code \u0013...\u0015} spans are
     * replaced by {@code \u0002}.
     *
     * @param tab table to flatten
     * @return the flattened table text
     */
    public String readTable(Table tab) {
        StringBuilder cells = new StringBuilder();
        for (int i = 0; i < tab.numRows(); i++) {
            TableRow tr = tab.getRow(i);
            for (int j = 0; j < tr.numCells(); j++) {
                TableCell td = tr.getCell(j);
                for (int k = 0; k < td.numParagraphs(); k++) {
                    Paragraph paragraph = td.getParagraph(k);
                    cells.append(paragraph.text()).append("\u0005").append("@TD@");
                }
            }
            cells.append("@TR@");
        }
        String res = cells.toString().replaceAll("(\u0013.{1,30}\u0015)+", "\u0002");
        tableData.add(res);
        return res;
    }

    /**
     * Returns the data of every table in the document; each table is one
     * element of the list.
     *
     * @return the live list of flattened tables
     */
    public List<String> getTableData() {
        return tableData;
    }

    /**
     * Prefixes every {@code @TABLE@} placeholder with {@code \u0003} and then
     * replaces the placeholders, in order, with XML table markup. Placeholders
     * beyond the number of collected tables are left in place.
     *
     * @param xmlData document text containing {@code @TABLE@} placeholders
     * @return the text with placeholders substituted
     */
    public String replaceXmlTable(String xmlData) {
        String marked = xmlData.replaceAll("@TABLE@", "\u0003@TABLE@");
        return fillPlaceholders(marked, true);
    }

    /**
     * Replaces the {@code @TABLE@} placeholders, in order, with HTML table
     * markup. Placeholders beyond the number of collected tables are left in
     * place.
     *
     * @param htmlData document text containing {@code @TABLE@} placeholders
     * @return the text with placeholders substituted
     */
    public String replaceHtmlTable(String htmlData) {
        return fillPlaceholders(htmlData, false);
    }

    /**
     * Substitutes the n-th placeholder with the n-th table in a single pass,
     * so earlier substitutions are kept and table text is inserted literally.
     */
    private String fillPlaceholders(String text, boolean asXml) {
        Matcher placeholders = Pattern.compile("@TABLE@").matcher(text);
        StringBuffer out = new StringBuffer();
        int index = 0;
        while (index < tableData.size() && placeholders.find()) {
            String table = tableData.get(index++);
            String markup = asXml ? xmlTable(table) : htmlTable(table);
            // quoteReplacement: cell text may contain '$' or '\'.
            placeholders.appendReplacement(out, Matcher.quoteReplacement(markup));
        }
        placeholders.appendTail(out);
        return out.toString();
    }

    /**
     * Renders a flattened table as a single-line HTML {@code <table>}.
     *
     * @param table flattened table using @TD@ / @TR@ markers
     * @return HTML markup
     */
    public String htmlTable(String table) {
        StringBuilder res = new StringBuilder("<table border='1'>");
        for (String row : table.split("@TR@")) {
            res.append("<tr>");
            for (String cell : row.split("@TD@")) {
                res.append("<td>").append(cell).append("</td>");
            }
            res.append("</tr>");
        }
        res.append("</table>");
        return res.toString();
    }

    /**
     * Renders a flattened table as tab-indented, multi-line table markup for
     * embedding in the XML output.
     *
     * @param table flattened table using @TD@ / @TR@ markers
     * @return indented table markup
     */
    public String xmlTable(String table) {
        StringBuilder res = new StringBuilder("\t\t<table border='1'>\n");
        for (String row : table.split("@TR@")) {
            res.append("\t\t\t<tr>\n");
            for (String cell : row.split("@TD@")) {
                res.append("\t\t\t\t<td>").append(cell).append(" </td>\n");
            }
            res.append("\t\t\t</tr>\n");
        }
        res.append("\t\t</table>\n");
        return res.toString();
    }
}
这个方法可以读取图片、公式和一些表格,但是还有一个问题:docx 转 doc 以后,docx 版本的公式不能转成 doc 的公式 3.0,会自动转成图片,而读取时每个转换出来的公式会得到两张图片。总的来说,docx 公式转 doc 后读取会有重复,这样按顺序定位图片位置就会出错。客户不愿意了:不让我用 docx,还不让我用他们的公式吗?不行,还得改。下面是最终解决方案。
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.commons.io.FileUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPOutputStream;
/**
 * Converts a binary .doc file to HTML with POI's WordToHtmlConverter,
 * extracting pictures next to it and converting WMF pictures to SVG.
 *
 * @author shihao
 * @date 2020-03-24
 */
public class Word2007ToHtml {

    /**
     * Converts {@code path + file} to {@code 1.html} inside {@code path}.
     * Pictures are written to {@code path + suggestedFileName}; every WMF
     * picture is additionally converted to an SVG file and references to it in
     * the HTML are rewritten to the .svg name.
     *
     * <p>{@code path} is concatenated directly with file names, so it must
     * end with a path separator.
     *
     * @param path directory (with trailing separator) holding the document
     * @param file file name of the .doc document
     * @throws Throwable on any read, conversion or write failure
     */
    public void wordDoc(String path, String file) throws Throwable {
        List<String> wmfPath = new ArrayList<>();
        HWPFDocument wordDocument;
        try (InputStream input = new FileInputStream(path + file)) {
            wordDocument = new HWPFDocument(input);
        }
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                // Reference pictures by their suggested name; the files
                // themselves are written in the loop below.
                return suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);

        List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
        if (pics != null) {
            System.out.println(pics.size());
            for (Picture pic : pics) {
                String pictureName = pic.suggestFullFileName();
                try (OutputStream imageOut = new FileOutputStream(path + pictureName)) {
                    pic.writeImageContent(imageOut);
                } catch (FileNotFoundException e) {
                    e.printStackTrace();
                    // Not written, so there is nothing to convert later.
                    continue;
                }
                String suffix = pictureName.substring(pictureName.lastIndexOf(".") + 1);
                if (suffix.equals("wmf")) {
                    wmfPath.add(pictureName);
                }
            }
        }

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        // Decode with the encoding the serializer was told to use rather than
        // the platform default.
        String content = outStream.toString("UTF-8");
        System.out.println(content);

        for (String wmf : wmfPath) {
            String svg = wmf.substring(0, wmf.lastIndexOf(".")) + ".svg";
            convert(path + wmf, path + svg);
            content = content.replace(wmf, svg);
        }
        FileUtils.writeStringToFile(new File(path, "1.html"), content, "UTF-8");
    }

    /**
     * Converts a WMF file to SVG (gzip-compressed when {@code dest} ends in
     * ".svgz"). Failures are reported on standard output and not rethrown.
     *
     * @param file path of the WMF source
     * @param dest path of the SVG file to write
     */
    public void convert(String file, String dest) {
        try (InputStream in = new FileInputStream(new File(file))) {
            WmfParser parser = new WmfParser();
            final SvgGdi gdi = new SvgGdi(false);
            parser.parse(in, gdi);
            Document doc = gdi.getDocument();
            OutputStream out = new FileOutputStream(dest);
            if (dest.endsWith(".svgz")) {
                out = new GZIPOutputStream(out);
            }
            output(doc, out);
        } catch (Exception e) {
            System.out.println("WMF to SVG conversion failed for " + file + ": " + e.getMessage());
        }
    }

    /**
     * Serialises {@code doc} as indented UTF-8 XML with the SVG 1.0 doctype
     * and closes {@code out}, also when serialisation fails.
     *
     * @param doc DOM document to serialise
     * @param out destination stream; closed by this method
     * @throws Exception if the transformer cannot be created or fails
     */
    public void output(Document doc, OutputStream out) throws Exception {
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.METHOD, "xml");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN");
            transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");
            transformer.transform(new DOMSource(doc), new StreamResult(out));
            out.flush();
        } finally {
            out.close();
        }
    }
}
这个方案处理公式和图片定位都很 ok。它生成的也是 xhtml,用上方的读取类读取就好了;如果乱码请注意编码。最后我再贴一下读取 html 的代码。
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
/**
 * Reads an HTML file into a single string.
 *
 * @author shihao
 */
public class ReadHtml {

    /**
     * Reads the file at {@code url} as UTF-8 and returns its lines joined
     * together without line separators. The result is also printed to
     * standard output.
     *
     * <p>If the path is not an existing regular file, the result is the empty
     * string. Read errors are printed and whatever was read up to that point
     * is returned.
     *
     * @param url path of the file to read
     * @return the file content without line breaks, never null
     */
    public String read(String url) {
        StringBuilder fileContent = new StringBuilder();
        try {
            File f = new File(url);
            if (f.isFile() && f.exists()) {
                // try-with-resources closes the reader even when a read fails.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(f), "utf8"))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        fileContent.append(line);
                    }
                }
            }
        } catch (Exception e) {
            System.out.println("读取文件内容操作出错");
            e.printStackTrace();
        }
        String result = fileContent.toString();
        System.out.println("读取:" + result);
        return result;
    }
}
重要的事情说三遍注意编码,文件编码和解析的编码