package per.qy.dexter.fileoperate;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.UUID;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.Test;
/**
 * Manual smoke test for PDFBox 2.0.x: prints the text of every page of a sample PDF and
 * saves every image XObject found in each page's resources as a PNG file.
 *
 * <p>The input document and the output directory are hard-coded local Windows paths, so
 * this test only works on a machine where those paths exist.
 */
public class PdfTest {

    /** Sample document that {@link #testPdf()} reads. */
    private static final String PDF_PATH = "D:\\temp\\temp\\test.pdf";

    /** Path prefix (including the trailing separator) under which extracted images are written. */
    private static final String IMAGE_OUTPUT_PREFIX = "D:\\temp\\temp\\";

    /** Line printed after the text of each page. */
    private static final String PAGE_SEPARATOR = "-=-=-=-=-=-=-=-=-=-=-=-=-";

    /**
     * Reads the sample PDF page by page, printing each page's text to standard output and
     * writing each page-level image to a randomly named {@code .png} file.
     *
     * <p>Does nothing when the configured path does not end with {@code ".pdf"}.
     *
     * @throws AssertionError if the document cannot be loaded (including password-protected
     *     documents, since {@code InvalidPasswordException} is an {@code IOException}), read,
     *     or if an image cannot be written; the underlying exception is attached as the cause
     */
    @Test
    public void testPdf() {
        String path = PDF_PATH;
        if (!path.endsWith(".pdf")) {
            return;
        }

        // try-with-resources closes the document even when extraction fails part-way.
        try (PDDocument document = PDDocument.load(new File(path))) {
            // One stripper is enough: only the page window changes between iterations.
            PDFTextStripper stripper = new PDFTextStripper();
            // Emit text in reading (position) order.
            stripper.setSortByPosition(true);

            int pageCount = document.getNumberOfPages();
            // Process the document one page at a time.
            for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
                // The stripper counts pages from 1, PDDocument.getPage from 0.
                printPageText(stripper, document, pageIndex + 1);
                savePageImages(document.getPage(pageIndex));
            }
        } catch (IOException e) {
            // Surface the failure instead of swallowing it, so the test cannot pass silently.
            throw new AssertionError("Failed to process PDF " + path, e);
        }
    }

    /**
     * Prints the trimmed text of a single page followed by the page separator line.
     *
     * @param stripper   text stripper to use; its start/end page are overwritten
     * @param document   the open document
     * @param pageNumber 1-based page number to extract
     * @throws IOException if text extraction fails
     */
    private static void printPageText(PDFTextStripper stripper, PDDocument document, int pageNumber)
            throws IOException {
        stripper.setStartPage(pageNumber);
        stripper.setEndPage(pageNumber);
        String text = stripper.getText(document);
        System.out.println(text.trim());
        System.out.println(PAGE_SEPARATOR);
    }

    /**
     * Writes every image XObject listed directly in the page's resources to a PNG file named
     * with a random UUID.
     *
     * @param page the page whose resources are scanned
     * @throws IOException if an image cannot be decoded or written
     */
    private static void savePageImages(PDPage page) throws IOException {
        PDResources resources = page.getResources();
        if (resources == null) {
            // A page without a resources dictionary has no images to extract.
            return;
        }
        Iterable<COSName> xObjectNames = resources.getXObjectNames();
        if (xObjectNames == null) {
            return;
        }
        for (COSName name : xObjectNames) {
            if (!resources.isImageXObject(name)) {
                continue;
            }
            PDImageXObject pdImage = (PDImageXObject) resources.getXObject(name);
            BufferedImage image = pdImage.getImage();
            String target = IMAGE_OUTPUT_PREFIX + UUID.randomUUID() + ".png";
            try (FileOutputStream out = new FileOutputStream(target)) {
                // ImageIO.write returns false when no suitable writer accepts the image.
                if (!ImageIO.write(image, "png", out)) {
                    throw new IOException("No PNG writer available for image " + name.getName());
                }
            }
        }
    }
}
Java:使用 PDFBox 2.0.14 读取 PDF 文本和图片
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...