第一步:在 pom.xml 中添加 PDFBox 的 Maven 依赖
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.12</version>
</dependency>
第二步:读取 PDF 文件的文本内容
import java.io.File;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
/**
 * Helper for extracting plain text from PDF files using Apache PDFBox.
 */
public class PDFUtil {

    /**
     * Reads the PDF at {@code filePath} and returns the text of all its pages.
     *
     * <p>This method never throws: any failure while opening or parsing the
     * file (including a {@code null} path) is reported through the returned
     * string instead.
     *
     * @param filePath path of the PDF file to read
     * @return the extracted text, or the fixed message
     *         {@code "读取PDF文件失败!!!"} if the file could not be read
     */
    public static String getContent(String filePath) {
        String content = "";
        // try-with-resources guarantees the document is closed on every path;
        // previously the PDDocument was never closed and leaked its file handle.
        try (PDDocument document = PDDocument.load(new File(filePath))) {
            // Total number of pages, used as the upper bound of extraction.
            int pages = document.getNumberOfPages();

            PDFTextStripper stripper = new PDFTextStripper();
            // Request output sorted by position on the page.
            stripper.setSortByPosition(true);
            // Extract from the first page through the last one (1-based, inclusive).
            stripper.setStartPage(1);
            stripper.setEndPage(pages);

            content = stripper.getText(document);
        } catch (Exception e) {
            // Deliberately broad: callers rely on receiving this message
            // rather than an exception, so the cause is not propagated.
            content = "读取PDF文件失败!!!";
        }
        return content;
    }
}