如何将下载下来的文件名乱码的pdf按论文标题命名,在windows系统下,利用Python3.6,使用pdfminer库来实现。
按照https://www.jianshu.com/p/742a28decc58中的程序来实现,按照原作者的程序,能实现步骤2(读pdf获取标题),但执行步骤3(更改文件名)后,文件名未被更改,也没有报错,请问是什么原因?
程序如下:
#encoding:utf-8
from urllib.request import urlopen
from pdfminer.pdfinterp import PDFResourceManager,process_pdf
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from io import StringIO #StringIO就是在内存中读写str
from io import open
import os
from os import walk
#步骤2(读pdf获取标题)
def readPDF(pdffile):
rsrcmgr=PDFResourceManager()
retstr=StringIO()
laparams=LAParams()
device=TextConverter(rsrcmgr,retstr,laparams=laparams)
process_pdf(rsrcmgr,device,pdffile)
device.close()
content=retstr.getvalue()
retstr.close()
strs = str(content).split("\n")
#选择第 8,9,10行作为标题,但这种方法不一定适合别的pdf文档
title = strs[8]+strs[9]+strs[10]
return title
pdffile=open('D:\\pdfjiexi\\3.pdf',"rb")
title =readPDF(pdffile)
print(title)
pdffile.close()
#步骤3(更改文件名)
def rename():
walk = os.walk('/pdfjiexi/')
i = 0;
for root, dirs, files in walk:
#获取文件的全路径
for name in files:
pdffile=open(os.path.join(root, name),"rb")
title = readPDF(pdffile)
print(title)
os.rename(os.path.join(root, name), os.path.join(root, title+".pdf"))
i += 1