用 Python 从指定目录读取 Word 文档,
把每个文档作为一个章节生成 EPUB 文件,
并以每个文档的文件名(不含扩展名)作为目录中的章节名称。
依赖(pip install -r requirements.txt):
python-docx(导入名为 docx)
ebooklib
natsort
实现如下:
import os
import uuid
import docx
from ebooklib import epub
# Define the path to the directory containing the DOC files
from natsort import natsorted
# Source folder holding the Word documents; its base name doubles as the
# book title below.
doc_dir = "D:\\Download\\文稿集"

# Assemble the EPUB container, then fill in identifier, title and language.
book = epub.EpubBook()
book_uid = str(uuid.uuid4())
book_title = os.path.basename(doc_dir)
book.set_identifier(book_uid)
book.set_title(book_title)
book.set_language("zh")
def docx2html(doc):
    """Render the paragraph text of a document as a sequence of ``<p>`` elements.

    Only the plain text of each paragraph is emitted; nothing else from the
    document is read.

    Args:
        doc: An object exposing ``paragraphs``, an iterable whose items each
            have a ``text`` string attribute (e.g. a python-docx document).

    Returns:
        One ``<p>...</p>`` line per paragraph, joined with newlines. An empty
        string when the document has no paragraphs.
    """
    # Imported locally so the function stays self-contained.
    from html import escape

    # Paragraph text is arbitrary user content: escape the markup-significant
    # characters (&, <, >) so they cannot break the generated XHTML.
    return "\n".join(
        f"<p>{escape(p.text, quote=False)}</p>" for p in doc.paragraphs
    )
# python-docx raises this when a file is not an OOXML (zip) package, which is
# the case for legacy binary .doc files and for Word's "~$" lock files.
from docx.opc.exceptions import PackageNotFoundError

toc = []
# Process the directory entries in natural order (natsort), one chapter per
# readable Word document.
files = os.listdir(doc_dir)
sorted_files = natsorted(files)
chapter_no = 0
for filename in sorted_files:
    # The chapter title is the file name without its extension.
    chapter_name, ext = os.path.splitext(filename)
    if ext.lower() not in (".doc", ".docx"):
        continue
    try:
        doc = docx.Document(os.path.join(doc_dir, filename))
    except PackageNotFoundError:
        # Skip files python-docx cannot parse instead of aborting the run.
        print(f"Skipping unreadable Word file: {filename}")
        continue
    chapter_no += 1
    # Use sequential ASCII names inside the archive: the raw file name may
    # contain spaces or start with a digit, which is not a valid XML id and
    # makes for fragile in-archive paths.
    chapter_file = f"chapter_{chapter_no}.xhtml"
    chapter = epub.EpubHtml(title=chapter_name, file_name=chapter_file)
    chapter.content = docx2html(doc)
    # Register the chapter and place it in reading order.
    book.add_item(chapter)
    book.spine.append(chapter)
    # Table-of-contents entry pointing at the chapter file.
    toc.append(epub.Link(chapter_file, chapter_name, f"navpoint_{chapter_no}"))
book.toc = tuple(toc)
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
# Write the book next to the source documents, named after the book title.
epub_path = os.path.join(doc_dir, f"{book.title}.epub")
epub.write_epub(epub_path, book, {})