先上一段代码
import re

import docx
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Pt
from docx.shared import RGBColor
# Build 'result.docx' from a '|'-separated vocabulary file: every usable input
# line becomes one paragraph (word, frequency, meanings, example sentences)
# followed by a page break.
# NOTE(review): the pasted source had lost its indentation; the nesting below
# (in particular which `if` each `else` belongs to) is reconstructed -- confirm
# against the author's original file.
SOURCE_PATH = 'F:\\vscode\\.vscode\\python\\python课\\文件操作\\结果汇总\\newans.txt'
OUTPUT_PATH = 'result.docx'
BODY_FONT = u'黑体'
# Quoted items of 20 characters or fewer are not written to the document.
MIN_ITEM_LENGTH = 20
# Meanings are the double-quoted substrings of field 3, example sentences the
# single-quoted substrings of field 4.
DOUBLE_QUOTED = re.compile('"(.*?)"')
SINGLE_QUOTED = re.compile("'(.*?)'")


def _add_label(paragraph, text, color):
    """Append *text* to *paragraph* as a bold, 17pt run in *color*; return the run."""
    label = paragraph.add_run(text)
    label.bold = True
    label.font.size = Pt(17)
    label.font.color.rgb = color
    return label


def _add_numbered_items(paragraph, items):
    """Append every item longer than MIN_ITEM_LENGTH as a numbered line.

    Each written item is also echoed to stdout. Shorter items are dropped
    silently and do not consume a number.
    """
    count = 0
    for item in items:
        if len(item) > MIN_ITEM_LENGTH:
            count += 1
            paragraph.add_run(str(count) + '. ' + item + '\n')
            print(item)


# Read all lines up front; the context manager closes the file handle.
with open(SOURCE_PATH, encoding='utf-8') as source_file:
    all_data = source_file.readlines()

document = Document()
# add the title
document.add_heading('四六级单词', 0)
# add a paragraph
document.add_paragraph('test paragraphs')

# set the font
normal_style = document.styles['Normal']
normal_style.font.name = BODY_FONT
# font.name alone does not cover East Asian text: Word picks the font for CJK
# characters from the w:eastAsia attribute, so set it as well.
normal_style.element.rPr.rFonts.set(qn('w:eastAsia'), BODY_FONT)

# cover paragraph: left aligned, large coloured "START", then a page break
p = document.add_paragraph()
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
run = p.add_run(u'START')
run.font.color.rgb = RGBColor(54, 95, 145)
run.font.size = Pt(26)
document.add_page_break()

for line in all_data:
    data = line.replace('>>', '').split('|')
    # Expected fields: word | frequency | meanings | sentences. Lines with
    # fewer fields (e.g. a blank trailing line) would raise IndexError below.
    if len(data) < 4:
        print("skip")
        continue

    mean = DOUBLE_QUOTED.findall(data[2])
    # Entries without any quoted meaning are not written at all.
    if not mean:
        print("skip")
        continue

    word = data[0]
    frequency = data[1]

    # new paragraph for this entry, left aligned
    p = document.add_paragraph()
    p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT

    # the word
    _add_label(p, '单词:', RGBColor(54, 95, 145))
    p.add_run(word + '\n')

    # the frequency (only the label is bold, the value stays regular)
    _add_label(p, '词频:', RGBColor(54, 100, 100))
    p.add_run(frequency + '\n')

    # the meanings; `mean` is known to be non-empty here because empty
    # results were skipped above
    _add_label(p, '词义:\n', RGBColor(15, 15, 145))
    _add_numbered_items(p, mean)

    # the example sentences
    sentence = SINGLE_QUOTED.findall(data[3])
    _add_label(p, '例句:\n', RGBColor(54, 95, 45))
    if sentence:
        _add_numbered_items(p, sentence)
    else:
        print(word + ' has no sentence')

    # one entry per page
    document.add_page_break()

# save as
document.save(OUTPUT_PATH)
这段代码是我用来把分词结果写入 docx 文件的,下面借此总结一下 python-docx 的基本用法。
概述
个人理解,为自己而作
docx这个模块的用法和主流的文档操作方式很接近。我认为这个模块以段落(paragraph)和文字块(run)为基本单位——这里只讨论文字,不包括图片和表格,不过图片和表格的操作也大同小异。
除了标题(title / heading)之外,其他文字一般都是先用 add_paragraph() 创建一个段落(paragraph),再用 add_run('text') 往这个段落里插入文字。如果我们把这次插入的返回值存入一个变量,如
# add_paragraph() is a method of a document object (the `document` created
# with Document() in the script above), not of the Document factory itself.
p = document.add_paragraph()
# add_run() returns the run it created; keep it so the text can be styled later.
content = p.add_run("words")
我们就可以对content操作,然后改变'words'的显示形式
如果我们只需要设置一个属性,比如粗体显示
# Turn on bold for the run stored in `content`.
content.bold = True
这样就会粗体显示了
其他操作也是大同小异,当然了,基本操作,具体可以看文档