使用模块 docx(由第三方库 python-docx 提供,导入名为 docx)
from docx import Document
打开目标文档
wordDoc = Document(docPath)
获取目标文档的第 n 个表格(下标从 0 开始,n = 0 表示第一个表格)
targetTable = wordDoc.tables[n]
table 对象及其行、单元格的相关字段
table.rows:当前表格的所有行的集合
row.cells:当前行的所有单元格的集合
cell.text:当前单元格的文本内容
完整代码
from docx import Document
def getTargetIndexTableInTargetDoc(docPath, index):
    """Return the cell texts of the index-th table in the given document.

    Args:
        docPath: Document location, passed unchanged to ``Document``.
        index: Position of the wanted table within the document's
            ``tables`` sequence.

    Returns:
        A list with one entry per table row; each entry is the list of
        ``cell.text`` values for the cells of that row, in order.
    """
    table = Document(docPath).tables[index]
    # One inner list per row, one text value per cell in that row.
    return [[cell.text for cell in row.cells] for row in table.rows]
if __name__ == '__main__':
    # Example parameters: which table to read, and from which document.
    # The path literal reads "target docx path".
    index = 0
    docxPath = '目标docx路径'
    # Convert the contents of table number `index` into a list of rows.
    tableDataList = getTargetIndexTableInTargetDoc(
        docPath=docxPath,
        index=index,
    )