import xml.etree.ElementTree as ET

import pandas as pd
from lxml import etree
# Convert an XML export whose records are <row> elements in the
# "#RowsetSchema" namespace into a pandas DataFrame, one DataFrame row per
# XML row and one column per attribute listed in ``dfcols``.

# Location of the XML export to load.
path = r"E:\飞书下载\ycpg_gtxx.XML"

# Attribute names read from every <row> element; they are also used as the
# DataFrame column labels, in this order.
dfcols = [
    'ZZID', 'XB', 'ZCZT', 'DadID', 'MaID', 'CSRQ', 'RCRQ', 'DNRQ',
    'CSCBH', 'ZCBH', 'LY', 'PZBH', 'PXBH', 'CSTC', 'EQH', 'CSZ',
    'TWHZS', 'DNTZ', 'ZRTS', 'YRTS',
]

# ``ET.parse`` returns an ElementTree; ``findall`` searches from its root.
root = ET.parse(path)
rows = root.findall('.//{#RowsetSchema}row')

# Collect every record first and build the DataFrame once.  Appending to the
# frame row by row copies the whole frame on each iteration (quadratic time),
# and ``DataFrame.append`` no longer exists in pandas >= 2.0.
# ``row.get`` yields None for an attribute that is absent on a given row.
records = [[row.get(col) for col in dfcols] for row in rows]
df_xml = pd.DataFrame(records, columns=dfcols)

print(df_xml)
逐行调用 DataFrame.append 速度会比较慢(每次追加都会复制整个 DataFrame),6000 行大概需要 10 秒左右。
输入文件
输出结果