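假设 xml 文件都存放在这些文件夹里面
文件夹里面还有子文件夹,可能存在多层嵌套,脚本会递归查找
注意:每个 xml 文件都对应一个同名的 jpg 文件(扩展名为 .jpg 或 .JPG)!
处理后,所有 xml 会按照标签(object 的 name)分类,连同对应的图片一起复制到对应标签的文件夹里面
每个标签文件夹下会创建 anotation/imgs/labels 三个子文件夹(代码中目录名拼写为 anotation),xml 复制到 anotation,图片复制到 imgs,labels 只创建、不写入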
import os
import xml.etree.ElementTree as ET
from shutil import copyfile
import pathlib as pathlib
from collections import defaultdict
def parse_obj(filename):
    """Read the object labels out of one annotation XML file.

    Args:
        filename: Path (or file object) handed straight to ``ET.parse``.

    Returns:
        A list with one ``{'name': <label text>}`` dict per ``<object>``
        element found directly under the document root, in document order.

    Raises:
        AttributeError: If an ``<object>`` element has no ``<name>`` child.
    """
    root = ET.parse(filename).getroot()
    # Each <object> carries its class label as the text of its <name> child.
    return [{'name': node.find('name').text} for node in root.findall('object')]
def getallfiles(path):
    """Recursively collect the paths of all ``.xml`` files below *path*.

    Only regular directory entries reported by ``os.walk`` as files are
    considered, and the name must end in exactly ``.xml`` (case-sensitive).
    Directories are never returned, even if their name ends in ``.xml``.

    Args:
        path: Root directory to search; nested sub-directories are walked.

    Returns:
        A list of ``os.path.join(dirpath, name)`` strings in ``os.walk``
        order. Empty if *path* does not exist or holds no ``.xml`` files.
    """
    file_xml = []
    for dirpath, _dirnames, filenames in os.walk(path):
        # Match the full ".xml" suffix on file names only; a looser prefix
        # test would also pick up extensions such as ".xmp" or ".xmi".
        file_xml.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith('.xml')
        )
    return file_xml
def _group_by_label(recs):
    """Map each label to the XML files containing at least one such object.

    Args:
        recs: Mapping of XML path -> list of ``{'name': label}`` dicts.

    Returns:
        A ``defaultdict(list)`` of label string -> list of XML paths, with
        each path listed at most once per label.
    """
    grouped = defaultdict(list)
    for xml_path, objects in recs.items():
        # dict.fromkeys de-duplicates while keeping first-seen order, so a
        # file holding several objects of one label is queued only once.
        # str() keeps a missing label text (None) usable as a folder name.
        for label in dict.fromkeys(str(obj['name']) for obj in objects):
            grouped[label].append(xml_path)
    return grouped


def _find_image(xml_path):
    """Return the image sharing *xml_path*'s stem, or None if there is none.

    ``.jpg`` is tried first, then ``.JPG``. Only the file extension is
    swapped, so directory names containing "xml" are left intact.
    """
    stem = os.path.splitext(xml_path)[0]
    for ext in ('.jpg', '.JPG'):
        candidate = stem + ext
        if os.path.exists(candidate):
            return candidate
    return None


def _copy_pair(xml_path, image_path, label_dir):
    """Copy an image and its XML into *label_dir*; return True on success.

    The image goes to ``<label_dir>/imgs`` and the XML to
    ``<label_dir>/anotation``, both under their original file names.
    A failed copy is reported with a warning and does not abort the run.
    """
    try:
        copyfile(image_path,
                 os.path.join(label_dir, 'imgs', os.path.basename(image_path)))
        copyfile(xml_path,
                 os.path.join(label_dir, 'anotation', os.path.basename(xml_path)))
    except OSError as err:
        print("warning:", image_path, err)
        return False
    return True


if __name__ == '__main__':
    # Source tree that is searched recursively for annotation XML files.
    filenames = getallfiles(r'D:\Yuqian_Yang\project_yolov4\yolo\data\smokephone\imgs')
    # Destination root; raw string so the backslashes are taken literally.
    aim_root = r'D:\Yuqian_Yang\project_yolov4\yolo\data\smoke'

    recs = {name: parse_obj(name) for name in filenames}
    copy_key = _group_by_label(recs)

    os.makedirs(aim_root, exist_ok=True)
    for label, xml_paths in copy_key.items():
        label_dir = os.path.join(aim_root, label)
        # "anotation" spelling is kept so existing output trees still match.
        # exist_ok also repairs a label folder left without its sub-folders.
        for sub in ('anotation', 'imgs', 'labels'):
            os.makedirs(os.path.join(label_dir, sub), exist_ok=True)

        for xml_path in xml_paths:
            image_path = _find_image(xml_path)
            if image_path is None:
                print("Error: no such jpg file:",
                      os.path.splitext(xml_path)[0] + '.jpg')
                continue
            _copy_pair(xml_path, image_path, label_dir)