数据集自动划分(代码)

在你自己的项目里新建一个 .py 文件,文件名随意,然后把下面的代码复制进去。

import os

import shutil

import random

# Seed the module-level RNG with a fixed value so that random.shuffle()
# inside split_data() yields the same permutation on every run for the
# same input list.
random.seed(0)

def _copy_files(names, src_dir, dst_dir, verbose=False):
    """Copy every file named in *names* from *src_dir* into *dst_dir*.

    The destination directory is created on demand; nothing is created when
    *names* is empty. With *verbose* set, each file name is printed before
    it is copied.
    """
    if not names:
        return
    os.makedirs(dst_dir, exist_ok=True)
    for name in names:
        if verbose:
            print(name)
        shutil.copy(os.path.join(src_dir, name), os.path.join(dst_dir, name))


def split_data(file_path, xml_path, new_file_path, train_rate, val_rate, test_rate):
    """Split an image/label dataset into train, val and test folders.

    Images in *file_path* are paired with the label in *xml_path* that has
    the same file name without extension. The pairs are shuffled with the
    module-level ``random`` generator and copied (not moved) to::

        new_file_path/{train,val,test}/{images,labels}/

    Args:
        file_path: Directory containing the image files.
        xml_path: Directory containing the label files.
        new_file_path: Root directory that receives the split dataset.
        train_rate: Fraction of the pairs placed in the train split.
        val_rate: Fraction of the pairs placed in the val split.
        test_rate: Accepted for interface compatibility only; the test
            split always receives whatever remains after train and val.

    Files without a counterpart in the other directory are reported on
    stdout and skipped. Sub-directories are ignored. When no pair is found
    the function returns without creating anything.
    """
    # os.listdir() returns entries in arbitrary order, so the two listings
    # must never be zipped positionally: sort them and match by file stem.
    images = sorted(
        name for name in os.listdir(file_path)
        if os.path.isfile(os.path.join(file_path, name))
    )
    labels_by_stem = {}
    for name in sorted(os.listdir(xml_path)):
        if os.path.isfile(os.path.join(xml_path, name)):
            labels_by_stem.setdefault(os.path.splitext(name)[0], name)

    pairs = []
    matched_stems = set()
    for image in images:
        stem = os.path.splitext(image)[0]
        label = labels_by_stem.get(stem)
        if label is None:
            print('skipped (no matching label): ' + image)
            continue
        matched_stems.add(stem)
        pairs.append((image, label))
    for stem in sorted(labels_by_stem):
        if stem not in matched_stems:
            print('skipped (no matching image): ' + labels_by_stem[stem])

    total = len(pairs)
    if total == 0:
        return

    random.shuffle(pairs)

    train_end = int(train_rate * total)
    val_end = int((train_rate + val_rate) * total)
    # Only the train split echoes its file names, as a progress indicator.
    splits = (
        ('train', pairs[:train_end], True),
        ('val', pairs[train_end:val_end], False),
        ('test', pairs[val_end:], False),
    )
    for split_name, split_pairs, verbose in splits:
        split_root = os.path.join(new_file_path, split_name)
        _copy_files(
            [image for image, _ in split_pairs],
            file_path,
            os.path.join(split_root, 'images'),
            verbose,
        )
        _copy_files(
            [label for _, label in split_pairs],
            xml_path,
            os.path.join(split_root, 'labels'),
            verbose,
        )

if __name__ == '__main__':
    # Directory holding the source images.
    file_path = "D:/Files/dataSet/drone_images"
    # Directory holding the label files.
    xml_path = 'D:/Files/dataSet/drone_labels'
    # Root directory that receives the split dataset.
    new_file_path = "D:/Files/dataSet/droneData"

    # Split the dataset 6:2:2 (train : val : test).
    split_data(
        file_path=file_path,
        xml_path=xml_path,
        new_file_path=new_file_path,
        train_rate=0.6,
        val_rate=0.2,
        test_rate=0.2,
    )


以上代码参考自:

【yolov5】将标注好的数据集进行划分(附完整可运行python代码)_yolov5数据集划分_freezing?的博客-CSDN博客

把上面代码中的 file_path、xml_path、new_file_path 分别改成你自己的图片目录、标签目录和输出目录,就可以运行了。

这样就直接完成了数据集的划分,很方便!


运行后,控制台会依次打印出训练集的图片文件名和标签文件名;看到这些输出即表示划分成功。
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容