划分数据集
数据集存在于0none
1pouting
2smile
3openmouth
四个文件夹中。根据要求,我们按照 9:1 的比例将其划分为训练集和验证集
注意,这里我们对每一个文件夹下的数据分别按比例进行划分,以保证各类别数据分布均匀
import os
import random
import shutil
from shutil import copy2
# Split every class folder into a training part and a validation part.
# Each folder is split on its own so that all classes keep the same
# train/val proportion (the first 90% of the listing goes to train).
foldernames = ['0none', '1pouting', '2smile', '3openmouth']
trainDir = 'train/'
valDir = 'val/'

# copy2() does not create missing destination directories; without these
# calls the first copy fails when 'train/' or 'val/' does not exist yet.
os.makedirs(trainDir, exist_ok=True)
os.makedirs(valDir, exist_ok=True)

for foldername in foldernames:
    datafile = os.listdir(foldername)
    num_train = len(datafile)
    print("Images in " + foldername + " : " + str(num_train))
    # Entries are taken in os.listdir() order: positions below 90% of the
    # folder size are copied to the training set, the rest to validation.
    for num, name in enumerate(datafile):
        filename = os.path.join(foldername, name)
        if num < num_train * 0.9:
            copy2(filename, trainDir)
        else:
            copy2(filename, valDir)
分割完成后,得到train
val
两个文件夹,分别存储训练和测试的图片数据
查看训练集的图片数
查看测试集的图片数
对数据集进行划分后,还要为数据设置标签
#!/usr/bin/env sh
# Build the label list files train.txt and val.txt under $DATA.
# Every output line has the form "<file name> <label>", where the label is
# chosen from the file-name suffix:
#   *none.jpg -> 0, *pouting.jpg -> 1, *smile.jpg -> 2, *open.jpg -> 3
DATA=dataset
DATA_TRAIN=dataset/train
DATA_VAL=dataset/val

# write_labels IMAGE_DIR LIST_FILE
# Appends one "<file name> <label>" line per matching image to LIST_FILE.
# The -name patterns are quoted so the shell cannot expand them against the
# current directory before find sees them.
# cut keeps the 3rd '/'-separated field, which is the bare file name only
# while IMAGE_DIR is exactly two path components deep (e.g. dataset/train).
write_labels() {
    find "$1" -name '*none.jpg' | cut -d '/' -f3 | sed 's/$/ 0/' >> "$2"
    find "$1" -name '*pouting.jpg' | cut -d '/' -f3 | sed 's/$/ 1/' >> "$2"
    find "$1" -name '*smile.jpg' | cut -d '/' -f3 | sed 's/$/ 2/' >> "$2"
    find "$1" -name '*open.jpg' | cut -d '/' -f3 | sed 's/$/ 3/' >> "$2"
}

echo "Creating train.txt ..."
# Remove any previous list first because write_labels only appends.
rm -rf "$DATA/train.txt"
write_labels "$DATA_TRAIN" "$DATA/train.txt"
echo "Creat train.txt is done"

# The file produced here is val.txt, so the message names val.txt.
echo "Creating val.txt ..."
rm -rf "$DATA/val.txt"
write_labels "$DATA_VAL" "$DATA/val.txt"
echo "Creat val.txt is done"