迁移学习
迁移学习的具体内容有很多大佬文章已经说得很清楚了,这里就不献丑了。
本文尝试通过迁移学习,将Pytorch中的已经预训练好的ResNet18网络用于动物图片分类。
任务
1.纲分类任务,预测该动物是属于哺乳类(Mammals)还是鸟纲(Birds)
2.种分类任务,预测该动物是鸡、兔还是鼠
3.多任务分类,同时预测纲和种
数据
数据在网盘自取(已更正)https://pan.baidu.com/s/1nrlpqWFHVRhiFNSAMsbn3w 提取码:4zrj
数据分为train dataset 和val dataset两个数据集,分别有890和80个图片数据,含有鸡、兔、鼠三种动物。数据分布如下:CLASSES = ['Mammals', 'Birds'] ,分别有580和310张图片,用0,1 对应;SPECIES = ['rabbits', 'rats', 'chickens'] 分别有300,270,310张图片,用0, 1, 2对应。数据信息如图所示
code
导入所需要的包.
这里被注释掉了,是因为起初使用的自己设定的网络,效果很差,后来就使用pytorch里自带的resNet网络进行了训练和预测。
from __future__ import print_function, division  # must be the first statement in the module

# standard library
import copy
import os
import random
import time

# third-party
import cv2  # used by the augmentation helpers (was used but never imported)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn  # nn.Linear is used below but was never imported
import torchvision
from PIL import Image
from torch import optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from torchvision.transforms import transforms
from tqdm import tqdm  # used in create_image (was used but never imported)

# project-local
from Classes_Network import *
#from Classes_Network import *
设定一些参数和路径
root_dir = './Stage_3 Multi-classification/'
train_annotations_file = 'Multi_train_annotation.csv'
val_annotations_file = 'Multi_val_annotation.csv'
CLASSES = ['Mammals', 'Birds']
SPECIES = ['rabbits', 'rats', 'chickens']
train_annotations_file与val_annotations_file文件标注了训练集和验证集的数据的路径以及label,格式如图所示。
pd.read_csv(root_dir + train_annotations_file)
根据数据集的格式完善了Dataset
class MyDataset():
def __init__(self,root_dir,annotations_file,transform=None):
self.root_dir = root_dir
self.annotations = annotations_file
self.transform = transform
# if not os.path.isfile(self.annotations_file):
# print(self.annotations + "does not exist")
self.file_info = pd.read_csv(root_dir +annotations_file,index_col=0)
self.size = len(self.file_info)
def __len__(self):
return self.size
def __getitem__(self,idx):
img_path = self.file_info['path'][idx]
label_classes = self.file_info['classes'][idx]
label_species = self.file_info['species'][idx]
img = Image.open(img_path).convert('RGB')
if self.transform:
img = self.transform(img)
return img, label_classes, label_species
设定transform,并将数据载入dataloader
train_transform = transforms.Compose([transforms.Resize((500, 500)),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
])
# val_transform = transforms.Compose([transforms.Resize((500, 500)),
# transforms.RandomHorizontalFlip(),
# transforms.ToTensor(),
# ])
val_transform = transforms.Compose([transforms.Resize((500, 500)),
transforms.ToTensor(),
])
train_dataset = MyDataset(root_dir,train_annotations_file,transform=train_transform)
val_dataset = MyDataset(root_dir,val_annotations_file,transform=val_transform)
train_loader = DataLoader(dataset=train_dataset,batch_size=16,shuffle=True)
val_loader = DataLoader(dataset=val_dataset,batch_size=1,shuffle=True)
data_loaders = {'train': train_loader, 'val': val_loader}
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
print(device)
定义模型, 损失函数,优化器
将内置的resnet18及与其相应的预训练模型参数导入,进行迁移学习。
param.requires_grad = False 将模型的中间层参数固定,只要最后的FC层参数可导。model_ft.fc = nn.Linear(num_ftrs, 32) 此处将原来的默认分类数1000改为32,是为了后续再加上两个并列的FC层分别对class和species进行分类,若改为2,则进行单一的二分类任务。
model_ft = models.resnet18(pretrained=True)#加载已经训练好的模型
# 使除最后一层的参数不可导,即不进行学习
for param in model_ft.parameters():
param.requires_grad = False
# classes分类结果输出
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 32)#将全连接层做出改变类别改为两类
class multi_out_model(torch.nn.Module):
def __init__(self,model_core):
super(multi_out_model,self).__init__()
self.resnet_model = model_core
self.classes = nn.Linear(in_features=32, out_features=2, bias=True)
self.species = nn.Linear(in_features=32, out_features=3, bias=True)
def forward(self,x):
x1 = self.resnet_model(x)
classes = self.classes(x1)
species = self.species(x1)
return classes, species
model_ft = multi_out_model(model_ft)
criterion = [nn.CrossEntropyLoss(),nn.CrossEntropyLoss()]
model_ft = model_ft.to(device)
network = model_ft
# criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized优化参数
# optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)
# Observe that only parameters of final layer are being optimized as
# opoosed to before.
optimizer_ft = optim.SGD([{"params":model_ft.resnet_model.fc.parameters()},
{"params":model_ft.classes.parameters()},
{"params":model_ft.species.parameters()}],lr=0.01,momentum=0.9)
optimizer = optimizer_ft
# Decay LR by a factor of 0.1 every 7 epochs使用学习率缩减
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7,gamma=0.1)
定义训练函数
def train_model(model, criterion, optimizer, scheduler, num_epochs=50,pretrain_model=None):
start_time = time.clock()
Loss_list = {'train':[],'val':[]}
classloss_list = {'train':[],'val':[]}
speciesloss_list = {'train':[],'val':[]}
Accuracy_list_classes = {'train':[],'val':[]}
Accuracy_list_species = {'train':[],'val':[]}
start_epoch = 0
if pretrain_model != None and os.path.exists(pretrain_model):
checkpoint = torch.load(pretrain_model)
model.load_state_dict(checkpoint['model'])
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch'] + 1
num_epochs = num_epochs +start_epoch
else:
print('无保存模型,从头开始训练')
best_model_wts = copy.deepcopy(model.state_dict())
best_acc = 0.0
best_loss = 100
这里设定了预训练模型,如果采用已经训练好的优秀的模型及其权重的话,就把pretrain_model设置为权重文件的路径+文件名,后面训练时会导入相关参数。
for epoch in range(start_epoch,num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-*' *10)
for phase in ['train','val']:
if phase == 'train':
model.train() # 在train和test的时候。BN层以及Dropout的处理方式不一样,其他都一样,所以没有这两类层的话,可以不进行声明
else:
model.eval()
running_loss = 0.0
running_classes_loss = 0.0
running_species_loss = 0.0
corrects_classes = 0
correct_species = 0
# Each epoch has a training and validation phase
for idx,data in enumerate(data_loaders[phase]):
img, label_classes, label_species = data
img = img.to(device)
label_classes = label_classes.to(device)
label_species = label_species.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
# track history if only in train
with torch.set_grad_enabled(phase == 'train'): # 当是train phase时,以下参数为可导,当为val时,后续包含参数不可导
output = model(img)
x_classes = output[0]
x_species = output[1]
# x_classes, x_species = model(img)
x_classes = x_classes.view(-1, 2) # 将softmax输出的列向量转换为行向量
x_species = x_species.view(-1, 3)
_, preds_classes = torch.max(x_classes, 1) # 输出行向量中最大的元素及其对应的索引值
_a,preds_species = torch.max(x_species, 1)
#损失函数,可以依实际情况设定。
loss_classes = criterion[0](x_classes, label_classes)
loss_species = criterion[1](x_species, label_species)
# loss = criterion(x_classes, label_classes) # 单分类时loss函数
if phase == 'train':
loss = 0.1 * loss_classes + 0.9* loss_species
loss.backward()
optimizer.step() # 进行权值更新
running_classes_loss += loss_classes.item() * img.size(0)
running_species_loss += loss_species.item() * img.size(0)
running_loss += loss.item() * img.size(0)
corrects_classes += torch.sum(preds_classes == label_classes)
correct_species += torch.sum(preds_species == label_species)
loss = 0.1 * loss_classes + 0.9* loss_species设定各个任务损失函数的权重,这里暂时写的是0.1,0.9.但是目前为止还没有筛选出好的权重或者说别的改进过的损失函数.
由于作者电脑辣鸡,所以为了防止加载的东西过多而爆显存,使用torch.cuda.empty_cache()删除一些不需要的变量。
epoch_loss = running_loss / len(data_loaders[phase].dataset)
epoch_class_loss = loss_classes / len(data_loaders[phase].dataset)
epoch_species_loss = loss_species / len(data_loaders[phase].dataset)
Loss_list[phase].append(epoch_loss)
classloss_list[phase].append(epoch_class_loss)
speciesloss_list[phase].append(epoch_species_loss)
这里想把各个任务的loss分别打印出来,但是显示结果和总loss好像对不上,先不注释掉了。
后续根据各次的迭代结果,选出最优模型并保留模型参数。
epoch_acc_classes = corrects_classes.double() / len(data_loaders[phase].dataset)
epoch_acc_species = correct_species.double() / len(data_loaders[phase].dataset)
# epoch_acc = epoch_acc_classes
Accuracy_list_classes[phase].append(100 * epoch_acc_classes)
Accuracy_list_species[phase].append(100 * epoch_acc_species)
print('{} Loss: {:.4f} Acc_classes: {:.2%} Acc_species: {:.2%}'.format(phase, epoch_loss,epoch_acc_classes,epoch_acc_species))
# 更新模型权重及最优准确率
# if phase == 'val' and epoch_loss < best_loss:
if phase == 'val':
print('This epoch val loss: {:.4f}'.format(epoch_loss))
if epoch_loss < best_loss:
# 多任务分类时,仅采用了损失函数进行最优模型的选择,为考虑采用其他指标进行筛选,单一任务时,采用准确率即可。
# if phase == 'val' and epoch_acc > best_acc:
# best_acc = epoch_acc_classes
best_loss = epoch_loss
best_model_wts = copy.deepcopy(model.state_dict())
# print('Best val classes Acc: {:.2%}'.format(best_acc))
print('Best val loss: {:.4f}'.format(best_loss))
# 获取模型当前的参数,以便后续继续训练
pre_state = {'model' : model.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch': epoch}
torch.save(pre_state, 'multi_pre_resnet18_model.pt')
# 所有epoch结束后,将best_model_wts中的模型参数加载到当前网络中,并保存
state = {'model' : model.load_state_dict(best_model_wts)}
torch.save(state, 'multi_best_model.pt')
# print('Best val classes Acc: {:.2%}'.format(best_acc))
end_time = time.clock()
print('训练时间:' + str(end_time - start_time))
return model, classloss_list, speciesloss_list, Loss_list,Accuracy_list_classes,Accuracy_list_species
开始训练
import time
start_time = time.clock()
model, classloss_list, speciesloss_list, Loss_list, Accuracy_list_classes, Accuracy_list_species = train_model(network, criterion, optimizer, exp_lr_scheduler, num_epochs=2)
end_time = time.clock()
print('训练时间:' + str(end_time - start_time))
同时将pretrain_model='multi_pre_resnet18_model.pt'设置为之前训练保存的参数,可以接着上次继续训练,但注意不要过拟合。
model, classloss_list, speciesloss_list, Loss_list, Accuracy_list_classes, Accuracy_list_species = train_model(
network, criterion, optimizer, exp_lr_scheduler, num_epochs=20,pretrain_model='multi_pre_resnet18_model.pt')
训练结果示例
----------
...
----------
Epoch 19/19
----------
train Loss: 0.9367 Acc_classes: 87.48% Acc_species: 71.79%
val Loss: 0.5222 Acc_classes: 93.75% Acc_species: 86.25%
This epoch val loss: 0.5222
Best val loss: 0.5222
训练时间:1396.4057515999993
之前也忘记损失函数的权重怎么设置的,最终得到的结果如上。可以看到,通过迁移学习将resnet用来进行该分类任务时,在classes的分类上还是很容易收敛的,但是同时进行species的分类时,就有点吃力了。后续可能会改进损失函数进行优化。
对准确率及损失进行可视化
x = range(0,len(Loss_list['train']))
y3 = [i.cpu().numpy() for i in Accuracy_list_classes["train"]]
y4 = [i.cpu().numpy() for i in Accuracy_list_classes["val"]]
# y3 = Accuracy_list_classes["train"]
# y4 = Accuracy_list_classes["val"]
plt.plot(x, y3, color="r", linestyle="-", marker=".", linewidth=1, label="train")
plt.plot(x, y4, color="b", linestyle="-", marker=".", linewidth=1, label="val")
plt.ylim(min(min(y3),min(y4)) * 0.2,max(max(y3),max(y4)) * 1.2)
plt.legend()
plt.title('train and val Classes_acc vs. epoches')
plt.ylabel('Classes_accuracy')
plt.savefig("train and val Classes_acc vs epoches.jpg")
y5 = [i.cpu().numpy() for i in Accuracy_list_species["train"]]
y6 = [i.cpu().numpy() for i in Accuracy_list_species["val"]]
# y5 = Accuracy_list_species["train"].cpu().numpy()
# y6 = Accuracy_list_species["val"].cpu().numpy()
plt.plot(x, y5, color="r", linestyle="-", marker=".", linewidth=1, label="train")
plt.plot(x, y6, color="b", linestyle="-", marker=".", linewidth=1, label="val")
plt.ylim(min(min(y5),min(y6)) * 0.2,max(max(y5),max(y6)) * 1.2)
plt.legend()
plt.title('train and val Species_acc vs. epoches')
plt.ylabel('Classes_accuracy')
plt.savefig("train and val Species_acc vs epoches.jpg")
y1 = Loss_list["val"]
y2 = Loss_list["train"]
y8 = speciesloss_list['train']
y7 = speciesloss_list['val']
y10 = classloss_list['train']
y9 = classloss_list['val']
plt.plot(x, y1, color="r", linestyle="-", marker="o", linewidth=1, label="loss_val")
plt.plot(x, y2, color="b", linestyle="-", marker="o", linewidth=1, label="loss_train")
plt.plot(x, y7, color="r", linestyle="-", marker="^", linewidth=1, label="specie_loss_val")
plt.plot(x, y8, color="b", linestyle="-", marker="^", linewidth=1, label="specie_loss_train")
plt.plot(x, y9, color="r", linestyle="-", marker=">", linewidth=1, label="class_loss_val")
plt.plot(x, y10, color="b", linestyle="-", marker="<", linewidth=1, label="class_loss_train")
plt.ylim(min(min(y1),min(y2)) * (-1.5),max(max(y1),max(y2),max(y7),max(y8),max(y9),max(y10)) * 1.1)
plt.legend()
plt.title('train and val loss vs. epoches')
plt.xlabel("epochs")
plt.ylabel('loss')
plt.savefig("train and val loss vs epoches.jpg")
并对验证集进行验证
def visualize_model(model):
corrects_classes = 0
corrects_species = 0
counts = 0
model.eval()
with torch.no_grad():
for i, data in enumerate(data_loaders['val']):
# print
img, label_classes, label_species = data
# img = img.to(device)
label_classes = label_classes.to(device)
label_species = label_species.to(device)
# inputs = data['image']
# labels_classes = data['classes'].to(device)
output = model(img.to(device))
x_classes = output[0].view(-1,2)
_, preds_classes = torch.max(x_classes, 1)
corrects_classes += torch.sum(preds_classes == label_classes)
x_species = output[1].view(-1,3)
_, preds_species = torch.max(x_species, 1)
corrects_species += torch.sum(preds_species == label_species)
torch.cuda.empty_cache()
plt.imshow(transforms.ToPILImage()(img.squeeze(0)))
plt.title('predicted classes: {}\n ground-truth classes:{}\n predicted species: {}\n ground-truth species:{}'\
.format(CLASSES[preds_classes],CLASSES[label_classes],SPECIES[preds_species],SPECIES[label_species]))
plt.show()
counts += 1
epoch_acc_classes = corrects_classes.double() / counts
epoch_acc_species = corrects_species.double() / counts
print("epoch_acc_classes:{} epoch_acc_species:{}".format(epoch_acc_classes, epoch_acc_species))
visualize_model(network)
最终验证结果如图
然后两个任务验证集的准确率为:
epoch_acc_classes:93.75% epoch_acc_species:86.25%
单独进行classes分类和species分类时,验证集准确率均可达到90%以上,这也说明预训练的模型含有丰富特征。
后续计划在数据集、网络结构、损失函数方面进行改善,以期提高多任务图像分类时,各个子任务的准确率。
数据增强
对数据进行随机裁剪,改变颜色,旋转,透视变换等操作,并将变换后的图片保存下来,生成新的数据集,补充到原训练集中。
# 透视变换
def random_warp(img, row, col):
height, width, channels = img.shape
# warp:
random_margin = 60
x1 = random.randint(-random_margin, random_margin)
y1 = random.randint(-random_margin, random_margin)
x2 = random.randint(width - random_margin - 1, width - 1)
y2 = random.randint(-random_margin, random_margin)
x3 = random.randint(width - random_margin - 1, width - 1)
y3 = random.randint(height - random_margin - 1, height - 1)
x4 = random.randint(-random_margin, random_margin)
y4 = random.randint(height - random_margin - 1, height - 1)
dx1 = random.randint(-random_margin, random_margin)
dy1 = random.randint(-random_margin, random_margin)
dx2 = random.randint(width - random_margin - 1, width - 1)
dy2 = random.randint(-random_margin, random_margin)
dx3 = random.randint(width - random_margin - 1, width - 1)
dy3 = random.randint(height - random_margin - 1, height - 1)
dx4 = random.randint(-random_margin, random_margin)
dy4 = random.randint(height - random_margin - 1, height - 1)
pts1 = np.float32([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
pts2 = np.float32([[dx1, dy1], [dx2, dy2], [dx3, dy3], [dx4, dy4]])
M_warp = cv2.getPerspectiveTransform(pts1, pts2)
img_warp = cv2.warpPerspective(img, M_warp, (width, height))
return M_warp, img_warp
#改变颜色
def random_light_color(img):
#brightness
B,G,R = cv2.split(img)#通道拆分,顺序为BGR,不是RBG
b_rand = random.randint(-50,50)#生成随机数整数n a<=n<=b
if b_rand == 0:
pass
elif b_rand > 0:
lim = 255 - b_rand
B[B > lim] = 255
B[B <= lim] = (b_rand + B[B <= lim]).astype(img.dtype)
elif b_rand < 0:
lim = 0 - b_rand
B[B < lim] = 0
B[B >= lim] = (b_rand + B[B >= lim]).astype(img.dtype)
g_rand = random.randint(-50,50)
if g_rand == 0:
pass
elif g_rand > 0:
lim = 255 - g_rand
G[G > lim] = 255#R[],G[],B[]都是矩阵
G[G <= lim] = (g_rand + G[G <= lim]).astype(img.dtype)
elif g_rand < 0:
lim = 0 - g_rand
G[G < lim] = 0
G[G >= lim] = (g_rand + G[G >= lim]).astype(img.dtype)
r_rand = random.randint(-50,50)
if r_rand == 0:
pass
elif r_rand > 0:
lim = 255 - r_rand
R[R > lim] = 255
R[R <= lim] = (r_rand + R[R <= lim]).astype(img.dtype)
elif r_rand < 0:
lim = 0 - r_rand
R[R < lim] = 0
R[R >= lim] = (r_rand + R[R >= lim]).astype(img.dtype)
img_merge = cv2.merge((B,G,R)) #合并之前分离出来进行变换的通道
#img = cv2.cvtColor(final_hsv,cv2.COLOR_HSV2BGR)
return img_merge
#对图片实现多种变换并保存
def image_data_aug(img,crop=True,change_color=True,rotation=True,perspective_transform=False):
if (crop or change_color or rotation or perspective_transform) == False:
print("wrong input")
return
if crop:
img = img[int(img.shape[0]/4):int(3*img.shape[0]/4),0:int(3*img.shape[1]/4)]#根据图像大小选择参数大小
if change_color:
img = random_light_color(img)
if rotation:
angle = random.randint(0,180)
scale = random.uniform(0.75,1.25)
M = cv2.getRotationMatrix2D((img.shape[1] / 2, img.shape[0] / 2), angle, scale) # center, angle, scale
img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))
if perspective_transform:
M_warp, img = random_warp(img, img[0], img[1])
return img
def create_image(ori_img_file,times=1):
"""
param: ori_img_file:记录训练数据集相关信息的csv文件路径+名称;
times: 为数据集增加的倍数
"""
ori_file = pd.read_csv(root_dir + ori_img_file,index_col=0)
new_csv = []
for time in range(times):
print("图片第{}次生成中...".format(str(time)))
with tqdm(range(len(ori_file))) as t:
for idx in t:
ori_path = ori_file["path"][idx]
path = ori_path.replace(".jpg", "_aug"+str(time) + "_" + str(idx)+".jpg").replace("train","train_aug").replace("val","val_aug")
classes = ori_file["classes"][idx]
species = ori_file["species"][idx]
print(ori_path)
img = cv2.imread(ori_path)
try:
img = image_data_aug(img)
# """
#不知道为啥遍历到idx=680时,总会显示error: C:\projects\opencv-python\opencv\modules\highgui\src\window.
#cpp:325: error: (-215) size.width>0 && size.height>0 in function cv::imshow
#(已经改斜杠,确认路径没有中文,所以用了try...except这个结构)
cv2.imwrite(path,img)
new_csv.append([str(time)+"_"+ str(idx),path,classes,species])
except:
continue
data_aug = pd.DataFrame(new_csv,columns=["index","path","classes","species"])
data_aug.to_csv("data_aug.csv",index=0)