# 中文文档 (Chinese tutorial notes)
# 简单网络的建立 (building a simple network)
# 数据的加载 (loading the data)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
# Data preprocessing: random augmentation for training, deterministic
# resize + center crop for validation; both use the ImageNet mean/std.
data_transforms = {
'train': transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
'val': transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
]),
}
data_dir = 'data/hymenoptera_data'
# Build one ImageFolder dataset per split, applying the matching transforms.
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
data_transforms[x])
for x in ['train', 'val']}
# DataLoaders feed the network shuffled mini-batches of 4 images per step.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
shuffle=True, num_workers=4)
for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes
# Prefer the first GPU when available; otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 数据的显示 (displaying the data)
def imshow(inp, title=None):
    """Display a normalized (C, H, W) image tensor with matplotlib."""
    # Matplotlib wants (H, W, C); undo the ImageNet normalization so the
    # pixel values are back in a displayable range.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    img = std * inp.numpy().transpose((1, 2, 0)) + mean
    plt.imshow(np.clip(img, 0, 1))
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # brief pause so the plot window refreshes
# Get a batch of training data.
# iter() turns the DataLoader (an iterable) into an iterator;
# next() then pulls a single (inputs, labels) batch from it.
inputs, classes = next(iter(dataloaders['train']))
# Tile the whole batch into a single image grid so it can be shown at once.
out = torchvision.utils.make_grid(inputs)
imshow(out, title=[class_names[x] for x in classes])
# 建立训练模型 (building the training model)
# Download a VGG16 network pre-trained on ImageNet to use as the feature
# extractor for transfer learning.
fmodel = models.vgg16(pretrained = True)
# Freeze the pre-trained weights -- only the new classifier will be trained.
# BUG FIX: the attribute is `requires_grad`; the original set a non-existent
# `require_grad` attribute, which silently froze nothing.
for param in fmodel.parameters():
    param.requires_grad = False

from collections import OrderedDict

# New classifier head: three fully-connected layers with ReLU activations
# and a LogSoftmax output (paired with NLLLoss below, since this is a
# multi-class problem).
classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(25088, 4096)),
    ('relu1', nn.ReLU()),
    ('fc2', nn.Linear(4096, 1000)),
    ('relu2', nn.ReLU()),
    ('fc3', nn.Linear(1000, 102)),
    ('output', nn.LogSoftmax(dim=1)),
]))
# Swap VGG16's original classifier for the new head.
fmodel.classifier = classifier
# Negative log-likelihood loss complements the LogSoftmax output above.
criterion = nn.NLLLoss()
# Adam over the classifier parameters only (everything else is frozen).
# BUG FIX: the original line was missing its closing parenthesis.
optimizer = optim.Adam(fmodel.classifier.parameters(), lr=0.001)
def accuracy_test(model, dataloader, device=None):
    """Evaluate classification accuracy of `model` over `dataloader`.

    Args:
        model: module producing per-class scores (higher = more likely).
        dataloader: iterable yielding (images, labels) batches.
        device: torch device (or device string) to run on. Defaults to the
            first GPU when available, else CPU.  The original hard-coded
            .cuda()/'cuda', which crashed on CPU-only machines.

    Returns:
        Accuracy as a float in [0, 1] (also printed, as before).
    """
    if device is None:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    correct = 0
    total = 0
    model.to(device)
    with torch.no_grad():  # no gradients needed during evaluation
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # torch.max over dim=1 returns (values, indices); keep only the
            # predicted class indices.
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            # Count element-wise matches between predictions and labels.
            correct += torch.sum(predicted == labels.data).item()
    accuracy = correct / total
    print('the accuracy is {:.4f}'.format(accuracy))
    return accuracy
def deep_learning(model, trainloader, epochs, print_every, criterion,
                  optimizer, device, validloader=None):
    """Train `model` on `trainloader` for `epochs` passes over the data.

    Args:
        model: the network to train (moved to `device`).
        trainloader: iterable yielding (inputs, labels) training batches.
        epochs: number of full passes over the training data.
        print_every: report the running loss every this many steps.
        criterion: loss function, e.g. nn.NLLLoss().
        optimizer: optimizer updating the trainable parameters.
        device: torch device (or device string) to train on.
        validloader: optional validation loader; when given, accuracy is
            reported every `print_every` steps.  BUG FIX: the original
            referenced an undefined global `validloader`, raising
            NameError at the first report.
    """
    steps = 0
    model.to(device)  # BUG FIX: original had a typo, model.to(deive)
    for e in range(epochs):
        running_loss = 0
        for inputs, labels in trainloader:
            steps += 1
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()              # clear accumulated gradients
            outputs = model(inputs)            # forward pass
            loss = criterion(outputs, labels)  # compute the loss
            loss.backward()                    # backpropagate
            optimizer.step()                   # update parameters
            running_loss += loss.item()
            if steps % print_every == 0:
                print('EPOCHS : {}/{}'.format(e + 1, epochs),
                      'Loss : {:.4f}'.format(running_loss / print_every))
                if validloader is not None:
                    accuracy_test(model, validloader)
# Train the new classifier head for 3 epochs, logging every 40 steps.
# BUG FIX: the original passed undefined globals `trainloader`/`testloader`
# (the loaders live in the `dataloaders` dict), hard-coded 'cuda' instead of
# the detected `device`, and called accuracy_test without its model argument.
deep_learning(fmodel, dataloaders['train'], 3, 40, criterion, optimizer, device)
accuracy_test(fmodel, dataloaders['val'])
from PIL import Image # 使用image模块导入图片
def process_image(image):
    """Scale, center-crop and normalize an image file for the model.

    Mirrors the validation transforms: resize so the shorter side is 256,
    center-crop 224x224, scale pixels to [0, 1], apply ImageNet mean/std
    normalization, and move channels first.

    Args:
        image: path to an image file readable by PIL.

    Returns:
        A float32 torch tensor of shape (3, 224, 224).
    """
    pic = Image.open(image)
    # Resize so the SHORTER side becomes 256, preserving the aspect ratio.
    if pic.size[0] < pic.size[1]:
        ratio = float(256) / float(pic.size[0])
    else:
        ratio = float(256) / float(pic.size[1])
    new_size = (int(pic.size[0] * ratio), int(pic.size[1] * ratio))
    pic.thumbnail(new_size)
    # Crop the central 224 x 224 region.
    pic = pic.crop([pic.size[0] / 2 - 112, pic.size[1] / 2 - 112,
                    pic.size[0] / 2 + 112, pic.size[1] / 2 + 112])
    # Normalize with the same statistics used for the training set.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    np_image = np.array(pic)
    np_image = np_image / 255
    # BUG FIX: the original loop used range(2), which normalized only the
    # red and green channels and left blue untouched.
    for i in range(3):
        np_image[:, :, i] -= mean[i]
        np_image[:, :, i] /= std[i]
    # PyTorch expects channels-first (C, H, W); PIL/NumPy give (H, W, C).
    np_image = np_image.transpose((2, 0, 1))
    np_image = torch.from_numpy(np_image)
    # (Removed the original debug print: it printed the bound `.type`
    # method object, not the tensor's dtype.)
    return np_image.float()
def predict(image_path, model, topk=5):
    """Predict the top-k classes for the image at `image_path`.

    Args:
        image_path: path to the image file.
        model: trained classifier whose output is log-probabilities
            (LogSoftmax head).
        topk: number of top predictions to return.

    Returns:
        (probs, classes): parallel lists of probabilities (floats) and
        1-based class numbers (strings).
    """
    img = process_image(image_path)
    img = img.unsqueeze(0)  # add a batch dimension
    # BUG FIX: run on whatever device the model lives on; the original
    # hard-coded .cuda(), which crashes on CPU-only machines.
    target = next(model.parameters()).device
    with torch.no_grad():  # inference only, no gradients needed
        values, indices = model(img.to(target)).topk(topk)
    # The model emits log-probabilities; exp() recovers actual probabilities.
    probs = [torch.exp(v).tolist() for v in values[0]]
    # Convert 0-based indices to the 1-based numbering used by the labels.
    classes = [str(n + 1) for n in indices[0].tolist()]
    return (probs, classes)