import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets,transforms
import matplotlib.pyplot as plt
'''Build the network layers'''
class ANN(nn.Module):
    """Fully connected classifier: three hidden ReLU layers and a linear head.

    The input is flattened to ``(-1, in_features)`` and passed through
    ``in_features -> 512 -> 512 -> 256 -> num_classes``.  The output is raw
    logits (no softmax is applied).

    Args:
        in_features: Number of values per flattened sample (default 28*28).
        num_classes: Number of output logits (default 10).
    """

    def __init__(self, in_features: int = 28 * 28, num_classes: int = 10) -> None:
        super().__init__()  # initialise the nn.Module machinery
        # Attribute names are kept stable so state_dict keys and previously
        # pickled models remain compatible.
        self.linear_1 = nn.Linear(in_features=in_features, out_features=512, bias=True)
        # inplace=True would overwrite the input to save memory; not used here.
        self.reLU_1 = nn.ReLU(inplace=False)
        self.linear_2 = nn.Linear(in_features=512, out_features=512, bias=True)
        self.reLU_2 = nn.ReLU(inplace=False)
        self.linear_3 = nn.Linear(in_features=512, out_features=256, bias=True)
        self.reLU_3 = nn.ReLU(inplace=False)
        self.linear_4 = nn.Linear(in_features=256, out_features=num_classes, bias=True)

    def forward(self, x_para_1: torch.Tensor) -> torch.Tensor:
        """Return the logits for a batch of inputs.

        Args:
            x_para_1: Tensor whose element count is a multiple of the
                configured ``in_features``; it is reshaped to
                ``(-1, in_features)`` before the first layer.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        flat = x_para_1.reshape(-1, self.linear_1.in_features)
        # Calling a module invokes nn.Module.__call__, which dispatches to forward().
        hidden = self.reLU_1(self.linear_1(flat))
        hidden = self.reLU_2(self.linear_2(hidden))
        hidden = self.reLU_3(self.linear_3(hidden))
        return self.linear_4(hidden)
'''Main function: train the model'''
def _evaluate(net, loader, device):
    """Return the classification accuracy of *net* on *loader*, in percent.

    Returns 0.0 when the loader yields no samples.
    """
    net.eval()  # inference mode for layers that behave differently in training
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            predicted = net(images).argmax(dim=1)  # index of the largest logit per row
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100.0 * correct / total if total else 0.0


def main():
    """Train ``ANN`` on MNIST, plot the loss, report test accuracy, save the model.

    Side effects:
        * reads MNIST from ``datasets/`` (``download=False``, so the data must
          already be present there);
        * rewrites ``loss.jpg`` every 10 training batches;
        * writes the whole model object to ``models/net.pth`` after each epoch
          (the file is overwritten, so the last epoch's model remains).
    """
    import os  # local import: only needed to create the checkpoint directory

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    epochs = 10  # number of passes over the training set
    batch_size = 512

    net = ANN().to(device)  # move the model to the target device
    criterion = nn.CrossEntropyLoss()  # defaults: no class weights, ignore_index=-100
    optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999),
                           eps=1e-10, weight_decay=0, amsgrad=False)

    transform = transforms.Compose([
        transforms.Resize(28),                # output image size
        transforms.ToTensor(),                # convert image to a tensor
        transforms.Normalize((0.5,), (0.5,))  # normalise with mean 0.5, std 0.5
    ])
    dataset = datasets.MNIST("datasets/", train=True, download=False, transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    testdataset = datasets.MNIST("datasets/", train=False, download=False, transform=transform)
    # Accuracy does not depend on sample order, so the test set is not shuffled.
    testdataloader = torch.utils.data.DataLoader(testdataset, batch_size=batch_size, shuffle=False)

    # Create the output directory up front so a missing folder fails fast
    # instead of after training.
    os.makedirs("models", exist_ok=True)

    losses = []
    for epoch in range(epochs):
        net.train()  # training mode (pairs with net.eval() used during evaluation)
        print('epochs: %d' % epoch)
        for step, (images, labels) in enumerate(dataloader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()  # clear gradients from the previous step
            loss = criterion(net(images), labels)
            loss.backward()        # back-propagate
            optimizer.step()       # update parameters
            if step % 10 == 0:
                # .item() yields a plain Python float: it does not keep the
                # autograd graph alive and is always plottable by matplotlib.
                loss_value = loss.item()
                losses.append(loss_value)
                print("[epochs - %d - %d/%d]loss: %f" % (epoch, step, len(dataloader), loss_value))
                # Live plot of the recorded losses.
                plt.clf()                # clear the current figure
                plt.plot(losses)
                plt.savefig('loss.jpg')  # save the figure
                plt.pause(0.01)          # briefly show/refresh the figure
        accuracy = _evaluate(net, testdataloader, device)
        print("[epochs - %d]Accuracy: %f" % (epoch + 1, accuracy))
        torch.save(net, "models/net.pth")  # save the model
# Run training only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
# Stray file names, not executable code (loss.jpg is the plot written by main):
# loss.jpg
# image.png