# PyTorch implementation of GoogLeNet on Fashion-MNIST
from __future__ import print_function
import torch
import time
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision.transforms import ToPILImage
show=ToPILImage()
import numpy as np
import matplotlib.pyplot as plt
#
batchSize = 128

# ---- load data ----
# Images are resized to 96x96, the input size for which GoogleNet's final
# AvgPool2d(2) leaves a 1x1 feature map in front of the Linear(1024, ...) layer.
# Fashion-MNIST is single-channel, so Normalize takes exactly one mean and one
# std. A 3-tuple cannot be broadcast onto a (1, H, W) tensor and makes current
# torchvision raise; (0.5,) maps pixel values from [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.Resize(96),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batchSize, shuffle=True, num_workers=0)
testset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=batchSize, shuffle=False, num_workers=0)
# Fashion-MNIST label names, indexed by integer class id (the previous tuple
# listed the CIFAR-10 names).
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
           'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot')
def imshow(img):
    """Draw a normalized image tensor on the current matplotlib axes.

    Args:
        img: Image tensor that np.transpose can reorder with axes (1, 2, 0),
            i.e. three-dimensional; assumed to be (channels, height, width)
            and normalized with mean 0.5 / std 0.5 -- TODO confirm at call
            sites.
    """
    # Undo the mean-0.5 / std-0.5 normalization: [-1, 1] -> [0, 1].
    img = img / 2 + 0.5
    # detach() and cpu() make this work for tensors that live on the GPU or
    # carry gradient history; calling .numpy() directly raises for those.
    npimg = img.detach().cpu().numpy()
    # Move the first axis last, the layout plt.imshow expects for colour images.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
####network
def conv_relu(in_channels, out_channels, kernel, stride=1, padding=0):
    """Build a Conv2d -> BatchNorm2d -> ReLU stack.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel: Convolution kernel size.
        stride: Convolution stride.
        padding: Zero padding applied by the convolution.

    Returns:
        An nn.Sequential holding the three layers in that order.
    """
    layer = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel, stride, padding),
        nn.BatchNorm2d(out_channels, eps=1e-3),
        nn.ReLU(inplace=True),
    )
    return layer
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    Each convolution is preceded by BatchNorm2d + ReLU. All branches use
    stride 1 with padding chosen to keep the spatial size, so the output has
    the input's height and width and ``c1 + c2[1] + c3[1] + c4`` channels.

    Args:
        in_channel: Number of channels of the input feature map.
        c1: Output channels of the 1x1 branch.
        c2: ``(reduce, out)`` channels of the 1x1 -> 3x3 branch.
        c3: ``(reduce, out)`` channels of the 1x1 -> 5x5 branch.
        c4: Output channels of the 3x3 max-pool -> 1x1 branch.
    """

    def __init__(self, in_channel, c1, c2, c3, c4):
        super(Inception, self).__init__()
        # Branch 1: 1x1 convolution.
        self.norm1_1 = nn.BatchNorm2d(in_channel, eps=1e-3)
        self.p1_1 = nn.Conv2d(in_channels=in_channel, out_channels=c1, kernel_size=1)
        # Branch 2: 1x1 reduction followed by a 3x3 convolution.
        self.norm2_1 = nn.BatchNorm2d(in_channel, eps=1e-3)
        self.p2_1 = nn.Conv2d(in_channels=in_channel, out_channels=c2[0], kernel_size=1)
        self.norm2_2 = nn.BatchNorm2d(c2[0], eps=1e-3)
        self.p2_2 = nn.Conv2d(in_channels=c2[0], out_channels=c2[1], kernel_size=3, padding=1)
        # Branch 3: 1x1 reduction followed by a 5x5 convolution.
        self.norm3_1 = nn.BatchNorm2d(in_channel, eps=1e-3)
        self.p3_1 = nn.Conv2d(in_channels=in_channel, out_channels=c3[0], kernel_size=1)
        self.norm3_2 = nn.BatchNorm2d(c3[0], eps=1e-3)
        self.p3_2 = nn.Conv2d(in_channels=c3[0], out_channels=c3[1], kernel_size=5, padding=2)
        # Branch 4: 3x3 max pooling followed by a 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.norm4_2 = nn.BatchNorm2d(in_channel, eps=1e-3)
        self.p4_2 = nn.Conv2d(in_channels=in_channel, out_channels=c4, kernel_size=1)

    def forward(self, x):
        """Run all four branches on ``x`` and concatenate them on dim 1."""
        p1 = self.p1_1(F.relu(self.norm1_1(x)))
        p2 = self.p2_2(F.relu(self.norm2_2(self.p2_1(F.relu(self.norm2_1(x))))))
        p3 = self.p3_2(F.relu(self.norm3_2(self.p3_1(F.relu(self.norm3_1(x))))))
        p4 = self.p4_2(F.relu(self.norm4_2(self.p4_1(x))))
        return torch.cat((p1, p2, p3, p4), dim=1)
#Test Inception block
# test_net = Inception(3, 64, (48, 64), (64, 96), 32)
# test_x = Variable(torch.zeros(1, 3, 96, 96))
# print('input shape: {} x {} x {}'.format(test_x.shape[1], test_x.shape[2], test_x.shape[3]))
# test_y = test_net(test_x)
# print('output shape: {} x {} x {}'.format(test_y.shape[1], test_y.shape[2], test_y.shape[3]))
class GoogleNet(nn.Module):
    """GoogLeNet-style classifier built from Inception blocks.

    The feature extractor halves the spatial size five times (stem conv,
    four max-pools) and ends with AvgPool2d(2). For a 96x96 input that
    leaves a 1x1x1024 feature map, which is what the final Linear layer
    expects.

    Args:
        in_channel: Number of channels of the input images.
        num_classes: Number of output logits.

    NOTE(review): the 1x1 and 3x3 stem convolutions are applied back to back
    with no activation between or after them. That looks unintended, but is
    kept as is so that layer indices and parameter names stay unchanged.
    """

    def __init__(self, in_channel, num_classes):
        super(GoogleNet, self).__init__()
        layers = []
        # Stem: 7x7/2 convolution, ReLU, 3x3/2 max-pool.
        layers += [nn.Conv2d(in_channels=in_channel, out_channels=64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1)]
        # 1x1 then 3x3 convolution, followed by a 3x3/2 max-pool.
        layers += [nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1),
                   nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, padding=1),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1)]
        # Each Inception's in_channel equals the previous block's
        # c1 + c2[1] + c3[1] + c4.
        layers += [Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1)]
        layers += [Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1)]
        layers += [Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   nn.AvgPool2d(kernel_size=2)]
        self.net = nn.Sequential(*layers)
        self.dense = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for images ``x``."""
        x = self.net(x)
        # Flatten per sample. view(-1, 1024) would silently spread one sample
        # over several rows whenever the pooled map is larger than 1x1; keeping
        # the batch dimension fixed turns that into a clear shape error in the
        # dense layer instead.
        x = x.view(x.size(0), -1)
        x = self.dense(x)
        return x
#Test GoogleNet
# test_net = GoogleNet(3, 10)
# test_x = Variable(torch.zeros(1, 3, 96, 96))
# test_y = test_net(test_x)
# print('output: {}'.format(test_y.shape))
# Prefer the GPU, but fall back to the CPU so the script does not crash on
# machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# One input channel (grayscale Fashion-MNIST), ten output classes.
net = GoogleNet(1, 10).to(device)
print(net)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
# ---- train ----
print("training begin")
# Send every batch to whatever device the model's parameters live on.
device = next(net.parameters()).device
# Make sure BatchNorm layers are in training mode.
net.train()
for epoch in range(3):
    start = time.time()
    running_loss = 0.0
    for i, (image, label) in enumerate(trainloader):
        image = image.to(device)
        label = label.to(device)
        # To inspect a batch:
        #   imshow(torchvision.utils.make_grid(image.cpu())); plt.show()
        optimizer.zero_grad()
        outputs = net(image)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()
        # .item() yields a plain Python float, so the running sum is not a
        # tensor kept on the training device.
        running_loss += loss.item()
        # Report the mean loss and elapsed time once every 100 mini-batches.
        if i % 100 == 99:
            end = time.time()
            print('[epoch %d,imgs %5d] loss: %.7f time: %0.3f s' % (epoch + 1, (i + 1) * batchSize, running_loss / 100, (end - start)))
            start = time.time()
            running_loss = 0.0
print("finish training")
# ---- test ----
# Evaluation mode: BatchNorm uses its running statistics.
net.eval()
# Send every batch to whatever device the model's parameters live on.
device = next(net.parameters()).device
correct = 0
total = 0
# No gradients are needed for evaluation; skipping them saves memory and time.
with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        # Index of the largest logit is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a Python int so the %d formatting below is
        # applied to a plain number rather than a tensor.
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the %d test images: %d %%' % (total, 100 * correct / total))
# Output of a sample run (model summary, training log, test accuracy):
GoogleNet(
(net): Sequential(
(0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
(1): ReLU()
(2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(4): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(6): Inception(
(norm1_1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(192, 16, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1))
)
(7): Inception(
(norm1_1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(256, 32, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(32, 96, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(256, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
)
(8): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(9): Inception(
(norm1_1): BatchNorm2d(480, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(480, 192, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(480, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(480, 96, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(96, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(96, 208, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(480, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(480, 16, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(16, 48, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(480, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(480, 64, kernel_size=(1, 1), stride=(1, 1))
)
(10): Inception(
(norm1_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(512, 160, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(512, 112, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(112, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(112, 224, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(512, 24, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(24, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
)
(11): Inception(
(norm1_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(128, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(512, 24, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(24, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
)
(12): Inception(
(norm1_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(512, 112, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(512, 144, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(144, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(144, 288, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(512, 32, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(512, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(512, 64, kernel_size=(1, 1), stride=(1, 1))
)
(13): Inception(
(norm1_1): BatchNorm2d(528, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(528, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(528, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(528, 160, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(160, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(528, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(528, 32, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(32, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(528, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(528, 128, kernel_size=(1, 1), stride=(1, 1))
)
(14): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(15): Inception(
(norm1_1): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(832, 256, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(832, 160, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(160, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(160, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(832, 32, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(32, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1))
)
(16): Inception(
(norm1_1): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p1_1): Conv2d(832, 384, kernel_size=(1, 1), stride=(1, 1))
(norm2_1): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_1): Conv2d(832, 192, kernel_size=(1, 1), stride=(1, 1))
(norm2_2): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p2_2): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(norm3_1): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_1): Conv2d(832, 48, kernel_size=(1, 1), stride=(1, 1))
(norm3_2): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p3_2): Conv2d(48, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
(p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
(norm4_2): BatchNorm2d(832, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
(p4_2): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1))
)
(17): AvgPool2d(kernel_size=2, stride=2, padding=0)
)
(dense): Linear(in_features=1024, out_features=10, bias=True)
)
training begin
[epoch 1,imgs 12800] loss: 0.8332726 time: 7.296 s
[epoch 1,imgs 25600] loss: 0.4878939 time: 7.260 s
[epoch 1,imgs 38400] loss: 0.4382473 time: 7.275 s
[epoch 1,imgs 51200] loss: 0.3879716 time: 7.280 s
[epoch 2,imgs 12800] loss: 0.3313940 time: 7.340 s
[epoch 2,imgs 25600] loss: 0.3187236 time: 7.329 s
[epoch 2,imgs 38400] loss: 0.3174009 time: 7.330 s
[epoch 2,imgs 51200] loss: 0.2961887 time: 7.328 s
[epoch 3,imgs 12800] loss: 0.2664629 time: 7.364 s
[epoch 3,imgs 25600] loss: 0.2510577 time: 7.357 s
[epoch 3,imgs 38400] loss: 0.2545497 time: 7.354 s
[epoch 3,imgs 51200] loss: 0.2475178 time: 7.354 s
finish training
Accuracy of the network on the 10000 test images: 89 %