Softmax 从零开始实现

导入必要的包

from mxnet import gluon
from mxnet import nd
from mxnet.gluon import data as gdata,loss as gloss
import d2lzh as d2l
from mxnet import autograd as ag

导入数据

# Download the Fashion-MNIST training and test sets and wrap each of them
# in a mini-batch iterator.
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Sanity-check the import: report the length of each iterator, then print
# only the first training batch (nothing is printed if there is none).
print(len(train_iter), len(test_iter))
first_batch = next(iter(train_iter), None)
if first_batch is not None:
    x, y = first_batch
    print(x, y)

235 40

[[[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]


 [[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]


 [[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]


 ...


 [[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]


 [[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]


 [[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]]
<NDArray 256x1x28x28 @cpu(0)> 
[6 9 7 2 1 7 6 6 9 0 6 3 1 4 3 4 4 2 6 3 6 9 0 2 1 2 2 7 7 1 8 0 1 7 6 1 4
 9 2 6 3 7 9 0 5 9 7 0 8 8 1 8 6 1 6 9 4 7 7 4 1 4 4 5 7 8 7 7 5 6 4 2 9 0
 0 6 0 5 7 8 7 8 9 3 3 7 1 0 9 6 5 4 9 4 4 9 4 2 4 7 7 4 5 9 6 8 7 5 1 4 4
 3 5 3 5 0 7 1 0 5 6 1 6 5 4 9 4 7 7 3 8 7 7 7 0 5 4 2 3 2 2 0 9 0 3 8 0 6
 4 4 4 5 8 9 8 7 5 6 0 6 5 6 8 2 6 9 9 5 2 0 9 4 3 4 8 0 5 5 8 2 4 1 8 8 9
 7 9 1 7 2 8 7 8 6 4 7 7 3 0 8 0 9 0 0 5 9 0 8 2 8 6 0 9 2 7 5 7 9 7 5 4 0
 3 8 7 5 4 9 1 2 7 8 1 7 9 8 8 8 0 0 0 9 6 6 7 8 1 4 1 7 6 1 1 8 6 3]
<NDArray 256 @cpu(0)>

初始化模型参数

# Model dimensions: every input is flattened to 784 features (see the
# reshape in net) and the model produces 10 outputs per example.
num_inputs = 784
num_outputs = 10

# Weights are drawn from a normal distribution with scale 0.01; the bias
# vector starts at zero.
w = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(shape=num_outputs)

# Attach gradient storage to both parameters so that backward() can fill
# in w.grad and b.grad.
for param in (w, b):
    param.attach_grad()

定义模型

# Softmax operation, applied independently to every row.
def softmax(X):
    """Return the row-wise softmax of ``X``.

    Each row is exponentiated and divided by the sum of its exponentials,
    so every row of the result sums to 1.

    Args:
        X: 2-D array of scores; the normalisation runs along axis 1.

    Returns:
        An array with the same shape as ``X`` holding the normalised values.
    """
    # Softmax is unchanged when a per-row constant is subtracted. Shifting
    # by the row maximum keeps every exp() argument <= 0, so large scores
    # can no longer overflow to inf and turn the result into nan (inf/inf).
    shifted = X - X.max(axis=1, keepdims=True)
    X_exp = shifted.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition

# Forward pass of the single-layer softmax-regression model.
def net(X):
    """Map a batch of inputs to per-class probabilities.

    The batch is flattened to ``(-1, num_inputs)``, multiplied by the
    module-level weights ``w``, offset by the bias ``b``, and passed through
    ``softmax``.
    """
    flattened = X.reshape(-1, num_inputs)
    scores = nd.dot(flattened, w) + b
    return softmax(scores)

定义损失函数

def cross_entropy(y_hat, y):
    """Return the per-example cross-entropy loss.

    ``nd.pick`` selects from ``y_hat`` the entries indexed by the labels in
    ``y``; the loss is the negative logarithm of those selected values.
    """
    picked = nd.pick(y_hat, y)
    return -picked.log()

确定精度

def accuracy(y_hat, y):
    """Return the fraction of rows of ``y_hat`` whose arg-max equals ``y``.

    Labels are cast to float32 before the comparison; the result is
    converted to a Python scalar with ``asscalar()``.
    """
    predicted = y_hat.argmax(axis=1)
    matches = predicted == y.astype('float32')
    return matches.mean().asscalar()
# Classification accuracy of a model over an entire data iterator.
def evaluate_accuracy(data_iter, net):
    """Return the fraction of correctly classified examples in ``data_iter``.

    Args:
        data_iter: iterable yielding ``(X, y)`` batches.
        net: callable mapping a batch ``X`` to per-class scores, one row
            per example.

    Returns:
        Number of correct predictions divided by the number of examples.
    """
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype('float32')
        # Accumulate the COUNT of correct predictions. Adding the per-batch
        # mean and then dividing by the total number of examples would
        # shrink the result by roughly a factor of the batch size.
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n

优化函数

def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic gradient descent step in place.

    Args:
        params: iterable of parameter arrays, each exposing ``.grad``.
        lr: learning rate.
        batch_size: divisor applied to the gradient.
    """
    for p in params:
        update = lr * p.grad / batch_size
        # Slice assignment writes into the existing array instead of
        # rebinding the name, so callers' references see the new values.
        p[:] = p - update

训练模型

# Hyperparameters: number of passes over the training data and learning rate.
num_epochs, lr = 5, 0.1


def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: callable mapping a batch ``X`` to per-class scores.
        train_iter: iterable yielding ``(X, y)`` training batches.
        test_iter: iterable passed to ``evaluate_accuracy`` after each epoch.
        loss: callable ``loss(y_hat, y)`` returning per-example losses.
        num_epochs: number of passes over ``train_iter``.
        batch_size: batch size forwarded to the optimisation step.
        params: parameter arrays updated by ``sgd``; used only when
            ``trainer`` is None.
        lr: learning rate for ``sgd``; used only when ``trainer`` is None.
        trainer: optional object with a ``step(batch_size)`` method that is
            used in place of ``sgd``.
    """
    for epoch in range(1, num_epochs + 1):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            # Record the forward pass so backward() can compute gradients.
            with ag.record():
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                sgd(params, lr, batch_size)
            else:
                trainer.step(batch_size)
            y = y.astype('float32')
            # Accumulate the summed batch LOSS. Summing the labels here
            # would report the average label value instead of the loss.
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print("epoch %d ,loss %f ,train_acc %f ,test_acc %f" % (epoch, train_l_sum / n, train_acc_sum / n, test_acc))
    

# Train the from-scratch model with the hand-written sgd on parameters w, b.
train_ch3(
    net,
    train_iter,
    test_iter,
    cross_entropy,
    num_epochs,
    batch_size,
    params=[w, b],
    lr=lr,
)

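epoch 1 ,loss 4.500000 ,train_acc 0.747417 ,test_acc 0.003201
epoch 2 ,loss 4.500000 ,train_acc 0.810550 ,test_acc 0.003296
epoch 3 ,loss 4.500000 ,train_acc 0.823350 ,test_acc 0.003323
epoch 4 ,loss 4.500000 ,train_acc 0.829450 ,test_acc 0.003361
epoch 5 ,loss 4.500000 ,train_acc 0.834900 ,test_acc 0.003365

注:以上记录的输出中,loss 与 test_acc 两列的数值并不可信。loss 恒为 4.5,恰好是标签 0–9 的平均值,说明当时累加的是标签之和而不是交叉熵损失;test_acc 约为 0.003,是因为把每个批次的平均准确率累加后又除以了样本总数。只有 train_acc 一列反映了真实的训练效果。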

展示图片

# Grab only the first test batch.
for X, y in test_iter:
    break

# Convert the true labels and the model's arg-max predictions to text labels.
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
predicted_classes = net(X).argmax(axis=1)
pred_labels = d2l.get_fashion_mnist_labels(predicted_classes.asnumpy())

# Each title shows the true label on the first line and the prediction on
# the second.
titles = [f"{actual}\n{guess}" for actual, guess in zip(true_labels, pred_labels)]

# Display the first nine images together with their titles.
d2l.show_fashion_mnist(X[:9], titles[:9])

预测结果

深度学习_Softmax从零开始

深度学习_Softmax从零开始

Softmax 从零开始实现

导入必要的包

导入数据

初始化模型参数

定义模型

定义损失函数

确定精度

优化函数

训练模型

展示图片

相关阅读更多精彩内容

友情链接更多精彩内容