用卷积神经网络实现手写数字识别

整体流程

1.导入工具包

import tensorflow as tf
import matplotlib.pyplot as plt

2.下载数据

# mnist = tf.keras.datasets.mnist
# (x_train, y_train),(x_test, y_test) = mnist.load_data()

(x_train, y_train),(x_test, y_test) = tf.keras.datasets.mnist.load_data()

3.调整数据的形状
使得数据在gpu内生成，模型在gpu内运行，需添加with tf.device('/gpu:0')语句:

with tf.device('/gpu:0'):
    x = tf.convert_to_tensor(x_train.reshape(60000, 28,28,1).astype('float32')/255)
    y = tf.convert_to_tensor(y_train.astype('int64'))
    x_test = tf.convert_to_tensor(x_test.reshape(10000, 28,28,1).astype('float32')/255)
    y_test = tf.convert_to_tensor(y_test.astype('int64'))

4.定义卷积层参数（W,b）

def weight_variable(shape):
    initial = tf.Variable(tf.random.normal(shape,dtype = tf.float32)*0.01)
#     np.random.randn(layers_dims[l],layers_dims[l-1])*np.sqrt(2./layers_dims[l-1])
    return initial
# 定义偏置量变量
def bias_variable(shape):
    initial = tf.Variable(tf.zeros(shape,dtype = tf.float32))
    return initial

5.定义卷积函数、池化函数

# 卷积函数
def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')
# 池化函数
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

6.定义模型各层的参数

with tf.device('/gpu:0'):
    
    # 定义卷积层参数，卷积核的尺寸[5,5],通道数为1,卷积核的数量：32
    #     W_conv1 = weight_variable([3, 3, 1, 32],)
    #     he初始化
    W_conv1 = tf.Variable(weight_variable([3, 3, 1, 32]) + tf.sqrt(2/800))
    b_conv1 = bias_variable([32])

    # 第二层卷积，卷积核大小5*5，输入通道有32个，输出通道有64个，从输出通道数看，第二层的卷积单元有64个。
    # W_conv2 = weight_variable([3, 3, 32, 64])

    W_conv2 = tf.Variable(weight_variable([3, 3, 32, 64]) + tf.sqrt(2/1600))
    b_conv2 = bias_variable([64])

    # 全连接
    W_fc1 = weight_variable([7*7*64, 120])
    b_fc1 = bias_variable([120])

    # 输出层
    W_fc2 = weight_variable([120, 10])
    b_fc2 = bias_variable([10])

7.定义整体框架函数

def model(x, W1, b1, W2, b2, W3, b3, W4, b4)  :  
    # cnn ，设置一个卷积层
    h_conv1 = tf.nn.relu(conv2d(x, W1) + b1)
    # pooling
    h_pool1 = max_pool_2x2(h_conv1)
    
    
    # 第二层卷积：激活和池化（类似第一层卷积操作的激活和池化）
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W2) + b2)
    h_pool2 = max_pool_2x2(h_conv2)
    
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W3) + b3)
    
    y_conv=tf.nn.softmax(tf.matmul(h_fc1, W4) + b4)
    
    return y_conv

8.计算准确率

# 计算准确率
def acc(x,y, W1, b1, W2, b2, W3, b3, W4, b4):
    y_conv = model(x, W1, b1, W2, b2, W3, b3, W4, b4)
    correct_predict = tf.equal(tf.argmax(y_conv, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_predict, "float"))
    return accuracy

9.开始训练模型

with tf.device('/gpu:0'):
    learning_rate = 0.05
    y_one = tf.one_hot(y, 10, dtype=tf.float32)
    minibatch = 128
    costs = []
    for iten in range(50):
        for start, end in zip(range(0,len(x),minibatch),range(128,len(x)+1,minibatch)):
            with tf.GradientTape() as t:

                cost = tf.reduce_mean(tf.square(model(x[start:end],W_conv1,b_conv1,W_conv2,b_conv2,W_fc1,b_fc1,W_fc2,b_fc2)-y_one[start:end]))

            dw1,db1,dw2,db2,dw3,db3, dw4,db4 = t.gradient(cost,(W_conv1,b_conv1,W_conv2,b_conv2,W_fc1,b_fc1,W_fc2,b_fc2))
    #      
            costs.append(cost)
            #更新
            W_conv1.assign_sub(learning_rate * dw1)
            b_conv1.assign_sub(learning_rate * db1)
            W_conv2.assign_sub(learning_rate * dw2)
            b_conv2.assign_sub(learning_rate * db2)
            W_fc1.assign_sub(learning_rate * dw3)
            b_fc1.assign_sub(learning_rate * db3)
            W_fc2.assign_sub(learning_rate * dw4)
            b_fc2.assign_sub(learning_rate * db4)

#         if (iten+1)%10 == 0:
        print('iten %2d: Train=%.4f Test=%.4f cost=%.5f' %
                    (iten+1, acc(x[0:20000],y[0:20000], W_conv1,b_conv1,W_conv2,b_conv2,W_fc1,b_fc1,W_fc2,b_fc2), acc(x_test,y_test,W_conv1,b_conv1,W_conv2,b_conv2,W_fc1,b_fc1,W_fc2,b_fc2),cost))
plt.plot(costs)
plt.show()

结果分析:

mnist手写数字识别—卷积神经网络(CNN)

mnist手写数字识别—卷积神经网络(CNN)

用卷积神经网络实现手写数字识别