本片文章纯依靠python实现神经网络,并未涉及tensorflow,想看tensorflow的网上可以找到很多
神经网络的大体框架是:
前向传播:即输入训练集,也就是用于训练的数据,通过一些线性运算,非线性运算,计算输出结果output
后向传播:用前向传播计算出的output与真实的每个训练集对应的标签或者属性对比,构造代价函数,用梯度下降算法去优化神经网络,让代价函数变小
本篇程序在我的github地址为:GitHub - pangpangche/linear_neural: the linear neural network, using python
import os
import numpy as np
import struct
def load_data(path,kind="train"):
labels_path=os.path.join(path,"%s-labels.idx1-ubyte"%kind)
images_path=os.path.join(path,"%s-images.idx3-ubyte"%kind)
with open(labels_path,'rb') as labpath:
magic_number,item_number=struct.unpack(">II",labpath.read(8))
labels=np.fromfile(labpath,dtype=np.uint8)
with open(images_path,'rb') as imgpath:
magic_number,image_number,rows,cols=struct.unpack(">IIII",imgpath.read(16))
images=np.fromfile(imgpath,dtype=np.uint8)
returnlabels,images
orig_labels,orig_images=np.array(load_data("data/",kind="train"))
print("original labels shape is"+str(orig_labels.shape))
print("original images shape is"+str(orig_images.shape))
'''
开始处理数据,将数据输入处理成(784,60000)
输出处理成(10,60000),手写数字有0-9共10个数字
'''
##labels = orig_labels.reshape(1,orig_labels.shape[0])
images=orig_images.reshape(60000,784)/255##注意此处是先将orig_image先reshape成(60000,784)若是改成(784,60000)就会变成乱码
images=images.T
'''
因为输出层是softmax,因此需要将labels先进行one-hot编码
'''
def get_one_hot(targets,nb_classes):
return np.eye(nb_classes)[np.array(targets).reshape(-1)]
mid_labels=get_one_hot(orig_labels,10)##the mid_labels shape is (60000,10)
labels=mid_labels.T###the labels shape is (10,60000)
print("------------after tunning the images's shape and the labels's shape have changed------------------")
print("the images' shape is"+str(images.shape))
print("the labels' shape is"+str(labels.shape))
'''
load the test set
'''
print("-------------------load the test set---------------------------------------------------------------")
orig_test_labels,orig_test_images=np.array(load_data("data/",kind="t10k"))
print("original test labels shape"+str(orig_test_labels.shape))
print("original test images shape"+str(orig_test_images.shape))
test_images=orig_test_images.reshape(10000,784)/255
test_images=test_images.T
mid_test_labels=get_one_hot(orig_test_labels,10)
test_labels=mid_test_labels.T
print("the test-images-set shape is"+str(test_images.shape))
print("the test-labels-set shape is"+str(test_labels.shape))
'''
本例子线性神经网络结构为
输入层(784个输入维度)---隐藏层(num_hidden_units)----输出层(softmax,10)
x----(784,60000)
w1----(num_hidden_units,784)
b1----(num_hidden_units,1)
a1----(num_hidden_units,60000)
w2----(10,num_hidden_units)
b2----(10,1)
a2----(10,60000)
'''
np.random.seed(1)
def initializer_with_hidden_layers(num_hidden_units):
w1=np.random.randn(num_hidden_units,784)
b1=np.zeros((num_hidden_units,1))
w2=np.random.randn(10,num_hidden_units)
b2=np.zeros((10,1))
parameters={"w1":w1,
"b1":b1,
"w2":w2,
"b2":b2}
return parameters
'''
定以了两个激活函数,其中有
sigmoid函数-----隐藏层
softmax函数-----输出层
'''
def sigmoid(z):
s=1/(1+np.exp(-z))
return s
def softmax(z):
total=np.sum(np.exp(z),axis=0,keepdims=True)
s=np.exp(z)/total
return s
'''
定义前向传播函数,并且返回每一层当中激活函数的输出
'''
def forward_propagation(input_x,output_y,parameters):
m=input_x.shape[1]
w1=parameters["w1"]
b1=parameters["b1"]
w2=parameters["w2"]
b2=parameters["b2"]
a1=sigmoid(np.dot(w1,input_x)+b1)
a2=softmax(np.dot(w2,a1)+b2)
value_cost=-1/m*np.sum(output_y*np.log(a2))
returna1,a2,value_cost
'''
定义后向传播过程
'''
def backward_propagation(input_x,output_y,parameters,learning_rate,iterations):
m=input_x.shape[1]
w1=parameters["w1"]
b1=parameters["b1"]
w2=parameters["w2"]
b2=parameters["b2"]
for i in range(iterations):
a1,a2,cost=forward_propagation(input_x,output_y,parameters)
dz2=a2-output_y
dw2=1/m*np.dot(dz2,a1.T)
db2=1/m*np.sum(dz2,axis=1,keepdims=True)
dz1=1/m*np.dot(w2.T,dz2)*a1*(1-a1)
dw1=1/m*np.dot(dz1,input_x.T)
db1=1/m*np.sum(dz1,axis=1,keepdims=True)
w1=w1-learning_rate*dw1
b1=b1-learning_rate*db1
w2=w2-learning_rate*dw2
b2=b2-learning_rate*db2
assert(w1.shape==dw1.shape)
assert(b1.shape==db1.shape)
assert(w2.shape==dw2.shape)
assert(b2.shape==db2.shape)
y_predict=np.eye(10)[np.array(a2.argmax(0))].T##这是用与one-hot编码
'''
np.eye(3) ----------> [1,0,0]
[0,1,0]
[0,0,1]
np.eye(3)[i]会将生成的对角矩阵的第i行取出
因此:
np.eye(10)[np.array(a2.argmax(0))]会生成,每行对应一个0-9之间的数字
转制之后,即为one-hot编码形式,每列对应0-9之间的一个数字
'''
acc=1-np.sum(np.abs(y_predict-output_y))/m
if i%100==0:
print("cost after iteration%i:%f"%(i,cost))
print("accuracy is"+str(acc))
parameters={"w1":w1,
"b1":b1,
"w2":w2,
"b2":b2}
return parameters
def predict(input_x,output_y,parameters):
m=output_y.shape[1]
_,y_hat,_=forward_propagation(input_x,output_y,parameters)
y_predict=np.eye(10)[np.array(y_hat.argmax(0))]
return y_predict.T
def accuracy(y_predict,output_y):
assert(y_predict.shape==output_y.shape)
m=output_y.shape[1]
acc=1-np.sum(np.abs(y_predict-output_y))/m
return acc
def model(input_x,output_y,hidden_units,learning_rate,iterations):
parameters=initializer_with_hidden_layers(hidden_units) parameters=backward_propagation(input_x,output_y,parameters,learning_rate,iterations)
y_prediction=predict(input_x,output_y,parameters)
acc=accuracy(y_prediction,output_y)
print("the training-set accuracy is"+str(acc))
return parameters
parameters=model(images,labels,hidden_units=784,learning_rate=0.45,iterations=2000)
test_y_prediction=predict(test_images,test_labels,parameters)
test_accuracy=accuracy(test_y_prediction,test_labels)
print("the testing-set accuracy is"+str(test_accuracy))