一、分类学习
- 准备mnist数据
mnist数据需要翻墙,可以先从这里下载下来,并在代码中指定数据目录
# The rest of this section calls tf.* directly, so tensorflow itself must be
# imported as well as the MNIST helper.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load MNIST from ./MNIST_data with one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# If the data was downloaded manually, point to that directory instead:
# mnist = input_data.read_data_sets('<data dir>', one_hot=True)
- 搭建网络
# Each image is 28 x 28 pixels, flattened into a vector of 784 values.
xs = tf.placeholder(tf.float32, shape=[None, 784])
# Every image shows one digit 0-9, so the one-hot label has 10 entries.
ys = tf.placeholder(tf.float32, shape=[None, 10])
# Output layer: 10 units, one per class.
# softmax is the usual activation for classification problems.
prediction = tf.layers.dense(inputs=xs, units=10, activation=tf.nn.softmax)
loss函数选用交叉熵函数cross entropy(关于交叉熵,可以参考这篇文章)。交叉熵用来衡量预测值和真实值的相似程度,如果完全相同,它们的交叉熵等于零。
另外,定义compute_accuracy来计算精确度。
# Mean cross entropy over the batch. The softmax output is clipped away from 0
# before taking the log, so a saturated prediction cannot produce log(0) = -inf
# and turn the loss (and every later gradient) into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(ys * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)), axis=1)
)
# train operation
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
# compute accuracy
def compute_accuracy(v_xs, v_ys):
    """Return the fraction of samples in ``v_xs`` that are classified correctly.

    The module-level ``sess``, ``prediction`` and ``xs`` are read (not
    rebound), so no ``global`` statement is needed.

    The comparison is done with numpy on the fetched predictions. Building
    tf.equal / tf.argmax / tf.reduce_mean ops here would add new nodes to the
    graph on every call and require a second session run.

    :param v_xs: input images, fed to the ``xs`` placeholder
    :param v_ys: one-hot labels matching ``v_xs`` row by row
    :return: accuracy as a float32 scalar
    """
    import numpy as np  # local import: this snippet only imports tensorflow

    y_pre = sess.run(prediction, feed_dict={xs: v_xs})
    hits = np.argmax(y_pre, axis=1) == np.argmax(v_ys, axis=1)
    return np.mean(hits, dtype=np.float32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train: 1000 mini-batch steps of 100 samples each
    for step in range(1000):
        images, labels = mnist.train.next_batch(100)
        sess.run(train_op, feed_dict={xs: images, ys: labels})
        if step % 50:
            continue
        # every 50th step: report accuracy on the whole test set
        print(compute_accuracy(mnist.test.images, mnist.test.labels))
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-01-classifier/
二、Dropout 解决overfitting
使用sklearn提供的手写数字数据集(from sklearn.datasets import load_digits)。在处理的时候需要用LabelBinarizer将label转为只包含0和1的one-hot向量(例如数字3会变成[0, 0, 0, 1, 0, 0, 0, 0, 0, 0])。
import tensorflow as tf
from sklearn.datasets import load_digits
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module no longer exists in current scikit-learn.
from sklearn.model_selection import train_test_split  # split train set and test set
from sklearn.preprocessing import LabelBinarizer  # one-hot encode the labels

# load data
digits = load_digits()
X = digits.data
y = digits.target
# turn every digit label into a 0/1 one-hot row
y = LabelBinarizer().fit_transform(y)
# hold out 30% of the samples as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
接下来定义需要用到的变量。tf_is_training 用于控制是训练还是测试
# define inputs
# NOTE: keep_prob is not referenced by the code shown in this section.
keep_prob = tf.placeholder(dtype=tf.float32)
xs = tf.placeholder(dtype=tf.float32, shape=[None, 64])  # flattened 8 * 8 image
ys = tf.placeholder(dtype=tf.float32, shape=[None, 10])  # one-hot label, 10 classes
# True while training (dropout active), False while testing (dropout off)
tf_is_training = tf.placeholder(dtype=tf.bool, shape=None)
定义两个神经网络,一个不使用dropout,另一个使用dropout。(注意:dropout只需要在隐藏层进行即可)。
# normal network: hidden layer of 50 units -> 10 output logits
h1 = tf.layers.dense(xs, 50, tf.nn.softmax)
output = tf.layers.dense(h1, 10)
# dropout network: same architecture, with dropout applied to the hidden layer.
# The hidden layer itself must be a dense layer (the original called
# tf.layers.dropout here, which passed 50 as the drop rate and the activation
# function as noise_shape).
dh1 = tf.layers.dense(xs, 50, tf.nn.softmax)
# drop 20% of the hidden activations, only while tf_is_training is True
dh1 = tf.layers.dropout(dh1, rate=0.2, training=tf_is_training)
doutput = tf.layers.dense(dh1, 10)
使用交叉熵作为损失函数。
# loss of the plain network, also exported as a TensorBoard scalar
loss = tf.losses.softmax_cross_entropy(onehot_labels=ys, logits=output)
tf.summary.scalar(name='loss', tensor=loss)
# loss of the dropout network, also exported as a TensorBoard scalar
dloss = tf.losses.softmax_cross_entropy(onehot_labels=ys, logits=doutput)
tf.summary.scalar(name='dloss', tensor=dloss)
使用梯度下降优化器。
# one gradient-descent train operation per network, same learning rate
train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)
dtrain_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(dloss)
训练200次,每10次记录一次loss和dloss。
# session area
with tf.Session() as sess:
    # tensorboard
    merge_op = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    sess.run(tf.global_variables_initializer())
    try:
        # train both networks on the full training set, 200 steps
        for step in range(200):
            sess.run([train_op, dtrain_op], feed_dict={xs: X_train, ys: y_train, tf_is_training: True})
            # every 10 steps: evaluate on the test set with dropout switched off
            if step % 10 == 0:
                test_feed = {xs: X_test, ys: y_test, tf_is_training: False}
                # one run fetches the summary and both losses together
                test_result, rloss, rdloss = sess.run([merge_op, loss, dloss], feed_dict=test_feed)
                test_writer.add_summary(test_result, step)
                print(rloss, rdloss)
    finally:
        # flush pending events and release the log file
        test_writer.close()
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-02-dropout/
三、CNN 卷积神经网络
CNN的介绍
卷积神经网络包含:输入层、隐藏层和输出层。
隐藏层又包含:卷积层和pooling层。
- 图像输入到卷积神经网络后通过卷积来不断的提取特征,每提取一个特征就会增加一个feature map,所以图中的立方体不断的增加厚度(进行一次卷积时,一般会采用多种卷积核进行卷积,一个卷积核对应一个层神经元,所以立方体会变厚);
- 因为参数太多,所以通过pooling来稀疏参数,使我们的网络不至于太复杂。pooling层进行下采样,使得立方体厚度增加了但是却越来越瘦,这就是pooling层的作用,通常采用的是最大值pooling和平均值pooling。
接下来我们使用卷积神经网络训练一个识别文字的模型。
我们可以采用如下结构:
conv1、conv2是卷积层,经过卷积后使用relu函数进行激活,然后再通过pooling层(pooling1、pooling2)来稀疏参数,最后通过两层全连接层来完成最终的处理(flat、output),其中flat将数据进行扁平化处理(即将立方体转成平面),同时采用dropout方法处理过拟合;predict使用softmax函数来进行预测。这几层神经网络规模的变化如下: -
下面是实现代码(filters是指卷积核的种类。kernel_size、pool_size分别是指卷积核和池化窗口的大小。strides是指每次卷积时卷积核的移动距离。padding是指卷积核对边界是如何处理的,'same'表明会处理边界,卷积后的神经层大小和卷积前相同,而'valid'则不会处理边界,卷积后的神经层比卷积前小):
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# source data (one-hot labels), read from a local directory
mnist = input_data.read_data_sets(r"E:\WorkSpace\GitRep\MachineLearning\mnist", one_hot=True)
# define inputs
tf_x = tf.placeholder(tf.float32, shape=[None, 28 * 28])
# [batch, x, y, channel]; -1 lets the batch size be inferred
tf_image = tf.reshape(tf_x, shape=[-1, 28, 28, 1])
tf_y = tf.placeholder(tf.float32, shape=[None, 10])
# controls dropout: True while training, False while evaluating
tf_is_training = tf.placeholder(tf.bool, shape=None)
# define hidden layers
# [conv1 -> relu -> pooling1] -> [conv2 -> relu -> pooling2] -> flat -> output
# conv2d positional args: inputs, filters, kernel_size, strides, padding
conv1 = tf.layers.conv2d(tf_image, 16, 7, 1, 'same', activation=tf.nn.relu)  # -> (batch, 28, 28, 16)
# max_pooling2d positional args: inputs, pool_size, strides
pooling1 = tf.layers.max_pooling2d(conv1, 2, 2)  # -> (batch, 14, 14, 16)
conv2 = tf.layers.conv2d(pooling1, 32, 7, 1, 'same', activation=tf.nn.relu)  # -> (batch, 14, 14, 32)
pooling2 = tf.layers.max_pooling2d(conv2, 2, 2)  # -> (batch, 7, 7, 32)
# flat : (batch, 7, 7, 32) -> (batch, 7*7*32)
flat = tf.reshape(pooling2, [-1, 7 * 7 * 32])
# dropout layer, active only while tf_is_training is True
dropout_flat = tf.layers.dropout(flat, rate=0.2, training=tf_is_training)
# Output layer. The dense layer produces raw logits. softmax is applied
# separately for `predict`, because tf.losses.softmax_cross_entropy applies
# softmax itself and must be given the logits (feeding it softmax output would
# apply softmax twice and flatten the gradients).
logits = tf.layers.dense(dropout_flat, 10)
predict = tf.nn.softmax(logits)
# define loss
loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=logits)
# define train operation
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
# define accuracy. tf.metrics.accuracy returns (acc, update_op) and creates
# 2 local variables; [1] selects update_op.
# NOTE(review): update_op accumulates over every run, so the reported value is
# a running accuracy over all evaluations so far - confirm this is intended.
accuracy = tf.metrics.accuracy(tf.argmax(tf_y, axis=1), tf.argmax(predict, axis=1))[1]
# define init operation (local variables are needed by tf.metrics.accuracy)
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
# session
with tf.Session() as sess:
    sess.run(init_op)
    # train
    for step in range(1, 1001):
        batch_x, batch_y = mnist.train.next_batch(100)  # mini-batch SGD, 100 is the batch size
        sess.run(train_op, feed_dict={tf_x: batch_x, tf_y: batch_y, tf_is_training: True})
        if step % 50 == 0:
            # The loss is only reported every 50 steps, so it is only evaluated
            # here (on the current training batch, dropout off) instead of on
            # every single step.
            rloss = sess.run(loss, feed_dict={tf_x: batch_x, tf_y: batch_y, tf_is_training: False})
            raccuracy = sess.run(accuracy, feed_dict={tf_x: mnist.test.images, tf_y: mnist.test.labels, tf_is_training: False})
            print('step: {}, loss: {}, accuracy: {}'.format(step, rloss, raccuracy))
这是我的运行结果:
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-03-CNN1/
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-04-CNN2/
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-05-CNN3/
https://blog.csdn.net/u012641018/article/details/52238169
四、Saver保存session
使用tf.train.Saver可以保存session,这样下一次可以直接加载训练过的模型。下面是使用方法(修改自CNN的代码):
import os

# saver
saver = tf.train.Saver()
# Saver.save can fail when the parent directory of the checkpoint path is
# missing, so create it up front.
os.makedirs('./cpkts', exist_ok=True)
# session
with tf.Session() as sess:
    sess.run(init_op)
    # train
    for step in range(1, 1001):
        batch_x, batch_y = mnist.train.next_batch(100)  # 100 is the batch size
        sess.run(train_op, feed_dict={tf_x: batch_x, tf_y: batch_y, tf_is_training: True})
        if step % 50 == 0:
            # evaluate on the whole test set with dropout switched off
            rloss, raccuracy = sess.run([loss, accuracy], feed_dict={tf_x: mnist.test.images, tf_y: mnist.test.labels, tf_is_training: False})
            print('step: {}, loss: {}, accuracy: {}'.format(step, rloss, raccuracy))
        if step % 200 == 0:
            # use saver to save the session; global_step is appended to the file name
            save_path = saver.save(sess, './cpkts/cnn.cpkt', global_step=step)
            print(save_path)
以后加载的时候需要再定义一次相同类型的变量
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# source data (one-hot labels), read from a local directory
mnist = input_data.read_data_sets(r"E:\WorkSpace\GitRep\MachineLearning\mnist", one_hot=True)
# define inputs
tf_x = tf.placeholder(tf.float32, shape=[None, 28 * 28])
# [batch, x, y, channel]; -1 lets the batch size be inferred
tf_image = tf.reshape(tf_x, shape=[-1, 28, 28, 1])
tf_y = tf.placeholder(tf.float32, shape=[None, 10])
# controls dropout: True while training, False while evaluating
tf_is_training = tf.placeholder(tf.bool, shape=None)
# define hidden layers (must mirror the graph that produced the checkpoint)
# [conv1 -> relu -> pooling1] -> [conv2 -> relu -> pooling2] -> flat -> output
# conv2d positional args: inputs, filters, kernel_size, strides, padding
conv1 = tf.layers.conv2d(tf_image, 16, 7, 1, 'same', activation=tf.nn.relu)  # -> (batch, 28, 28, 16)
# max_pooling2d positional args: inputs, pool_size, strides
pooling1 = tf.layers.max_pooling2d(conv1, 2, 2)  # -> (batch, 14, 14, 16)
conv2 = tf.layers.conv2d(pooling1, 32, 7, 1, 'same', activation=tf.nn.relu)  # -> (batch, 14, 14, 32)
pooling2 = tf.layers.max_pooling2d(conv2, 2, 2)  # -> (batch, 7, 7, 32)
# flat : (batch, 7, 7, 32) -> (batch, 7*7*32)
flat = tf.reshape(pooling2, [-1, 7 * 7 * 32])
# dropout layer, active only while tf_is_training is True
dropout_flat = tf.layers.dropout(flat, rate=0.2, training=tf_is_training)
# Output layer. The dense layer produces raw logits and softmax is applied
# separately for `predict`: tf.losses.softmax_cross_entropy applies softmax
# itself, so it must be given logits. The dense layer still creates the same
# kernel/bias variables, so existing checkpoints keep loading.
logits = tf.layers.dense(dropout_flat, 10)
predict = tf.nn.softmax(logits)
# define loss
loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=logits)
# define train operation
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
# define accuracy. tf.metrics.accuracy returns (acc, update_op) and creates
# 2 local variables; [1] selects update_op.
accuracy = tf.metrics.accuracy(tf.argmax(tf_y, axis=1), tf.argmax(predict, axis=1))[1]
# saver
saver = tf.train.Saver()
# restore
with tf.Session() as sess:
    cpkt = tf.train.get_checkpoint_state('./cpkts')
    if cpkt and cpkt.model_checkpoint_path:
        # load the most recent checkpoint recorded in ./cpkts
        saver.restore(sess, cpkt.model_checkpoint_path)
        print(sess.run(loss, feed_dict={tf_x: mnist.test.images, tf_y: mnist.test.labels, tf_is_training: False}))
    else:
        # Without a checkpoint the variables are uninitialized, so evaluating
        # the loss would fail; report the real problem instead.
        print('no checkpoint found in ./cpkts - train and save the model first')
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-06-save/
https://blog.csdn.net/u011500062/article/details/51728830
五、RNN循环神经网络
介绍
RNN基本模型如下:
还有一种是 LSTM(Long Short-Term Memory 长短期记忆网络),这是RNN的改进版本。
1.使用LSTM模型对mnist手写数字图片进行分类。
下面首先载入数据:
from tensorflow.examples.tutorials.mnist import input_data
# load MNIST with one-hot encoded labels from the given local directory
mnist = input_data.read_data_sets(r"E:\WorkSpace\GitRep\MachineLearning\mnistt", one_hot=True)
定义我们会用到的一些变量,其中time_step是时间步长(可以理解为每一个时间步长接受一次输入)。我们把图片(28*28)的每一行作为RNN单元一个时间步的输入,所以time_step是28,每次输入的大小input_size也是28
import tensorflow as tf
# hyper parameters
batch_size = 64
time_step = 28  # one image row per time step
input_size = 28  # pixels per row
learning_rate = 1e-2
# evaluation data: the first 2000 test samples
test_x = mnist.test.images[:2000]
test_y = mnist.test.labels[:2000]
# placeholders
tf_x = tf.placeholder(tf.float32, shape=[None, time_step * input_size])
# (batch, 784) -> (batch, time_step, input_size)
tf_image = tf.reshape(tf_x, shape=[-1, time_step, input_size])
tf_y = tf.placeholder(tf.float32, shape=[None, 10])
定义LSTM神经元,其中num_units是一个时间步长中的神经元个数
# rnn
rnn_cell = tf.contrib.rnn.BasicLSTMCell(num_units=64)
# initial_state=None means no explicit initial hidden state is passed, in
# which case dtype must be given.
# time_major=False -> inputs are (batch, time step, input);
# time_major=True would mean (time step, batch, input).
outputs, (h_c, h_n) = tf.nn.dynamic_rnn(
    cell=rnn_cell,
    inputs=tf_image,
    initial_state=None,
    dtype=tf.float32,
    time_major=False,
)
接下来是定义输出以及优化目标
# classify from the output of the last time step only
last_step_output = outputs[:, -1, :]
output = tf.layers.dense(last_step_output, 10)
# compute loss and accuracy
loss = tf.losses.softmax_cross_entropy(onehot_labels=tf_y, logits=output)
# tf.metrics.accuracy returns (accuracy, update_op) and creates two local
# variables; [1] selects update_op
true_class = tf.argmax(tf_y, axis=1)
predicted_class = tf.argmax(output, axis=1)
accuracy = tf.metrics.accuracy(labels=true_class, predictions=predicted_class)[1]
# train operation
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
然后开始训练
# session
with tf.Session() as sess:
    # the local variables belong to the accuracy metric
    init_all = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_all)
    for step in range(1, 1001):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        sess.run(train_op, feed_dict={tf_x: batch_x, tf_y: batch_y})
        if step % 50:
            continue
        # every 50th step: evaluate on the held-out test slice
        rloss, raccuracy = sess.run([loss, accuracy], feed_dict={tf_x: test_x, tf_y: test_y})
        print('step {} | loss {} | accuracy {}'.format(step, rloss, raccuracy))
2.使用LSTM模型进行回归
我们使用LSTM创建一个sin函数去拟合cos函数。首先创建相关变量:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# hyper parameters
batch_size = 32
time_step = 20  # points per sequence
input_size = 1  # one value per time step
learning_rate = 1e-2
# placeholders: input sequence and target sequence share the same layout
tf_x = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
tf_y = tf.placeholder(tf.float32, shape=[None, time_step, input_size])
然后创建rnn和output layer。创建output layer时,需要先将outputs(shape = (batch, time_step, num_units),代码中num_units直接取了batch_size这个变量的值)转为二维(矩阵运算一般都是二维的,三维的....真的没见过),在进行转换的时候,每一个time_step的outputs对应一个time_step的tf_x(每个样本的shape=(time_step, input_size))。output layer应该为每一个time_step输出一个input_size的预测,然后将其转回三维形式(predict.shape = tf_y.shape)。
# rnn
# NOTE: the variable batch_size is reused as the number of hidden units of the
# LSTM cell; the two values only coincide by construction.
hidden_units = batch_size
rnn_cell = tf.contrib.rnn.BasicLSTMCell(num_units=hidden_units)
outputs, (h_c, h_n) = tf.nn.dynamic_rnn(
    cell=rnn_cell,
    inputs=tf_x,
    initial_state=None,
    dtype=tf.float32,
    time_major=False,
)
# output layer: flatten to 2-D, apply one dense layer to every time step,
# then restore the (batch, time_step, input_size) layout of tf_y
outputs_2d = tf.reshape(outputs, shape=[-1, hidden_units])
output = tf.layers.dense(outputs_2d, units=input_size)
predict = tf.reshape(output, shape=[-1, time_step, input_size])
# loss
loss = tf.losses.mean_squared_error(labels=tf_y, predictions=predict)
# train operation
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
进行训练,同时用matplotlib将我们预测的曲线绘制出来。
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
plt.ion()  # enable interactive mode
plt.ylim((-1.2, 1.2))
# Line artists drawn in the previous step. They are removed through
# Artist.remove(), because popping from ax.lines fails on recent Matplotlib.
drawn_lines = []
# session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train
    for step in range(1, 101):
        # each step covers the next full period of length 2*pi
        start, end = step * 2 * np.pi, (step + 1) * 2 * np.pi
        steps = np.linspace(start, end, time_step)
        # the input is sin and the target is cos: the network learns to map
        # a sin sequence onto the matching cos sequence
        x = np.sin(steps)[np.newaxis, :, np.newaxis]  # reshape to (1, time_step, 1)
        y = np.cos(steps)[np.newaxis, :, np.newaxis]  # reshape to (1, time_step, 1)
        feed_dict = {tf_x: x, tf_y: y}
        _, rpredict, rloss = sess.run([train_op, predict, loss], feed_dict=feed_dict)
        steps = steps - step * 2 * np.pi  # shift back so the x axis stays in (0, 2*pi)
        # remove the previous target curve and the previous prediction curve
        for line in drawn_lines:
            line.remove()
        drawn_lines = ax.plot(steps, y.flatten(), 'r-') + ax.plot(steps, rpredict.flatten(), 'b--')
        plt.pause(0.1)
        print('step {} | loss {}'.format(step, rloss))
plt.ioff()  # disable interactive mode
plt.show()
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-08-RNN2/
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-09-RNN3/
https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-10-RNN4/
六、自编码
介绍
自编码器先对输入进行压缩,然后再进行解压,可以通过比较解压后的结果与输入的相似度来确定自编码器的准确率。
我们可以使用自编码器对mnist的数据进行编码和解码来制作一个手写数字的自编码器。
首先还是载入mnist数据。
from tensorflow.examples.tutorials.mnist import input_data
# one_hot=False -> every label is the digit itself, not a one-hot vector
mnist = input_data.read_data_sets(r"E:\WorkSpace\GitRep\MachineLearning\mnistt", one_hot=False)
定义相关参数
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
# hyper parameters
image_size = 28 * 28  # pixels in one flattened image
learning_rate = 2 * 1e-3
view_image_size = 8  # how many test images are shown in the preview figure
# data: the first 100 test samples
test_x = mnist.test.images[:100]
test_y = mnist.test.labels[:100]
# placeholder
tf_image = tf.placeholder(tf.float32, shape=[None, image_size])
定义神经网络,我们将inputs分步压缩,784 -> 128 -> 32 -> 8 -> 4 -> 1,然后再进行解压 1 -> 4 -> 8 -> 32 -> 128 -> 784(各层的activation function可以自己进行调整)
# encoder layers: 784 -> 128 -> 32 -> 8 -> 4 -> 1
encoder_layer1 = tf.layers.dense(tf_image, units=128, activation=tf.nn.relu)
encoder_layer2 = tf.layers.dense(encoder_layer1, units=32, activation=tf.nn.tanh)
encoder_layer3 = tf.layers.dense(encoder_layer2, units=8, activation=tf.nn.tanh)
encoder_layer4 = tf.layers.dense(encoder_layer3, units=4, activation=tf.nn.tanh)
# the code itself: a single linear unit
encodered_layer = tf.layers.dense(encoder_layer4, units=1)
# decoder layers: 1 -> 4 -> 8 -> 32 -> 128 -> 784
decoder_layer1 = tf.layers.dense(encodered_layer, units=4)
decoder_layer2 = tf.layers.dense(decoder_layer1, units=8, activation=tf.nn.tanh)
decoder_layer3 = tf.layers.dense(decoder_layer2, units=32, activation=tf.nn.tanh)
decoder_layer4 = tf.layers.dense(decoder_layer3, units=128, activation=tf.nn.tanh)
# sigmoid on the reconstruction output
decodered_layer = tf.layers.dense(decoder_layer4, units=image_size, activation=tf.sigmoid)
# loss: mean squared error between the input image and its reconstruction
loss = tf.losses.mean_squared_error(labels=tf_image, predictions=decodered_layer)
# train operation
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
在训练之前,我们先输出一部分图片(view_data)以便观察自编码器的效果
# show some images: the top row holds the originals, the bottom row is left
# for the reconstructions
fig, ax = plt.subplots(2, view_image_size)
view_data = mnist.test.images[:view_image_size]
view_data_label = mnist.test.labels[:view_image_size]
for axis, image, label in zip(ax[0], view_data, view_data_label):
    axis.imshow(np.reshape(image, (28, 28)), cmap='gray', label=label)
    # hide the tick marks
    axis.set_xticks(())
    axis.set_yticks(())
plt.ion()
由于我们将输入的维度压缩到了 1 再进行解压,所以需要较多的训练来保证我们的自编码器具有较好的效果,所以将训练次数设置为8000。每200步输出一次loss,同时将当前自编码器对view_data的编码解码结果显示出来。
# session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # train
    print('start training')
    for step in range(1, 8001):
        batch_x, batch_y = mnist.train.next_batch(200)
        sess.run(train_op, feed_dict={tf_image: batch_x})
        if step % 200 != 0:
            continue
        # every 200 steps: report the test loss and redraw the reconstructions
        rloss = sess.run(loss, feed_dict={tf_image: test_x})
        print('step {} | loss {}'.format(step, rloss))
        decodered_data = sess.run(decodered_layer, feed_dict={tf_image: view_data})
        for axis, image, label in zip(ax[1], decodered_data, view_data_label):
            axis.clear()
            axis.imshow(np.reshape(image, (28, 28)), cmap='gray', label=label)
            axis.set_xticks(())
            axis.set_yticks(())
        plt.pause(0.05)
    # after the final step (8000): print the 1-D code of every preview image
    encodered_data = sess.run(encodered_layer, feed_dict={tf_image: view_data})
    for label, code in zip(view_data_label, encodered_data):
        print('label {} | encodered {}'.format(label, code))
plt.ioff()
plt.show()
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-11-autoencoder/
七、scope命名方法
tensorflow有两种scope:name_scope和variable_scope,name_scope是针对于name,而variable_scope是针对于variable的。
我们可以使用tensorflow生成变量来测试这两种scope。tensorflow有两种生成变量的方式:tf.get_variable()和tf.Variable(),前者是提取同名变量,而后者是生成新的变量。测试代码如下
import tensorflow as tf
# create one variable per creation API inside a name_scope ...
with tf.name_scope('name_scope') as ns:
    var1 = tf.get_variable(name='var1', shape=[1], dtype=tf.float32)
    var2 = tf.Variable(name='var2', initial_value=[1], dtype=tf.float32)
# ... and inside a variable_scope
with tf.variable_scope('varivable_scope') as vs:
    var3 = tf.get_variable(name='var3', shape=[1], dtype=tf.float32)
    var4 = tf.Variable(name='var4', initial_value=[1], dtype=tf.float32)
# Output, one name per line:
#   var1:0                   (no name_scope prefix on the get_variable result)
#   name_scope/var2:0
#   varivable_scope/var3:0
#   varivable_scope/var4:0
for variable in (var1, var2, var3, var4):
    print(variable.name)
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-12-scope/
八、批标准化
在神经网络中,数据的分布会对训练产生影响。就拿tanh激活函数来说,如果数据都大于1,那么tanh的结果都是1,进入了tanh的饱和区域,此时1和100的区别都不大。这相当于轻轻拍自己和重重打自己没什么区别一样,神经网络变得迟钝了。这个时候就需要标准化来将数据的值映射到-1到1的范围。在神经网络中进行标准化一般使用批标准化技术。由于未标准化的数据主要影响的是激活函数的结果,所以一般在激活函数之前进行批标准化。
神经网络的批标准化中,先按照均值和方差对数据进行标准化,然后再进行一次反标准化。反标准化中的两个参数 $\gamma$(缩放)和 $\beta$(平移)用来控制标准化的程度,神经网络在训练的时候会调整这两个参数以获得更好的效果。
下面我们来比较一下标准化和非标准化训练的差异。首先创建一个类来创建神经网络。
class neural_network:
    """Fully connected regression network with optional batch normalization.

    Construct it with the input/target/training-flag tensors, then call
    init_layers() once to build the hidden layers, the output layer, the
    loss (`self.loss`) and the train operation (`self.train_op`).
    """

    def __init__(self, tf_x, tf_y, tf_is_training, learning_rate=0.1, with_normalization=False):
        """Store the tensors and settings; no layers are built here.

        :param tf_x: input tensor of the network
        :param tf_y: target tensor the output is compared against
        :param tf_is_training: boolean tensor passed to batch normalization
        :param learning_rate: learning rate of the Adam optimizer
        :param with_normalization: batch-normalize before each activation
        """
        # input_layers[0] is the network input; every hidden layer built by
        # init_layers() is appended, so input_layers[-1] is the newest layer.
        self.input_layers = [tf_x]
        self.output_layer = None
        self.real_output = tf_y
        self.tf_is_training = tf_is_training
        self.learning_rate = learning_rate
        self.with_normalization = with_normalization
        # shared initializers for every dense layer of this network
        self.w_init = tf.random_normal_initializer(0., 1.)
        self.b_init = tf.constant_initializer(-0.2)

    def add_layer(self, input, out_size, ac=None):
        """Build one dense layer on `input` and return its output tensor.

        Without an activation `ac` the raw dense output is returned. With an
        activation, batch normalization (if enabled) is applied between the
        dense layer and the activation.
        """
        pre_activation = tf.layers.dense(input, out_size, kernel_initializer=self.w_init, bias_initializer=self.b_init)
        if not ac:
            return pre_activation
        if self.with_normalization:
            pre_activation = tf.layers.batch_normalization(pre_activation, momentum=0.4, training=self.tf_is_training)
        return ac(pre_activation)

    def init_layers(self, out_size=10, ac=None, layer_num=1):
        """Build `layer_num` hidden layers, the output layer, loss and train op.

        :param out_size: number of units in every hidden layer
        :param ac: activation function of the hidden layers (None for linear)
        :param layer_num: number of hidden layers to stack
        """
        # stack the hidden layers, each one fed by the previous layer
        for _ in range(layer_num):
            self.input_layers.append(self.add_layer(self.input_layers[-1], out_size, ac))
        # linear output layer, as wide as the target's second dimension
        self.output_layer = tf.layers.dense(self.input_layers[-1], self.real_output.shape[1], kernel_initializer=self.w_init, bias_initializer=self.b_init)
        self.loss = tf.losses.mean_squared_error(self.real_output, self.output_layer)
        # moving mean and moving variance need to be updated, so the train
        # operation is made to depend on the collected update ops
        pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(pending_updates):
            self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
然后创建一个不使用批标准化的神经网络和使用批标准化的神经网络,在进行训练后分别显示预测结果。
# hyper parameters
n_samples = 1000
n_test_samples = 300
learning_rate = 0.03
batch_size = 100
n_layers = 8
n_epoch = 20
# train data: y = x^2 - 5 plus gaussian noise
xs = np.linspace(-7, 10, n_samples, dtype=np.float32)[:, np.newaxis]
noise = np.random.normal(0, 2, xs.shape)
ys = np.square(xs) - 5 + noise
# column 0 holds x, column 1 holds y
train_data = np.hstack((xs, ys))
# test data: same curve, but sized by n_test_samples and computed from test_xs
# (the original reused n_samples and xs, leaving n_test_samples unused)
test_xs = np.linspace(-7, 10, n_test_samples, dtype=np.float32)[:, np.newaxis]
noise = np.random.normal(0, 2, test_xs.shape)
test_ys = np.square(test_xs) - 5 + noise
# placeholder
tf_x = tf.placeholder(tf.float32, [None, 1])
tf_y = tf.placeholder(tf.float32, [None, 1])
tf_is_training = tf.placeholder(tf.bool, None)
# nets: index 0 is the plain network, index 1 uses batch normalization
nets = [
    neural_network(tf_x, tf_y, tf_is_training, learning_rate),
    neural_network(tf_x, tf_y, tf_is_training, learning_rate, with_normalization=True),
]
# both networks get n_layers hidden layers of 10 tanh units
for net in nets:
    net.init_layers(out_size=10, ac=tf.nn.tanh, layer_num=n_layers)
# session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_ops = [net.train_op for net in nets]
    for epoch in range(n_epoch):
        print('epoch : ', epoch)
        # reshuffle the rows, then walk through them in mini-batches
        np.random.shuffle(train_data)
        for begin in range(0, len(train_data), batch_size):
            # slicing past the end is clamped, so the last batch may be shorter
            chunk = train_data[begin:begin + batch_size]
            b_x, b_y = chunk[:, 0:1], chunk[:, 1:2]
            sess.run(train_ops, feed_dict={tf_x: b_x, tf_y: b_y, tf_is_training: True})
    # predictions of both networks on the test inputs
    pred, pred_bn = sess.run(
        [net.output_layer for net in nets],
        feed_dict={tf_x: test_xs, tf_y: test_ys, tf_is_training: False},
    )
# plot prediction line
plt.figure(2)
plt.plot(test_xs, pred, c='#FF9359', lw=4, label='original')
plt.plot(test_xs, pred_bn, c='#74BCFF', lw=4, label='batch normalization')
plt.scatter(test_xs, test_ys, c='r', s=50, alpha=0.2, label='goal')
plt.legend()
plt.show()
参考链接:https://morvanzhou.github.io/tutorials/machine-learning/tensorflow/5-13-BN/
九、迁移学习
迁移学习是在已经训练好的神经网络上修改后边的几层(主要是修改输出层,前面的神经层可以认为是提取输入的特征,我们只需要创建一个新的神经层,根据提取好的特征来输出我们想要的结果就可以了),然后再进行一定的训练使神经网络能够输出我们想要的东西。(迁移学习的详细介绍)
我们从ImageNet上下载猫和老虎的图片(百度云下载)来预测体长。使用machrisaa 改写的VGG16 的代码和他提供的 VGG16 train 好了的 model parameters(建议使用百度云下载)来完成我们的迁移学习。
首先,我们改写VGG16的代码(我将原本的输出层prob改成了我的输出层,然后添加了一个用来训练的函数。其中,体长数据是我伪造的,猫的体长大多在32~48cm,而老虎的体长大多在70~130cm)
class Vgg16:
    """VGG16 with frozen pre-trained weights and a small trainable regression head.

    All convolution and fully connected weights are loaded from an .npy file
    and inserted into the graph as constants, so only the two dense layers
    added at the end of build() are trained.
    """

    def __init__(self, vgg16_npy_path=None):
        """Load the pre-trained parameters.

        :param vgg16_npy_path: path of the weight file; when None, "vgg16.npy"
            next to the file that defines this class is used.
        """
        if vgg16_npy_path is None:
            path = inspect.getfile(Vgg16)
            path = os.path.abspath(os.path.join(path, os.pardir))
            path = os.path.join(path, "vgg16.npy")
            vgg16_npy_path = path
            print(path)
        # The file stores a pickled dict, which np.load refuses to read unless
        # allow_pickle=True is passed explicitly.
        # NOTE(review): unpickling executes code from the file - only load
        # weight files from a trusted source.
        self.data_dict = np.load(vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
        print("npy file loaded")

    def build(self, rgb, tf_y):
        """Build the VGG graph from the loaded weights plus the regression head.

        :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1]
        :param tf_y: target tensor used by the mean-squared-error loss
        """
        start_time = time.time()
        print("build model started")
        rgb_scaled = rgb * 255.0
        # Convert RGB to BGR and subtract the per-channel mean.
        # NOTE(review): VGG_MEAN is not defined in this class; it is expected
        # at module level - confirm.
        red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled)
        assert red.get_shape().as_list()[1:] == [224, 224, 1]
        assert green.get_shape().as_list()[1:] == [224, 224, 1]
        assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        bgr = tf.concat(axis=3, values=[
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2],
        ])
        assert bgr.get_shape().as_list()[1:] == [224, 224, 3]

        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool(self.conv5_3, 'pool5')

        self.fc6 = self.fc_layer(self.pool5, "fc6")
        assert self.fc6.get_shape().as_list()[1:] == [4096]
        self.relu6 = tf.nn.relu(self.fc6)
        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.relu(self.fc7)
        self.fc8 = self.fc_layer(self.relu7, "fc8")

        # The original softmax output layer ("prob") is replaced by a
        # trainable regression head: fc8 -> 256 relu units -> 1 value.
        self.mfc = tf.layers.dense(self.fc8, 256, tf.nn.relu)
        # The prediction must be built on top of mfc. The original fed fc8
        # here, which left the mfc layer unused.
        self.predict = tf.layers.dense(self.mfc, 1)
        # loss
        self.loss = tf.losses.mean_squared_error(tf_y, self.predict)
        # train operation
        self.train_op = tf.train.AdamOptimizer(0.1).minimize(self.loss)

        # the weights now live in the graph as constants; drop the dict
        self.data_dict = None
        print(("build model finished: %ds" % (time.time() - start_time)))

    def train(self, n_step=200, batch_size=9):
        """Train the regression head and plot the loss and two sample predictions.

        :param n_step: number of training steps
        :param batch_size: number of randomly drawn samples per step
        """
        # load_data(400) stops at 400 images per class
        xs0, xs1, ys0, ys1 = load_data(400)
        xs = np.concatenate((xs0, xs1), axis=0)
        ys = np.concatenate((ys0, ys1), axis=0)
        indecies = list(range(len(xs)))
        shape_x = xs[0].shape
        shape_y = ys[0].shape
        # placeholder
        tf_x = tf.placeholder(np.float32, [None, *shape_x])
        tf_y = tf.placeholder(np.float32, [None, *shape_y])
        self.build(tf_x, tf_y)

        fig, axs = plt.subplots(1, 3)
        plt.ion()
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # loss before any training: start point of the loss curve
            batch_indecies = np.random.choice(indecies, batch_size)
            b_x, b_y = [xs[i] for i in batch_indecies], [ys[i] for i in batch_indecies]
            ploss = sess.run(self.loss, feed_dict={tf_x: b_x, tf_y: b_y})
            axs[0].set_xlim((0, n_step))
            axs[0].set_ylabel('loss')
            for step in range(n_step):
                batch_indecies = np.random.choice(indecies, batch_size)
                b_x, b_y = [xs[i] for i in batch_indecies], [ys[i] for i in batch_indecies]
                _, rloss = sess.run([self.train_op, self.loss], feed_dict={tf_x: b_x, tf_y: b_y})
                print('step {: 4d} | loss {}'.format(step, rloss))
                # draw the segment from the previous loss to the current one
                axs[0].plot([step, step+1], [ploss, rloss])
                plt.pause(0.05)
                ploss = rloss
            # predict the length for one cat image and one tiger image
            img_kittycat = load_img(
                'E:\\WorkSpace\\src\\transfer learning\\transfer learning image data\\data\\kittycat\\000129037.jpg'
            )
            img_tiger = load_img(
                'E:\\WorkSpace\\src\\transfer learning\\transfer learning image data\\data\\tiger\\391412.jpg'
            )
            pred = sess.run(self.predict, feed_dict={tf_x: [img_kittycat, img_tiger]})
        print('train end', '-'*50, '\n')
        # ravel() takes an order string, not an index, so the invalid
        # positional 0 of the original call is dropped
        axs[1].set_title('{}cm'.format(pred[0].ravel()))
        axs[1].imshow(img_kittycat)
        axs[2].set_title('{}cm'.format(pred[1].ravel()))
        axs[2].imshow(img_tiger)
        plt.ioff()
        plt.savefig('practice4.png')
        plt.show()

    def avg_pool(self, bottom, name):
        """Return `bottom` after 2x2 average pooling with stride 2."""
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        """Return `bottom` after 2x2 max pooling with stride 2."""
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        """Apply the stored convolution `name` (stride 1, SAME), its bias and relu."""
        with tf.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)
            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        """Flatten `bottom` and apply the stored fully connected layer `name`."""
        with tf.variable_scope(name):
            # product of all non-batch dimensions = length of the flat vector
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])
            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)
            # Fully connected layer: matrix product plus bias.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
            return fc

    def get_conv_filter(self, name):
        """Return entry 0 of the stored layer `name` as a constant (the filter)."""
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        """Return entry 1 of the stored layer `name` as a constant (the biases)."""
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        """Return entry 0 of the stored layer `name` as a constant (the weights)."""
        return tf.constant(self.data_dict[name][0], name="weights")
然后我们需要加载图片(需要安装scikit-image库)
def load_img(path):
    """Read the image at `path`, center-crop it to a square and resize it to 224 x 224.

    Pixel values are divided by 255.0 before cropping.
    """
    pixels = skimage.io.imread(path) / 255.0
    # e.g. pixels.shape -> (113, 150, 3)
    height, width = pixels.shape[:2]
    # the largest centered square has the length of the shorter edge
    side = min(height, width)
    top = (height - side) // 2
    left = (width - side) // 2
    square = pixels[top:top + side, left:left + side]
    # resize to 224, 224, 3
    return skimage.transform.resize(square, (224, 224))
def load_data(num=200):
    """Load cat and tiger images together with fake body-length labels.

    For each of the two classes, the .jpg files of its sub-directory are
    loaded with load_img() until `num` images have been collected.

    :param num: number of images per class at which loading stops
    :return: (kittycat images, tiger images, kittycat lengths, tiger lengths);
        the lengths are random arrays of shape (n_images, 1)
    """
    imgs = {'kittycat': [], 'tiger': []}
    base_dir = 'E:\\WorkSpace\\src\\transfer learning\\transfer learning image data\\data'
    for k in imgs:
        path = os.path.join(base_dir, k)
        for file in os.listdir(path):
            if not file.lower().endswith('.jpg'):
                continue
            try:
                resized_img = load_img(os.path.join(path, file))
            except OSError as err:
                # skip files that cannot be read, but say so instead of
                # dropping them silently
                print('skip unreadable image {}: {}'.format(os.path.join(path, file), err))
                continue
            imgs[k].append(resized_img)
            if len(imgs[k]) == num:
                break
    # fake length data for kittycat and tiger:
    # normal noise scaled by 8 / 30 around 40 / 100
    kittycat_y = np.random.randn(len(imgs['kittycat']), 1) * 8 + 40
    tiger_y = np.random.randn(len(imgs['tiger']), 1) * 30 + 100
    return imgs['kittycat'], imgs['tiger'], kittycat_y, tiger_y
做好上面的几步后,就可以开始训练了
if __name__ == '__main__':
    # load the pre-trained weights, then train the new head for 400 steps
    weights_path = 'E:\\WorkSpace\\src\\transfer learning\\vgg16.npy'
    vgg16 = Vgg16(weights_path)
    vgg16.train(400)