学习自中国大学MOOC TensorFlow学习课程
一、模拟神经网络输入数据的生成(方法1)
# Colab-only magic: select TensorFlow 2.x (silently skipped outside Colab).
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
print(tf.__version__)
# Generate a toy integer sequence 0..9.
dataset = tf.data.Dataset.range(10)
# Sliding windows of size 5, advancing 1 step at a time; drop incomplete trailing windows.
dataset = dataset.window(5, shift=1, drop_remainder=True)
# Flatten each window sub-dataset into a single tensor of 5 elements.
dataset = dataset.flat_map(lambda window: window.batch(5))
# Split each window into features (first 4 values) and label (last value).
dataset = dataset.map(lambda window: (window[:-1], window[-1:]))
# Shuffle the examples with a buffer of 10 elements.
dataset = dataset.shuffle(buffer_size=10)
for x,y in dataset:
    print(x.numpy(), y.numpy())
[4 5 6 7] [8]
[0 1 2 3] [4]
[1 2 3 4] [5]
[5 6 7 8] [9]
[3 4 5 6] [7]
[2 3 4 5] [6]
设置数据批量
两个数据为一批,作为循环神经网络的训练样本
# Group examples into batches of two and prefetch one batch ahead,
# overlapping data preparation with consumption.
dataset = dataset.batch(2).prefetch(1)
for features, label in dataset:
    print("x = ", features.numpy())
    print("y = ", label.numpy())
x = [[0 1 2 3]
[5 6 7 8]]
y = [[4]
[9]]
x = [[1 2 3 4]
[4 5 6 7]]
y = [[5]
[8]]
x = [[3 4 5 6]
[2 3 4 5]]
y = [[7]
[6]]
模拟神经网络输入数据的生成(方法2)
# 模拟生成时间序列
# 模拟生成数据集
# 使用神经网络预测时间序列
# Colab-only magic: select TensorFlow 2.x (silently skipped outside Colab).
try:
    # %tensorflow_version only exists in Colab.
    %tensorflow_version 2.x
except Exception:
    pass
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
print(tf.__version__)
2.4.0
模拟生成时间序列
# 1. 模拟生成时间序列
def plot_series(time, series, format="-", start=0, end=None):
    """Plot the [start:end] slice of `series` against `time` with labelled axes and a grid."""
    t_slice = time[start:end]
    v_slice = series[start:end]
    plt.plot(t_slice, v_slice, format)
    plt.grid(True)
    plt.xlabel("Time")
    plt.ylabel("Value")
def trend(time, slope=0):
    """Linear trend component: the value grows by `slope` per time step (flat by default)."""
    return time * slope
def seasonal_pattern(season_time):
    """Arbitrary within-period shape: a cosine ramp while the phase is below 0.4,
    then an exponential decay for the rest of the cycle."""
    rising = np.cos(season_time * 2 * np.pi)
    falling = 1 / np.exp(3 * season_time)
    return np.where(season_time < 0.4, rising, falling)
def seasonality(time, period, amplitude=1, phase=0):
    """Periodic component: repeat seasonal_pattern once per `period`,
    optionally phase-shifted and scaled by `amplitude`."""
    cycle_position = ((time + phase) % period) / period
    return amplitude * seasonal_pattern(cycle_position)
def noise(time, noise_level=1, seed=None):
    """Gaussian white noise matching `time` in length, scaled by `noise_level`.
    A fixed `seed` makes the noise reproducible."""
    generator = np.random.RandomState(seed)
    return noise_level * generator.randn(len(time))
# Simulated time series = baseline + linear trend + seasonal pattern + Gaussian noise.
# (The original assigned a throwaway trend-only `series` and set `baseline = 10`
# twice; both redundant statements are removed — `series` was overwritten below.)
time = np.arange(4 * 365 + 1, dtype="float32")
baseline = 10
amplitude = 40
slope = 0.05
noise_level = 5
# Create the series
series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)
# Update with noise
series += noise(time, noise_level, seed=42)
# Split into training and validation sets at t = 1000.
split_time = 1000
time_train = time[:split_time]
x_train = series[:split_time]
time_valid = time[split_time:]
x_valid = series[split_time:]
window_size = 20            # window length: 20 time points per training sample
batch_size = 32             # mini-batch size for the recurrent network
shuffle_buffer_size = 1000  # capacity of the shuffle buffer
# Sample the time series above to build a training dataset via sliding windows.
# Args: series data, window size, batch size, shuffle-buffer size.
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Turn a 1-D series into shuffled, batched (inputs, target) pairs:
    each example is `window_size` consecutive points plus the next point as label."""
    ds = tf.data.Dataset.from_tensor_slices(series)
    # Windows of window_size + 1 values, advancing one step at a time;
    # incomplete trailing windows are dropped.
    ds = ds.window(window_size + 1, shift=1, drop_remainder=True)
    # Flatten each window sub-dataset into one tensor.
    ds = ds.flat_map(lambda w: w.batch(window_size + 1))
    # Shuffle, then split each window into (first window_size points, last point).
    ds = ds.shuffle(shuffle_buffer)
    ds = ds.map(lambda w: (w[:-1], w[-1]))
    # Batch and prefetch for the training loop.
    return ds.batch(batch_size).prefetch(1)
.from_tensor_slices方法:https://blog.csdn.net/lizz2276/article/details/107380635
二、SimpleRNN神经网络
2.1 定义SimpleRNN神经网络
# Build a SimpleRNN network; the learning rate will be swept via an LR scheduler.
tf.keras.backend.clear_session()  # reset the TF graph so stale models/layers don't linger
tf.random.set_seed(51)
np.random.seed(51)
# 128 samples per training batch.
train_set = windowed_dataset(x_train, window_size, batch_size=128, shuffle_buffer=shuffle_buffer_size)
layer_stack = [
    # Add a trailing feature axis so each timestep becomes a length-1 vector.
    tf.keras.layers.Lambda(lambda t: tf.expand_dims(t, axis=-1), input_shape=[None]),
    tf.keras.layers.SimpleRNN(40, return_sequences=True),  # 40 hidden units, emits full sequence
    tf.keras.layers.SimpleRNN(40),                         # final state feeds the output layer
    tf.keras.layers.Dense(1),
    # Scale outputs up toward the magnitude of the series values.
    tf.keras.layers.Lambda(lambda t: t * 100.0),
]
model = tf.keras.models.Sequential(layer_stack)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lambda (Lambda) (None, None, 1) 0
_________________________________________________________________
simple_rnn (SimpleRNN) (None, None, 40) 1680
_________________________________________________________________
simple_rnn_1 (SimpleRNN) (None, 40) 3240
_________________________________________________________________
dense (Dense) (None, 1) 41
_________________________________________________________________
lambda_1 (Lambda) (None, 1) 0
=================================================================
Total params: 4,961
Trainable params: 4,961
Non-trainable params: 0
_________________________________________________________________
2.2 定义模型参数
学习率随着 epochs 依照公式 `1e-8 * 10**(epoch / 20)` 动态变化,也就是步长随训练轮数逐步增大;步长由 `LearningRateScheduler` 回调函数进行调度。
# Learning-rate schedule: starts at 1e-8 and multiplies by 10 every 20 epochs,
# so one training run sweeps loss as a function of learning rate.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))
# Optimizer: stochastic gradient descent with momentum 0.9 to help escape local minima.
# `learning_rate` replaces the deprecated `lr` alias in TF 2.x.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
梯度下降法资料:
- https://blog.csdn.net/hanjiangxue_wei/article/details/86712241
- https://www.cnblogs.com/bonelee/p/8392370.html
- https://blog.csdn.net/u012328159/article/details/80311892
- https://blog.csdn.net/u012328159/article/details/80311892
# Compile the model.
# Huber loss: quadratic near zero, linear in the tails (robust to outliers).
# Metric: MAE — mean absolute error.
huber_loss = tf.keras.losses.Huber()
model.compile(loss=huber_loss, optimizer=optimizer, metrics=["mae"])
常用损失函数比较:
- https://blog.csdn.net/nefetaria/article/details/111238515
- https://www.jianshu.com/p/b715888f079b
- https://www.cnblogs.com/wangguchangqing/p/12054772.html
- https://www.zhihu.com/question/21018545?sort=created
- https://baijiahao.baidu.com/s?id=1603857666277651546&wfr=spider&for=pc
- https://zhuanlan.zhihu.com/p/91511706?utm_source=wechat_session
2.3 SimpleRNN神经网络模型训练
# Train for 100 epochs; the LR scheduler callback raises the learning rate each epoch.
history = model.fit(train_set, epochs=100, callbacks=[lr_schedule])
...
Epoch 99/100
8/8 [==============================] - 1s 108ms/step - loss: 112.0148 - mae: 112.5148
Epoch 100/100
8/8 [==============================] - 1s 108ms/step - loss: 118.8196 - mae: 119.3196
误差和 MAE 都在逐步减少;学习率也随 epochs 动态变化(奇怪的是,我这里看不到学习率)
# Loss-vs-learning-rate curve: x axis is the learning rate (log scale), y axis the loss.
lr_values = history.history["lr"]
loss_values = history.history["loss"]
plt.semilogx(lr_values, loss_values)
plt.axis([1e-8, 1e-4, 0, 30])
(1e-08, 0.0001, 0.0, 30.0)
可见,随着学习率随着epochs增加而增加,loss先降后升
是因为步长太大,引起震荡。
学习率太小训练速度太慢
2.4 定义学习率改进RNN网络
对学习率进行固定
# Rebuild the SimpleRNN network and retrain with a small fixed learning rate (5e-5).
tf.keras.backend.clear_session()  # reset the TF graph so stale models/layers don't linger
tf.random.set_seed(51)
np.random.seed(51)
# 128 samples per training batch.
train_set = windowed_dataset(x_train, window_size, batch_size=128, shuffle_buffer=shuffle_buffer_size)
model = tf.keras.models.Sequential([
    # Add a trailing feature axis so each timestep becomes a length-1 vector.
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                           input_shape=[None]),
    tf.keras.layers.SimpleRNN(40, return_sequences=True),  # 40 hidden units, full sequence out
    tf.keras.layers.SimpleRNN(40),                         # final state feeds the output layer
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x: x * 100.0)            # rescale to series magnitude
])
# Fixed small learning rate; `learning_rate` replaces the deprecated `lr` alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=5e-5, momentum=0.9)
# Compile with Huber loss and MAE metric.
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])
# Train the model.
history = model.fit(train_set, epochs=100)
...
8/8 [==============================] - 1s 128ms/step - loss: 6.2125 - mae: 6.6949
Epoch 100/100
8/8 [==============================] - 1s 113ms/step - loss: 4.6292 - mae: 5.1087
2.5 使用RNN网络模型预测时间序列
使用拟合了训练集的RNN网络模型对time=1000以后的时间序列进行预测
蓝色的序列是校验集的时间序列,橙色的序列是使用模型预测时间序列的预测结果
# Predict every window in the series, then keep the predictions aligned
# with the validation segment (time >= split_time).
forecast = []
# Use `t` as the loop variable — the original used `time`, which clobbered the
# module-level `time` array; `forcast` typo fixed as well.
for t in range(len(series) - window_size):
    forecast.append(model.predict(series[t:t + window_size][np.newaxis]))
forecast = forecast[split_time - window_size:]
results = np.array(forecast)[:, 0, 0]
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)   # blue: ground-truth validation series
plot_series(time_valid, results)   # orange: model predictions
这误差还是比较大的,因为我们只用了两个时刻,循环神经网络只用两个时刻还是太短了,所以误差大
而且训练轮数也太少(才100轮)
2.6 结果对比
计算误差loss和平均绝对误差MAE
# Mean absolute error between the validation series and the RNN predictions.
tf.keras.metrics.mean_absolute_error(x_valid, results).numpy()
6.976744
从训练集上获取结果列表
将训练结果用一张图查看拟合水平
再缩放一下
# Retrieve the per-epoch training metrics, plot them, then zoom in past epoch 20.
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
#--------------------------------------------------
# Retrieve a list results on training and test data
# sets for each training epoch
#--------------------------------------------------
mae = history.history['mae']
loss = history.history['loss']
epochs = range(len(loss))  # get number of epochs
#-------------------------------------------------
# Plot MAE and Loss
#-------------------------------------------------
plt.plot(epochs, mae, 'r')
plt.plot(epochs, loss, 'b')
plt.title('MAE and Loss')
plt.xlabel("Epochs")
plt.ylabel("Value")  # fixed: these are error metrics, not accuracy
plt.legend(["MAE", "Loss"])
plt.figure()
epochs_zoom = epochs[20:]
mae_zoom = mae[20:]
loss_zoom = loss[20:]
#------------------------------------------------
# Plot Zoomed MAE and Loss
#------------------------------------------------
plt.plot(epochs_zoom, mae_zoom, 'r')
plt.plot(epochs_zoom, loss_zoom, 'b')
plt.title('MAE and Loss')
plt.xlabel("Epochs")
plt.ylabel("Value")  # fixed: these are error metrics, not accuracy
plt.legend(["MAE", "Loss"])
plt.figure()
# Release resources: send SIGINT to this process to stop the notebook kernel.
import os, signal
os.kill(os.getpid(), signal.SIGINT)
三、双向LSTM模型时间序列预测
- 模拟生成时间序列
- 模拟生成数据集
- 搭建两个LSTM神经网络,一个使用LR_scheduler机制调整学习率,另一个不做处理
- 结果对比,计算误差loss和平均绝对误差MAE
理论上LSTM可以处理更长的时序数据,所以对于像股票预测之类的时间序列预测,LSTM效果会比RNN要好
# Imports for the bidirectional-LSTM section.
# !pip install tf-nightly-gpu-2.0-preview
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
print(tf.__version__)
2.4.0
3.1 模拟时间序列生成
# Simulated time-series generation (same helpers as the RNN section).
def plot_series(time, series, format="-", start=0, end=None):
    """Draw the chosen slice of a time series with labelled axes and a grid."""
    plt.plot(time[start:end], series[start:end], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)
def trend(time, slope=0):
    """Straight-line component with the given slope (flat by default)."""
    return time * slope
def seasonal_pattern(season_time):
    """Arbitrary seasonal shape: cosine ramp for phase < 0.4, exponential decay after."""
    cosine_branch = np.cos(season_time * 2 * np.pi)
    decay_branch = 1 / np.exp(3 * season_time)
    return np.where(season_time < 0.4, cosine_branch, decay_branch)
def seasonality(time, period, amplitude=1, phase=0):
    """Repeat seasonal_pattern once per `period`, phase-shifted and amplitude-scaled."""
    cycle_pos = ((time + phase) % period) / period
    return seasonal_pattern(cycle_pos) * amplitude
def noise(time, noise_level=1, seed=None):
    """Reproducible Gaussian noise of the same length as `time`."""
    rng = np.random.RandomState(seed)
    return rng.randn(len(time)) * noise_level
# Simulated series = baseline + linear trend + seasonality + Gaussian noise.
# (Redundant statements removed: a trend-only `series` that was immediately
# overwritten, and a second `baseline = 10`.)
time = np.arange(4 * 365 + 1, dtype="float32")
baseline = 10
amplitude = 40
slope = 0.05
noise_level = 5
# Create the series
series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)
# Update with noise
series += noise(time, noise_level, seed=42)
# Train / validation split at t = 1000.
split_time = 1000
time_train = time[:split_time]
x_train = series[:split_time]
time_valid = time[split_time:]
x_valid = series[split_time:]
window_size = 20
batch_size = 32
shuffle_buffer_size = 1000
# Build the training dataset by sliding windows over the series.
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Return shuffled batches of (window_size inputs, single target) pairs."""
    windows = tf.data.Dataset.from_tensor_slices(series).window(
        window_size + 1, shift=1, drop_remainder=True)
    tensors = windows.flat_map(lambda w: w.batch(window_size + 1))
    pairs = tensors.shuffle(shuffle_buffer).map(lambda w: (w[:-1], w[-1]))
    return pairs.batch(batch_size).prefetch(1)
3.2 定义双向LSTM模型
双向的LSTM,也就是它的隐层至少有2层:
- 一层:从左往右的时序顺序
- 另一层:从右往左,按照反向序列的方式工作
- 长短记忆机制
第一个时刻隐层的输入,除了输出到输出层外,还要向下一个时刻(第二层)做输出
# Build a bidirectional-LSTM network.
# (The original called tf.keras.backend.clear_session() twice; once is enough.)
tf.keras.backend.clear_session()  # reset the default graph so layer names start fresh
tf.random.set_seed(51)
np.random.seed(51)
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)
model = tf.keras.models.Sequential([
    # Add a trailing feature axis: each timestep becomes a length-1 vector.
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                           input_shape=[None]),
    # Two stacked bidirectional LSTM layers: each processes the window in both
    # the forward and reverse time directions.
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # One output unit: predict a single future time point.
    tf.keras.layers.Dense(1),
    # Rescale toward the magnitude of the series values.
    tf.keras.layers.Lambda(lambda x: x * 100.0)
])
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lambda (Lambda) (None, None, 1) 0
_________________________________________________________________
bidirectional (Bidirectional (None, None, 64) 8704
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64) 24832
_________________________________________________________________
dense (Dense) (None, 1) 65
_________________________________________________________________
lambda_1 (Lambda) (None, 1) 0
=================================================================
Total params: 33,601
Trainable params: 33,601
Non-trainable params: 0
_________________________________________________________________
3.3 使用LR_scheduler动态步长机制调整学习率
# LR sweep: start at 1e-8 and grow by a factor of 10 every 20 epochs.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-8 * 10**(epoch / 20))
# SGD with initial learning rate 1e-8 and momentum 0.9.
# `learning_rate` replaces the deprecated `lr` alias in TF 2.x.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
3.4 模型编译
# Compile with Huber loss (robust to outliers) and MAE as the reported metric.
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.Huber(),
              metrics=["mae"])
3.5 模型训练
# Train for 100 epochs with the LR-sweep scheduler attached.
history = model.fit(dataset, epochs=100, callbacks=[lr_schedule])
...
31/31 [==============================] - 1s 20ms/step - loss: 10.9023 - mae: 11.3891
Epoch 100/100
31/31 [==============================] - 1s 20ms/step - loss: 12.6077 - mae: 13.0998
经过第一次模型训练后,可以通过查看误差曲线来寻找最佳学习率。
相对于普通RNN的误差曲线,可以明显看到双向LSTM的误差曲线要平滑许多,误差曲线更平稳。用双向LSTM稳定性要好一些
# Loss vs learning rate on a log-scaled x axis; pick the LR where loss is low and stable.
plt.semilogx(history.history["lr"], history.history["loss"])
plt.axis([1e-8, 1e-4, 0, 30])
(1e-08, 0.0001, 0.0, 30.0)
参照误差曲线,将学习率固定到1e-5
# Based on the loss-vs-LR curve, fix the learning rate at 1e-5.
# Rebuild the bidirectional-LSTM network with a constant learning rate.
# (The original called tf.keras.backend.clear_session() twice; once is enough.)
tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)
model = tf.keras.models.Sequential([
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                           input_shape=[None]),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x: x * 100.0)
])
# Fixed learning rate; `learning_rate` replaces the deprecated `lr` argument.
model.compile(loss="mse",
              optimizer=tf.keras.optimizers.SGD(learning_rate=1e-5, momentum=0.9),
              metrics=["mae"])
# Train without per-epoch log output.
history = model.fit(dataset, epochs=100, verbose=0)
3.6 用校验样本做预测
# Predict over every window; keep only the predictions aligned with the validation split.
forecast = []
results = []
# Use `t` as the loop variable — the original used `time`, which clobbered the
# module-level `time` array.
for t in range(len(series) - window_size):
    forecast.append(model.predict(series[t:t + window_size][np.newaxis]))
forecast = forecast[split_time - window_size:]
results = np.array(forecast)[:, 0, 0]
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)   # blue: ground-truth validation series
plot_series(time_valid, results)   # orange: model predictions
这个拟合曲线明显要比普通RNN曲线要好,这是因为双向相当于把两个方向的时间序列特征综合起来了,所以一般会比只有单个方向预测的准确性要好
3.7 结果对比
计算误差loss和平均绝对误差MAE
# Mean absolute error between the validation series and the BiLSTM predictions.
tf.keras.metrics.mean_absolute_error(x_valid, results).numpy()
7.598343
我们训练的模型和实际时间序列存在一定的误差,我们通过绝对平均误差MAE对训练结果进行评估
# Retrieve per-epoch training metrics, plot them, then zoom in past epoch 20.
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
#-----------------------------------------------------------
# Retrieve a list of list results on training and test data
# sets for each training epoch
#-----------------------------------------------------------
mae = history.history['mae']
loss = history.history['loss']
epochs = range(len(loss))  # Get number of epochs
#------------------------------------------------
# Plot MAE and Loss
#------------------------------------------------
plt.plot(epochs, mae, 'r')
plt.plot(epochs, loss, 'b')
plt.title('MAE and Loss')
plt.xlabel("Epochs")
plt.ylabel("Value")  # fixed: these are error metrics, not accuracy
plt.legend(["MAE", "Loss"])
plt.figure()
epochs_zoom = epochs[20:]
mae_zoom = mae[20:]
loss_zoom = loss[20:]
#------------------------------------------------
# Plot Zoomed MAE and Loss
#------------------------------------------------
plt.plot(epochs_zoom, mae_zoom, 'r')
plt.plot(epochs_zoom, loss_zoom, 'b')
plt.title('MAE and Loss')
plt.xlabel("Epochs")
plt.ylabel("Value")  # fixed: these are error metrics, not accuracy
plt.legend(["MAE", "Loss"])
plt.figure()
3.8 调整不同的学习率和神经网络层数来训练(优化上述模型)
调整学习率
# Retrain the two-layer BiLSTM with a smaller fixed learning rate (1e-6).
tf.keras.backend.clear_session()
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)
model = tf.keras.models.Sequential([
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                           input_shape=[None]),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x: x * 100.0)
])
# `learning_rate` replaces the deprecated `lr` argument in TF 2.x.
model.compile(loss="mse", optimizer=tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9))
model.fit(dataset, epochs=100, verbose=1)
...
Epoch 99/100
31/31 [==============================] - 1s 21ms/step - loss: 52.5941
Epoch 100/100
31/31 [==============================] - 1s 19ms/step - loss: 43.1076
增加一个时刻(增加一个双向LSTM层)
# Add a third bidirectional LSTM layer to test whether extra depth helps.
tf.keras.backend.clear_session()
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)
model = tf.keras.models.Sequential([
    tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1),
                           input_shape=[None]),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(1),
    tf.keras.layers.Lambda(lambda x: x * 100.0)
])
# `learning_rate` replaces the deprecated `lr` argument in TF 2.x.
model.compile(loss="mse", optimizer=tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9))
model.fit(dataset, epochs=100)
...
31/31 [==============================] - 1s 29ms/step - loss: 43.6789
Epoch 100/100
31/31 [==============================] - 1s 30ms/step - loss: 42.8390
通过比较损失值loss可以发现,增加一层LSTM层神经网络反而增加了误差,所以选取合适的网络层数至关重要
这是因为对于时序数据而言,增加层次很可能会导致一些信息的丢失。所以深度学习的层次不能太多,太多反而效果不好
时刻取多长需要通过实验确定,这是一个超参
# Release resources: send SIGINT to this process to stop the notebook kernel.
import os, signal
os.kill(os.getpid(), signal.SIGINT)