本文使用耶拿(Jena)天气数据集对温度进行预测。由于数据量过大,笔者只使用前100天的温度数据(共14400个数据点)作为训练集,并预测其后10天的温度(共1440个数据点)。主要思想与《LSTM进行时间序列预测(一)》相同,此处不再做过多分析,完整代码如下:
import csv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
def temp_dataset(path='jena_climate_2009_2016.csv', column=2):
    """Load one numeric column of a CSV file as a column vector.

    Rows whose selected cell is missing or cannot be parsed as a float
    (for example the header row, blank lines, or short rows) are skipped.

    Args:
        path: CSV file to read. Defaults to the Jena climate file used by
            this script.
        column: Zero-based index of the column to extract. Defaults to 2,
            the column this script treats as the temperature series.

    Returns:
        A float ``numpy.ndarray`` of shape ``(n, 1)`` holding the parsed
        values in file order (``(0, 1)`` when no row could be parsed).
    """
    dataset = []
    # newline='' is what the csv module recommends so that embedded
    # newlines inside quoted fields are handled by the reader itself.
    with open(path, encoding='utf-8-sig', newline='') as f:
        for item in csv.reader(f):
            try:
                dataset.append(float(item[column]))
            except (ValueError, IndexError):
                # Non-numeric cell (header) or row too short: skip it, but
                # let unrelated errors (e.g. KeyboardInterrupt) propagate.
                continue
    return np.array(dataset, dtype=float).reshape(-1, 1)
def sc_fit_transform(nDlist):
    """Fit a min-max scaler on ``nDlist`` and return it with the scaled data.

    Args:
        nDlist: 2-D array-like accepted by ``MinMaxScaler.fit_transform``.

    Returns:
        A ``(scaler, scaled)`` tuple: the fitted ``MinMaxScaler`` (needed
        later to undo the scaling) and the data rescaled to the 0-1 range
        as a ``numpy.ndarray``.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = np.array(scaler.fit_transform(X=nDlist))
    return scaler, scaled
###############################################################################
# Hyperparameters.
timestep = 144        # past samples fed to the model for each prediction
training_num = 14400  # number of samples used for training
epoch = 10            # number of training epochs
batch_size = 200      # samples per training batch
###############################################################################
# Load the full series once; inputs and targets are the same series, with
# the targets shifted forward by one sample.
listDataset = temp_dataset()
xTrainDataset = listDataset[:training_num]
print(xTrainDataset.shape)
yTrainDataset = listDataset[1:training_num + 1]
print(yTrainDataset.shape)
# Rescale inputs and targets to the 0-1 range, keeping each fitted scaler
# so the scaling can be undone later.
scTrainDataseX, xTrainDataset = sc_fit_transform(xTrainDataset)
scTrainDataseY, yTrainDataset = sc_fit_transform(yTrainDataset)
###############################################################################
# Build the sliding-window training set for the LSTM: every sample is the
# `timestep` scaled values that precede position i.
xTrain = np.array([
    xTrainDataset[i - timestep:i] for i in range(timestep, training_num)
])
print(xTrain.shape)
# One target per window, taken at position i of the shifted target series.
# NOTE(review): yTrainDataset[i] corresponds to listDataset[i + 1] while the
# window ends at listDataset[i - 1], so the target lies two samples past the
# window's last value rather than being the immediate next one - confirm
# this is intended.
yTrain = np.array(yTrainDataset[timestep:training_num])
print(yTrain.shape)
###############################################################################
# Build the network as a Sequential model: one LSTM layer with 128 units
# followed by a single-output Dense layer.
model = Sequential()
model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
model.add(Dense(1))
# Configure training. This is a regression task optimised on mean squared
# error, so classification accuracy carries no information here; report the
# mean absolute error alongside the loss instead.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae'])
model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
###############################################################################
# Prepare the test data (15840 = 14400 training samples + 1440 more).
# The test slices are scaled with the scalers that were fitted on the
# training data: the model only ever saw values in that scaling, so fitting
# fresh scalers on the test slice would feed it differently scaled inputs
# and map its outputs back to degrees with the wrong min/max.
xTestDataset = scTrainDataseX.transform(listDataset[training_num:15840 - 2])
yTestDataset = scTrainDataseY.transform(listDataset[training_num + 1:15840 - 1])
# The original test-scaler names are kept bound; they now refer to the
# training scalers.
scTesDatasetX, scTestDataseY = scTrainDataseX, scTrainDataseY
# Build the sliding-window test inputs, one window of `timestep` scaled
# values per prediction.
xTest = np.array([
    xTestDataset[i - timestep:i] for i in range(timestep, len(xTestDataset))
])
# Ground-truth targets for those windows, converted back to original units.
# NOTE(review): as in the training set, the target for a window ending at
# x[i - 1] is y[i], i.e. two samples past the window's last value - confirm
# this is intended.
yTest = scTestDataseY.inverse_transform(
    yTestDataset[timestep:len(xTestDataset)]
)
###############################################################################
# Run the model on the test windows and undo the scaling on its output.
yPredictes = model.predict(x=xTest)
yPredictes = scTestDataseY.inverse_transform(yPredictes)
###############################################################################
# Compare the results on one chart: red is the real data, blue is the
# prediction.
plt.plot(yTest, color='red', label='Real')
plt.plot(yPredictes, color='blue', label='Predict')
plt.title(label='Prediction')
plt.xlabel(xlabel='Time')
plt.ylabel(ylabel='T')
plt.legend()
plt.show()
# Evaluation metrics: MAE, RMSE and R^2.
mae = mean_absolute_error(yTest, yPredictes)
# RMSE is computed as sqrt(MSE). The `squared=False` argument of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(yTest, yPredictes))
r2 = r2_score(yTest, yPredictes)
print(mae, rmse, r2)
# Output recorded from one run of the original script:
# 0.5398017517750193 0.7921849560734467 0.9763136585185972
测试集和预测数据折线图:
mae:0.5583895525423784
rmse:0.8061969889732786
r2:0.9742544305395516