本文旨在进行时间序列预测,采用较为简单的深度学习模型,数据集格式样式如下:
(此处原为数据集示例截图 image.png,图片未随文保留。)
用历史的20个周期每天的数量预测下一个周期的数量。
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
# Convert a raw series into the (window, next value) format used for training.
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into sliding-window training samples.

    Each sample pairs ``n_steps`` consecutive values with the single value
    that immediately follows them.

    Args:
        sequence: Sized, sliceable 1-D sequence of observations.
        n_steps: Number of consecutive values in each input window (>= 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[k]`` is ``sequence[k:k + n_steps]``
        and ``y[k]`` is ``sequence[k + n_steps]``. Both are empty when the
        sequence has no more than ``n_steps`` values.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be a positive integer")

    X, y = [], []
    # A window is usable only if one value remains after it to act as target,
    # so the last valid start index is len(sequence) - n_steps - 1.
    for start in range(len(sequence) - n_steps):
        end_ix = start + n_steps
        X.append(sequence[start:end_ix])
        y.append(sequence[end_ix])
    return array(X), array(y)
def base_model(n_steps):
    """Build and compile a small fully connected regression network.

    The network has one hidden ``Dense`` layer of 100 ReLU units followed by
    a single linear output unit, compiled with the Adam optimizer and
    mean-squared-error loss.

    Args:
        n_steps: Number of input features, i.e. the window length of each
            training sample.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=n_steps))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model
file = '数据集.csv'
# on_bad_lines='skip' replaces error_bad_lines=False, which was deprecated in
# pandas 1.3 and removed in pandas 2.0; malformed rows are still skipped.
series = pd.read_csv(file, header=0, index_col=0, on_bad_lines='skip', encoding='gbk')
data = series.values

n_steps = 3
model = base_model(n_steps)
result = []
# Every row of the table is treated as one independent series.
# NOTE(review): the same model instance is fitted on each row in turn, so the
# fit for a row starts from the weights left by the previous rows. Confirm this
# carry-over is intended; otherwise build a fresh model inside the loop.
for row in data:
    X, y = split_sequence(row, n_steps)
    model.fit(X, y, epochs=2000, verbose=0)
    # Predict the next value from the last n_steps observations of the row.
    x_input = array(row[-n_steps:]).reshape((1, n_steps))
    yhat = model.predict(x_input, verbose=0)
    # Round to a whole number and clamp at zero; float() makes round() return
    # a plain int regardless of the NumPy scalar type.
    res = max(0, round(float(yhat[0][0])))
    print(res)
    result.append(res)
n_steps=3表明用每一条数据的3个周期和下一个周期进行训练学习。样本第一条处理格式如下。
打印前4条预测结果如下:
第一条数据预测的下一周期数量是 20,第二、三、四条都是 0。以上代码可以作为用深度学习做时间序列预测的基本格式。
下面沿用这一格式,用 CNN 进行时间序列预测,直接上代码。
from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
import pandas as pd
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into sliding-window training samples.

    Each sample pairs ``n_steps`` consecutive values with the single value
    that immediately follows them.

    Args:
        sequence: Sized, sliceable 1-D sequence of observations.
        n_steps: Number of consecutive values in each input window (>= 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[k]`` is ``sequence[k:k + n_steps]``
        and ``y[k]`` is ``sequence[k + n_steps]``. Both are empty when the
        sequence has no more than ``n_steps`` values.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be a positive integer")

    X, y = [], []
    # A window is usable only if one value remains after it to act as target,
    # so the last valid start index is len(sequence) - n_steps - 1.
    for start in range(len(sequence) - n_steps):
        end_ix = start + n_steps
        X.append(sequence[start:end_ix])
        y.append(sequence[end_ix])
    return array(X), array(y)
def base_model(n_steps, n_features):
    """Build and compile a small 1-D convolutional regression network.

    Layer stack: ``Conv1D`` (64 filters, kernel size 2, ReLU), max pooling
    with pool size 2, flatten, a 50-unit ReLU ``Dense`` layer, and a single
    linear output unit. Compiled with the Adam optimizer and
    mean-squared-error loss.

    Args:
        n_steps: Number of time steps in each input window.
        n_features: Number of features observed at each time step.

    Returns:
        The compiled ``Sequential`` model, expecting inputs of shape
        ``(samples, n_steps, n_features)``.
    """
    model = Sequential()
    model.add(
        Conv1D(
            filters=64,
            kernel_size=2,
            activation='relu',
            input_shape=(n_steps, n_features),
        )
    )
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model
file = '数据集.csv'
# on_bad_lines='skip' replaces error_bad_lines=False, which was deprecated in
# pandas 1.3 and removed in pandas 2.0; malformed rows are still skipped.
series = pd.read_csv(file, header=0, index_col=0, on_bad_lines='skip', encoding='gbk')
data = series.values

n_steps = 3
n_features = 1  # univariate series: one value per time step
model = base_model(n_steps, n_features)
result = []
# Every row of the table is treated as one independent series.
# NOTE(review): the same model instance is fitted on each row in turn, so the
# fit for a row starts from the weights left by the previous rows. Confirm this
# carry-over is intended; otherwise build a fresh model inside the loop.
for row in data:
    X, y = split_sequence(row, n_steps)
    # Conv1D expects (samples, time steps, features).
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    model.fit(X, y, epochs=2000, verbose=0)
    # Predict the next value from the last n_steps observations of the row.
    x_input = array(row[-n_steps:]).reshape((1, n_steps, n_features))
    yhat = model.predict(x_input, verbose=0)
    # Round to a whole number and clamp at zero; float() makes round() return
    # a plain int regardless of the NumPy scalar type.
    res = max(0, round(float(yhat[0][0])))
    print(res)
    result.append(res)
LSTM 在时间序列预测上的效果通常比大多数模型要好,代码格式如下:
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
import pandas as pd
def split_sequence(sequence, n_steps):
    """Split a univariate sequence into sliding-window training samples.

    Each sample pairs ``n_steps`` consecutive values with the single value
    that immediately follows them.

    Args:
        sequence: Sized, sliceable 1-D sequence of observations.
        n_steps: Number of consecutive values in each input window (>= 1).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[k]`` is ``sequence[k:k + n_steps]``
        and ``y[k]`` is ``sequence[k + n_steps]``. Both are empty when the
        sequence has no more than ``n_steps`` values.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        raise ValueError("n_steps must be a positive integer")

    X, y = [], []
    # A window is usable only if one value remains after it to act as target,
    # so the last valid start index is len(sequence) - n_steps - 1.
    for start in range(len(sequence) - n_steps):
        end_ix = start + n_steps
        X.append(sequence[start:end_ix])
        y.append(sequence[end_ix])
    return array(X), array(y)
def base_model(n_steps, n_features):
    """Build and compile a single-layer LSTM regression network.

    The network has one ``LSTM`` layer of 50 ReLU units followed by a single
    linear output unit, compiled with the Adam optimizer and
    mean-squared-error loss. The model is only built and compiled here;
    fitting is left to the caller.

    Args:
        n_steps: Number of time steps in each input window.
        n_features: Number of features observed at each time step.

    Returns:
        The compiled ``Sequential`` model, expecting inputs of shape
        ``(samples, n_steps, n_features)``.
    """
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model
file = '数据集.csv'
# on_bad_lines='skip' replaces error_bad_lines=False, which was deprecated in
# pandas 1.3 and removed in pandas 2.0; malformed rows are still skipped.
series = pd.read_csv(file, header=0, index_col=0, on_bad_lines='skip', encoding='gbk')
data = series.values

n_steps = 3
n_features = 1  # univariate series: one value per time step
model = base_model(n_steps, n_features)
result = []
# Every row of the table is treated as one independent series.
# NOTE(review): the same model instance is fitted on each row in turn, so the
# fit for a row starts from the weights left by the previous rows. Confirm this
# carry-over is intended; otherwise build a fresh model inside the loop.
for row in data:
    X, y = split_sequence(row, n_steps)
    # LSTM expects (samples, time steps, features).
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    model.fit(X, y, epochs=2000, verbose=0)
    # Predict the next value from the last n_steps observations of the row.
    x_input = array(row[-n_steps:]).reshape((1, n_steps, n_features))
    yhat = model.predict(x_input, verbose=0)
    # Round to a whole number and clamp at zero; float() makes round() return
    # a plain int regardless of the NumPy scalar type.
    res = max(0, round(float(yhat[0][0])))
    print(res)
    result.append(res)