使用时间序列模型预测上海新冠确诊人数变化趋势
ARIMA模型
步骤
1、检测数据平稳性
2、根据ACF,PACF 判断p,q值
3、模型训练
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
# Display and Plotting
import seaborn as sns
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
%matplotlib inline
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import acf, pacf
##分解(decomposing) 可以用来把时序数据中的趋势和周期性数据都分离出来:
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARMA
from matplotlib.pylab import rcParams
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
rcParams['figure.figsize'] = 10, 6
# Load the series: the first CSV column is parsed as dates and used as the index.
Data = pd.read_csv('./sh1.csv', index_col=0, parse_dates=[0])
Data.head()

# Plot the raw series.
Data.plot(figsize=(12, 8))
plt.legend(bbox_to_anchor=(1.25, 0.5))
plt.title("confirmed")

# First-order difference. diff() leaves a NaN in the first row, so drop it.
confirm_diff = Data.diff().dropna()
plt.figure()
plt.plot(confirm_diff)
plt.title('First Difference')
plt.show()

# Second-order difference: difference the first difference once more and
# drop the NaN from *that* result (the original dropped NaNs from
# confirm_diff again, so this figure silently re-plotted the first difference).
confirm_diff2 = confirm_diff.diff().dropna()
plt.figure()
plt.plot(confirm_diff2)
plt.title('2')
plt.show()
原始数据
# ACF and PACF of the first-differenced series, up to lag 20.
# The returned figures get their own names so that the `acf` / `pacf`
# functions imported from statsmodels.tsa.stattools are not rebound.
acf_fig = plot_acf(confirm_diff, lags=20)
plt.title("ACF")
acf_fig.show()
pacf_fig = plot_pacf(confirm_diff, lags=20)
plt.title("PACF")
pacf_fig.show()
ACF
PACF
# Fit an ARIMA model with order (p, d, q) = (2, 2, 5) on the loaded series
# and print the fit summary.
arima_order = (2, 2, 5)
model = ARIMA(Data, order=arima_order)
result = model.fit()
print(result.summary())

# Forecast over an explicit start/end date range. The start date must lie
# inside the original data; the end date does not have to.
forecast_start, forecast_end = '20220403', '20220408'
pred = result.predict(
    start=forecast_start,
    end=forecast_end,
    dynamic=True,
    typ='levels',
)
print(pred)
2022-04-03 7189.822693
2022-04-04 7238.730278
2022-04-05 7175.224682
2022-04-06 7230.427479
2022-04-07 7220.730730
2022-04-08 7253.580080
# Draw the forecast and the observed series on one square figure with
# rotated x tick labels; the forecast is plotted first, then the data.
square_size = (6, 6)
plt.figure(figsize=square_size)
plt.xticks(rotation=45)
for series in (pred, Data):
    plt.plot(series)
最终预测结果不太理想