首先使用 LOWESS(局部加权回归)方法按地区拟合产量随年份变化的趋势,再根据该趋势计算产量异常值(实际产量相对趋势产量的百分比偏差)。
代码如下:
import pandas as pd
import statsmodels.api as sm
# Shorthand for the LOWESS smoother exposed by statsmodels.
lowess = sm.nonparametric.lowess
# Load the input table into a DataFrame.
# NOTE(review): pd.read_excel() is called here without its required first
# argument (the workbook path / file object), so this line raises TypeError
# as written. The source path appears to have been elided -- supply it here.
# The code further down reads the 'District', 'Year' and 'Yield' columns.
data = pd.read_excel()
# Compute the LOWESS yield trend.
# For every district, smooth Yield against Year and store the fitted values
# in the 'Yield_trend' column of `data`, aligned row-for-row with the input.
if 'Yield_trend' not in data.columns:
    # Create the target column up front so the masked assignment below
    # never has to rely on implicit column creation.
    data['Yield_trend'] = float('nan')

region = set(data['District'])
for r in region:
    # Boolean mask selecting the rows of the current district. Using the mask
    # (rather than index labels) keeps the write-back correct even when the
    # DataFrame index contains duplicate labels.
    in_district = data['District'] == r
    data1 = data[in_district]
    year = data1['Year']
    Yield = data1['Yield']
    # return_sorted=False makes lowess return a 1-D array of fitted values in
    # the same order and of the same length as the input observations. The
    # default (return_sorted=True) orders the result by year and drops
    # NaN/inf observations, which would attach trend values to the wrong rows
    # whenever a district's rows are not already sorted by year, or fail on a
    # length mismatch when a value is missing.
    Yield_trend = lowess(Yield, year, frac=1./3., return_sorted=False)
    # One vectorised assignment replaces the per-row loop with a manual counter.
    data.loc[in_district, 'Yield_trend'] = Yield_trend
# Compute the yield anomaly: the deviation of the observed yield from the
# fitted trend, expressed as a percentage of the trend.
# Both operands are selected with double brackets, so they are single-column
# DataFrames and the result is a 2-D array with one column.
Yield = data[['Yield']]  # observed yield
Yield_trend = data[['Yield_trend']]  # fitted trend
observed = Yield.values
fitted = Yield_trend.values
Yield_anomaly = (observed - fitted) / fitted * 100