有2013-2021的文件,挑选部分区域的站点id进行合并。
1、感兴趣的站点存储在 urban_<region>.txt（例如 urban_jjj.txt）里面，里面是我想要的 region 里所有站点的 siteID
region = 'jjj'
# Station IDs of interest live in urban_<region>.txt (one siteID per row,
# first row skipped as a header).
fname = '/home/wangnan/aqi_data_v2/post_data/' + 'urban_' + region + '.txt'
df = pd.read_csv(fname, sep='\\s+', skiprows=[0], header=None)
df.columns = ['siteid']
# np.str was deprecated in NumPy 1.20 and removed in 1.24; the builtin str
# produces the same 'nnnnA' station codes.
stid = [str(i) + 'A' for i in df.siteid]
# Concatenate the yearly daily files for 2013-2021.
dfile = 'daily_'
fpath = '/home/wangnan/aqi_data_v2/'
year = ['2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021']
# Loop over stations; for each station collect every year's daily file.
for sid in stid:
    combine = []
    for yr in year:
        fname = fpath + yr + '/' + dfile + yr + '_' + sid + '.txt'
        df = pd.read_csv(fname, sep='\\s+', skiprows=[0], header=None)
        df.columns = ['month', 'day', 'o3', 'no2', 'd_o3', 'n_o3', 'o3max',
                      'o3_8hmax', 'pm25', 'co', 'acco3', 'accno2']
        combine.append(df)
    # Concatenate and key each chunk by its year so ordering can be verified.
    fout = pd.concat(combine, axis=0, keys=year)
    fout.to_csv('./merge_' + region + '_' + sid + '.csv')
2、读取1的结果,并在文件中加入前一天20:00-23:00的前夜累积O3和NO2,这里就需要错位排列
用到了df1=df.replace(-999.0,np.nan) #批量替换 很舒服
用到了final=df2.groupby(['year','month']).mean().reset_index() #按多个要素分类求平均
用到了 lastnite_no2=np.hstack((np.nan,lastnite_no2[:-1])) #错位拼接,前一天的结果 就是当天的前夜累积
# Step 2: re-read each merged per-station file, append the previous night's
# (20:00-23:00) accumulated O3 and NO2 by shifting the daily accumulations
# down one row, then compute monthly means per (year, month).
fname = '/home/wangnan/aqi_data_v2/post_data/' + 'urban_' + region + '.txt'
df = pd.read_csv(fname, sep='\\s+', skiprows=[0], header=None)
df.columns = ['siteid']
# np.str was removed in NumPy 1.24; use the builtin str.
stid = [str(i) + 'A' for i in df.siteid]
lujin = '/home/wangnan/aqi_data_v2/forChenxi/'
for sid in stid:
    fname = lujin + 'merge_' + region + '_' + sid + '.csv'
    df = pd.read_csv(fname)
    # The merged CSV carries the concat keys as two leading columns.
    df.columns = ['year', 'index', 'month', 'day', 'o3', 'no2', 'd_o3', 'n_o3',
                  'o3max', 'o3_8hmax', 'pm25', 'co', 'acco3', 'accno2']
    # The previous day's accumulation is "last night" for the current day.
    # shift(1) moves values down one row and puts NaN in the first row,
    # exactly like np.hstack((np.nan, arr[:-1])).
    df['lstniteO3'] = df['acco3'].shift(1)
    df['lstniteno2'] = df['accno2'].shift(1)
    # Treat the -999 missing flag and exact zeros as NaN before averaging.
    df1 = df.replace(-999.0, np.nan)
    df2 = df1.replace(0.0, np.nan)
    # Monthly mean per (year, month); NaNs are skipped by mean().
    final = df2.groupby(['year', 'month']).mean().reset_index()
    final.to_csv('final_monthly_' + region + '_' + sid + '.csv', index=False)
3、将所有站点的月平均结果合并，并对站点维度求平均
主要用到 np.nanmean 和 list.append
# Step 3: stack every station's monthly file and average across stations
# (axis 0), ignoring NaN gaps with np.nanmean.
fname = '/home/wangnan/aqi_data_v2/post_data/' + 'urban_' + region + '.txt'
df = pd.read_csv(fname, sep='\\s+', skiprows=[0], header=None)
df.columns = ['siteid']
# np.str was removed in NumPy 1.24; use the builtin str.
stid = [str(i) + 'A' for i in df.siteid]
iy = []       # per-station year columns
im = []       # per-station month columns
mdo3 = []     # per-station daily-max 8h O3 monthly means
lstno2 = []   # per-station last-night NO2 monthly means
lsto3 = []    # per-station last-night O3 monthly means
lujin = '/home/wangnan/aqi_data_v2/forChenxi/'
for sid in stid:
    df = pd.read_csv(lujin + 'final_monthly_' + region + '_' + sid + '.csv')
    iy.append(np.array(df.year))
    im.append(np.array(df.month))
    mdo3.append(np.array(df.o3_8hmax))
    lstno2.append(np.array(df.lstniteno2))
    lsto3.append(np.array(df.lstniteO3))
# Sanity check: (n_stations, n_months) — all stations must share one length.
print(np.shape(lsto3))
lasto3 = np.nanmean(lsto3, axis=0)
lastno2 = np.nanmean(lstno2, axis=0)
o38h = np.nanmean(mdo3, axis=0)
# year/month are identical across stations, so their nanmean simply
# recovers the shared (year, month) axis.
year = np.nanmean(iy, axis=0)
month = np.nanmean(im, axis=0)
out = pd.DataFrame({'year': year, 'month': month, 'o38h': o38h,
                    'lstniteO3': lasto3, 'lstniteNO2': lastno2})
out.to_csv('Final_monthly_' + region + '.csv')