WRF官方提供的CFSR数据下载地址在UCAR那边。网上能找到一些用脚本实现批量下载的技术分享, 但其中有些网址似乎已经发生变化, 所以我又自己写了个小脚本用来下载数据
9.26更新
- 一方面从WRF WPS ungrib的Vtable里看到CFSRv2的话只需要pressure level的就可以跑了
- 另一方面我发现数据下载有不稳定的时候, 会漏数据
因此更新了一下脚本, 只保留了pres的数据, 支持md5校验, win和linux都支持下载
# coding=utf-8
import os,sys
import os.path as osp
import platform
import hashlib
def getfile(url, outDir = "."):
    """Download ``url`` into ``outDir`` with the platform's command-line tool.

    The file is saved as ``{outDir}/{basename of url}``. ``outDir`` is created
    if it does not exist. On Windows ``curl -s -o`` is used, on Linux
    ``wget -O``; on any other platform a message is printed and the process
    exits with status 1.

    The download is best-effort: a non-zero exit status from the tool is not
    treated as an error here, so callers must validate the result themselves.

    ref : https://blog.csdn.net/sinat_38804294/article/details/90264738
    """
    # Local import so this function does not depend on a file-level import.
    import subprocess

    print(f"downloading {url}")

    # prepare
    bname = osp.basename(url)
    os.makedirs(outDir, exist_ok=True)
    outFile = f'{outDir}/{bname}'

    # down
    system = platform.system()
    if system == 'Windows':
        cmd = ["curl", "-s", "-o", outFile, url]
    elif system == 'Linux':
        cmd = ["wget", "-O", outFile, url]
    else:
        print(f"{system} is not supported by now, just modify the code by yourself.")
        sys.exit(1)

    # Pass an argument list instead of a shell string so that spaces or shell
    # metacharacters (e.g. '&') in the URL or path cannot break the command.
    try:
        subprocess.run(cmd)
    except OSError as err:
        # The tool itself could not be started (e.g. not installed); report it
        # and carry on, matching the previous best-effort behaviour.
        print(f"failed to launch {cmd[0]}: {err}")
    return
def getMD5(file):
    """Return the hexadecimal MD5 digest of the file at path ``file``.

    Returns the literal string ``"no file"`` when the path does not exist.
    The file is hashed in fixed-size chunks so that large files are never
    loaded into memory in one piece.
    """
    if not osp.exists(file):
        return "no file"
    digest = hashlib.md5()
    with open(file, 'rb') as f:
        # iter() with a sentinel stops at the empty bytes object returned at EOF.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()
#**********************************************************************
# This function is used to render time series
# with format of "%Y%m%d"
#**********************************************************************
def render_ymd_series(ymd1, ymd2): # from py_rdee
    """Return every day from ``ymd1`` to ``ymd2`` inclusive as "%Y%m%d" strings.

    Both arguments are strings in "%Y%m%d" form. The result is an empty list
    when ``ymd1`` is later than ``ymd2``.
    """
    import datetime

    fmt = "%Y%m%d"
    first = datetime.datetime.strptime(ymd1, fmt)
    last = datetime.datetime.strptime(ymd2, fmt)
    # Both values are parsed at midnight, so the difference is whole days.
    n_days = (last - first).days + 1
    return [
        (first + datetime.timedelta(days=k)).strftime(fmt)
        for k in range(n_days)
    ]
#**********************************************************************
# this function is used to resolve date definitions
# such as "20150101-20150131,20160915,20161220-20170105,20171201"
#**********************************************************************
def resolveDates(dateDef) :
    """Expand a date definition string into a flat list of "yyyymmdd" strings.

    ``dateDef`` is a comma-separated list whose entries are either a single
    day ("YYYYMMDD") or an inclusive range ("YYYYMMDD-YYYYMMDD"). Ranges are
    expanded day by day via ``render_ymd_series``; single days are passed
    through unchanged. Whitespace around entries is ignored and empty entries
    (e.g. from a trailing comma or an empty string) are skipped.

    Raises ValueError if an entry contains more than one '-'.
    """
    res = []
    for entry in dateDef.split(','):
        entry = entry.strip()
        if not entry:
            # Nothing between two commas (or an empty definition): skip it
            # rather than emit an empty "date".
            continue
        if '-' not in entry:
            res.append(entry)
        else:
            ymd1, ymd2 = (part.strip() for part in entry.split('-'))
            res.extend(render_ymd_series(ymd1, ymd2))
    return res
def download_cfsr_1f(url, outDir = None):
    """Download one file plus its ``.md5`` companion and verify the checksum.

    ``url`` is the URL of the data file; the checksum is fetched from
    ``url + ".md5"``. Files are stored in ``outDir``; when ``outDir`` is None
    the name of the URL's parent directory is used as a relative directory.

    An existing ``.md5`` file or data file is reused rather than downloaded
    again. The data file gets at most two verification attempts: on a
    mismatch it is deleted and fetched again. If it still does not match, or
    if the checksum file is empty, an error is printed and the process exits
    with status 2.
    """
    if outDir is None :
        # Derive the directory from the URL itself (its parent directory name).
        outDir = osp.basename(osp.dirname(url))

    bname = osp.basename(url)
    file_md5 = f'{outDir}/{bname}.md5'
    file_target = f'{outDir}/{bname}'

    if not osp.exists(file_md5):
        getfile(url + ".md5", outDir)

    with open(file_md5) as f:
        md5_fields = f.read().split()
    if not md5_fields:
        # An empty checksum file (e.g. left by a failed download) can never
        # validate anything; remove it so a later run fetches it again.
        os.remove(file_md5)
        print(f"cannot download {file_md5} correctly! plz check it manually")
        sys.exit(2)
    # The first whitespace-separated field is the digest; hexdigest() is
    # lowercase, so normalise the reference the same way.
    md5_ref = md5_fields[0].lower()

    for _ in range(2):
        if not osp.exists(file_target):
            getfile(url, outDir)
        if getMD5(file_target) == md5_ref:
            print("md5sum check succeed, next ...")
            return
        print("md5sum check failed, try again...")
        # The download may have produced no file at all; only delete what exists.
        if osp.exists(file_target):
            os.remove(file_target)

    print(f"cannot download {file_target} correctly! plz check it manually")
    sys.exit(2)
def download_cfsr(ymds, pgrbhs, outDir):
    """Download the four 6-hourly pressure-level files for each requested day.

    ``ymds`` is an iterable of "yyyymmdd" strings and ``pgrbhs`` an iterable
    of suffixes inserted after ``pgrbh`` in the file name. For every
    day/suffix pair the t00z, t06z, t12z and t18z files are fetched, in that
    order, through ``download_cfsr_1f`` into ``{outDir}/{ymd}``.
    """
    url_root = "https://www.ncei.noaa.gov/data/climate-forecast-system/access/operational-analysis/6-hourly-by-pressure"
    cycles = ("00", "06", "12", "18")
    for ymd in ymds:
        # Remote layout is <root>/<yyyy>/<yyyymm>/<yyyymmdd>/<file>.
        day_url = f"{url_root}/{ymd[:4]}/{ymd[:6]}/{ymd}"
        day_dir = f'{outDir}/{ymd}'
        for pgrbh in pgrbhs:
            for hh in cycles:
                download_cfsr_1f(f"{day_url}/cdas1.t{hh}z.pgrbh{pgrbh}.grib2", day_dir)
if __name__ == "__main__":
    # Date selection: comma-separated entries, each either a single
    # "yyyymmdd" day or an inclusive "yyyymmdd-yyyymmdd" range.
    # Other selections that can be swapped in:
    #   datesDef = "20201009-20201012"
    #   datesDef = "20201008-20201008"
    datesDef = "20200909-20201012"
    # Suffixes inserted after "pgrbh" in the remote file names.
    pgrbhs = ['06']
    # Root directory; one sub-directory per day is created below it.
    outDir = "./"
    ymds = resolveDates(datesDef)
    download_cfsr(ymds=ymds, pgrbhs=pgrbhs, outDir=outDir)
老版本, 简单直球下载
import os,sys
#**********************************************************************
# This function is used to render time series
# with format of "%Y%m%d"
#**********************************************************************
def render_ymd_series(ymd1, ymd2): # from py_rdee
    """List each calendar day between two "%Y%m%d" strings, both ends included.

    Returns a list of "%Y%m%d" strings in ascending order, or an empty list
    when ``ymd1`` falls after ``ymd2``.
    """
    import datetime

    pattern = "%Y%m%d"
    start, stop = (datetime.datetime.strptime(text, pattern) for text in (ymd1, ymd2))
    one_day = datetime.timedelta(days=1)
    # Parsed values carry no time of day, so the span is an exact day count.
    span = (stop - start).days
    return [(start + offset * one_day).strftime(pattern) for offset in range(span + 1)]
def download_cfsr_pres(ymd):
    """Fetch one day's pressure-level files and their ``.md5`` files with wget.

    ``ymd`` is a "yyyymmdd" string. The four ``cdas1.tHHz.pgrbh06.grib2``
    files (HH = 00, 06, 12, 18) are requested first, followed by the four
    matching ``.md5`` files. wget is run without an output option, so files
    land in the current working directory. Exit statuses are not checked.
    """
    y = ymd[:4]
    ym = ymd[:6]
    # No trailing slash: the file name is appended with its own '/', and a
    # trailing slash here would put '//' into every URL.
    url_root = f"https://www.ncei.noaa.gov/data/climate-forecast-system/access/operational-analysis/6-hourly-by-pressure/{y}/{ym}/{ymd}"
    data_urls = [f"{url_root}/cdas1.t{hh}z.pgrbh06.grib2" for hh in ("00", "06", "12", "18")]
    md5_urls = [f"{data_url}.md5" for data_url in data_urls]
    # Same order as before: all data files first, then all checksum files.
    for target in data_urls + md5_urls:
        os.system(f"wget {target}")
def download_cfsr_sflux(ymd):
    """Fetch one day's flux files and their ``.md5`` files with wget.

    ``ymd`` is a "yyyymmdd" string. The four ``cdas1.tHHz.sfluxgrbf06.grib2``
    files (HH = 00, 06, 12, 18) are requested first, followed by the four
    matching ``.md5`` files. wget is run without an output option, so files
    land in the current working directory. Exit statuses are not checked.
    """
    y = ymd[:4]
    ym = ymd[:6]
    # No trailing slash: the file name is appended with its own '/', and a
    # trailing slash here would put '//' into every URL.
    url_root = f"https://www.ncei.noaa.gov/data/climate-forecast-system/access/operational-analysis/6-hourly-flux/{y}/{ym}/{ymd}"
    data_urls = [f"{url_root}/cdas1.t{hh}z.sfluxgrbf06.grib2" for hh in ("00", "06", "12", "18")]
    md5_urls = [f"{data_url}.md5" for data_url in data_urls]
    # Same order as before: all data files first, then all checksum files.
    for target in data_urls + md5_urls:
        os.system(f"wget {target}")
if __name__ == "__main__":
    # First and last day (inclusive, "yyyymmdd"); overridable through the
    # YMDS / YMDE environment variables.
    ymdS = os.getenv("YMDS", "20200909")
    ymdE = os.getenv("YMDE", "20201012")
    outDir = "./"
    ymds = render_ymd_series(ymdS, ymdE)

    # The download helpers write into the current directory, so each day is
    # fetched from inside its own folder. Remember where we started and go
    # back there explicitly: chdir("..") is only right for a one-level outDir.
    start_dir = os.getcwd()
    for ymd in ymds:
        day_dir = os.path.join(outDir, ymd)
        os.makedirs(day_dir, exist_ok = True)
        os.chdir(day_dir)
        try:
            download_cfsr_pres(ymd)
            download_cfsr_sflux(ymd)
        finally:
            os.chdir(start_dir)
目前仅实现了对一段时间内CFSR 6小时数据的下载; 因为急用先写到这里, 日后有需求再完善功能