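"""Utilities for matching GOES-16 AMVs (atmospheric motion vectors) with
RAOB (radiosonde) observations: parsing matchup files, locating collocated
ABI L1b imagery and GFS profiles, extracting image chips, and computing
normalization statistics."""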
import datetime
from datetime import timezone
import bisect
import glob
import os
import numpy as np
import subprocess
import xarray as xr
from util.goesr_l1b import PugFile, PugL1bTools
from util.setup import home_dir
from pysolar.solar import get_altitude
from scipy.interpolate import interp2d
from metpy import *
import random
import re
goes16_directory = '/arcdata/goes/grb/goes16' # /year/date/abi/L1b/RadC
goes_date_format = '%Y%j%H'
dt_str_pat = r'_s\d{11}'
dir_fmt = '%Y_%m_%d_%j'
gfs_directory = '/apollo/cloud/Ancil_Data/clavrx_ancil_data/dynamic/gfs/'
gfs_date_format = '%y%m%d'
h4_to_h5_path = home_dir + '/h4toh5convert'
data_dir = '/scratch/long/rink'
converted_file_dir = data_dir + '/gfs_h5'
CACHE_GFS = True
goes_cache_dir = data_dir + '/goes16'
CACHE_GOES = True
COPY_GOES = True
fmt = '%Y%j%H'
matchup_dict_glbl = None
geoloc_2km = None
geoloc_1km = None
geoloc_hkm = None
TRIPLET = False
CONV3D = False
H08_lat_range = [-78, 78]
H08_lon_range = [62, 218]
amv_hdr_list = ['lat', 'lon', 'tbox', 'sbox', 'spd', 'dir', 'pres', 'lowl', 'mspd', 'mdir',
'alb', 'corr', 'tmet', 'perr', 'qi', 'cqi', 'qif']
raob_hdr_list = ['pres', 'temp', 'dir', 'spd']
raob_spd_idx = raob_hdr_list.index('spd')
raob_dir_idx = raob_hdr_list.index('dir')
amv_lon_idx = amv_hdr_list.index('lon')
amv_lat_idx = amv_hdr_list.index('lat')
amv_pres_idx = amv_hdr_list.index('pres')
raob_pres_idx = raob_hdr_list.index('pres')
amv_spd_idx = amv_hdr_list.index('spd')
amv_dir_idx = amv_hdr_list.index('dir')
amv_prs_idx = amv_hdr_list.index('pres')
amv_qi_idx = amv_hdr_list.index('qi')
amv_cqi_idx = amv_hdr_list.index('cqi')
amv_qif_idx = amv_hdr_list.index('qif')
def get_matchups(filename):
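    """Parse an AMV/RAOB matchup text file into a list of tuples.

    Fields appear to be stored as scaled integers (hundredths), hence the
    division by 100. Rows with QI (no forecast) below 50 are discarded.
    Returns tuples of (timestamp, lat, lon, lbfp, amvpre, spd, wind_dir,
    shear, satzen, qif, phase, ctype).
    """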
header = None
data = []
with open(filename) as file:
for line in file:
if header is None:
header = line
continue
toks = line.split()
dto = datetime.datetime.strptime(toks[1], fmt).replace(tzinfo=timezone.utc)
timestmp = dto.timestamp()
lat = float(toks[2])/100
lon = float(toks[3])/100
lbfp = float(toks[4])/100
amvpre = float(toks[5])/100
spd = float(toks[6])/100
            wind_dir = float(toks[7])/100
            shear = float(toks[8])/100
            satzen = float(toks[9])/100
            qif = float(toks[11])/100
            phase = float(toks[13])/100
            ctype = float(toks[14])/100
            if qif < 50:
                # discard low-quality winds
                continue
            tup = (timestmp, lat, lon, lbfp, amvpre, spd, wind_dir, shear, satzen, qif, phase, ctype)
            data.append(tup)
return data
def merge(matchups, time_to_matchups):
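    """Group matchup tuples by their timestamp (element 0), merging into
    `time_to_matchups` if given. Groups are kept in time order, but note the
    returned dict is re-keyed by ordinal index rather than timestamp."""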
if matchups is None:
return time_to_matchups
if time_to_matchups is None:
tup = matchups[0]
time = tup[0]
tup_s = []
time_to_matchups = {time : tup_s}
time_s = list(time_to_matchups.keys())
tup_s = list(time_to_matchups.values())
for tup in matchups:
time = tup[0]
try:
idx = time_s.index(time)
tup_s[idx].append(tup)
except ValueError:
idx = bisect.bisect(time_s, time)
time_s.insert(idx, time)
lst = []
lst.append(tup)
tup_s.insert(idx, lst)
time_to_matchups = {}
for idx in range(len(time_s)):
# time_to_matchups[time_s[idx]] = tup_s[idx]
time_to_matchups[idx] = tup_s[idx]
return time_to_matchups
def matchup_dict_to_nd(matchup_dict, shuffle=True, reduce=None):
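    """Convert each per-time list of matchup tuples to a 2D ndarray,
    optionally shuffling rows and keeping only every `reduce`-th one."""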
time_keys = matchup_dict.keys()
time_to_nd = {}
for key in time_keys:
obs_lst = matchup_dict.get(key)
nda = np.array(obs_lst)
if shuffle:
np.random.shuffle(nda)
if reduce is not None:
nda = nda[::reduce, :]
time_to_nd[key] = nda
return time_to_nd
def get_num_samples(matchup_dict, keys=None):
cnt = 0
if keys is None:
time_keys = matchup_dict.keys()
else:
time_keys = keys
for key in time_keys:
cnt += matchup_dict[key].shape[0]
return cnt
# CONUS
lon_space = np.linspace(-140, -40, 401)
lat_space = np.linspace(15, 55, 161)
def spatial_filter(matchup_dict, shuffle=True, reduce=None):
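    """Thin each time key's observations so that at most one falls in any
    0.25-degree cell of the CONUS grid above; optionally shuffle and keep
    every `reduce`-th survivor."""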
keys = matchup_dict.keys()
filterd_dict = {}
grd_x_hi = lon_space.shape[0] - 1
grd_y_hi = lat_space.shape[0] - 1
for key in keys:
grd_bins = np.full((lat_space.shape[0], lon_space.shape[0]), False)
obs = matchup_dict.get(key)
lons = obs[:, 2]
lats = obs[:, 1]
lon_idxs = np.searchsorted(lon_space, lons)
lat_idxs = np.searchsorted(lat_space, lats)
keep_obs = []
for i in range(len(lon_idxs)):
lon_idx = lon_idxs[i]
lat_idx = lat_idxs[i]
if lon_idx < 0 or lon_idx > grd_x_hi:
continue
if lat_idx < 0 or lat_idx > grd_y_hi:
continue
if not grd_bins[lat_idx, lon_idx]:
grd_bins[lat_idx, lon_idx] = True
keep_obs.append(obs[i])
if len(keep_obs) == 0:
continue
keep_obs = np.array(keep_obs)
if shuffle:
np.random.seed(seed=42)
shuffle_idxs = np.random.permutation(keep_obs.shape[0])
keep_obs = keep_obs[shuffle_idxs]
if reduce is not None:
keep_obs = keep_obs[::reduce, :]
filterd_dict[key] = keep_obs
return filterd_dict
def get_matchup_dict(filename, merge_with=None, filter_by_space=True, reduce=None, skip=None):
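    """Read a matchup file into a time-keyed dict of ndarrays, optionally
    merging with `merge_with`, keeping every `skip`-th time key, and
    thinning spatially."""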
matchup = get_matchups(filename)
matchup_dict = merge(matchup, merge_with)
if skip is not None:
skp_dct = {}
skp_keys = list(matchup_dict.keys())
random.shuffle(skp_keys)
skp_keys = skp_keys[::skip]
for key in skp_keys:
skp_dct[key] = matchup_dict[key]
matchup_dict = skp_dct
matchup_dict = matchup_dict_to_nd(matchup_dict)
if filter_by_space:
matchup_dict = spatial_filter(matchup_dict, reduce=reduce)
return matchup_dict
NUM_SECTIONS = 4
def split_matchup(matchup_dict, perc=0.2):
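    """Split the matchup dict by time key into train/test dicts, sending
    roughly `perc` of the keys (every `skip`-th one) to the test set."""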
time_keys = matchup_dict.keys()
num_datetimes = len(time_keys)
keys_list = list(time_keys)
    num_test = max(1, int(num_datetimes * perc))
    skip = max(1, int(num_datetimes / num_test))
tst_keys = keys_list[::skip]
train_dict = {}
test_dict = {}
tst_set = set(tst_keys)
trn_set = (set(keys_list)).difference(tst_set)
trn_keys = list(trn_set)
for key in tst_keys:
test_dict[key] = matchup_dict.get(key)
for key in trn_keys:
train_dict[key] = matchup_dict.get(key)
return train_dict, test_dict
def flatten(matchup_dict, shuffle=True):
time_keys = matchup_dict.keys()
dat_list = []
cnt = 0
for key in time_keys:
dat = matchup_dict[key]
num_dat = dat.shape[0]
dat_list.append(dat)
cnt += num_dat
data = np.vstack(dat_list)
if shuffle:
np.random.seed(seed=42)
shuffle_idxs = np.random.permutation(cnt)
data = data[shuffle_idxs]
return data
def shuffle_dict(in_dict): # Outer most level only
shuffled_dict = {}
keys = in_dict.keys()
num_times = len(keys)
np.random.seed(seed=42)
shuffle_idxs = np.random.permutation(num_times)
key_list = list(keys)
for idx in shuffle_idxs:
key = key_list[idx]
shuffled_dict[key] = in_dict[key]
return shuffled_dict
def filter_by_type(matchup_dict, error=None, phase=None, ctype=None):
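    """Filter each time key's AMVs by absolute height (pressure) error,
    cloud phase, and/or cloud type. Columns follow the tuples built in
    get_matchups(): 3=lbfp, 4=amvpre, 10=phase, 11=ctype."""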
time_keys = list(matchup_dict.keys())
filtered_dict = {}
for key in time_keys:
amvs = matchup_dict[key]
keep = None
if error is not None:
            abs_err = np.abs(amvs[:, 4] - amvs[:, 3])  # |AMV pressure - best-fit pressure|
            keep = abs_err > error
amvs = amvs[keep, :]
if phase is not None:
keep = amvs[:, 10] == phase
amvs = amvs[keep, :]
elif ctype is not None:
keep = amvs[:, 11] == ctype
amvs = amvs[keep, :]
if keep is not None:
if np.sum(keep) == 0:
continue
filtered_dict[key] = amvs
return filtered_dict
def analyze(matchup_dict):
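    """Accumulate AMV minus best-fit pressure differences stratified by the
    phase/type codes in columns 10 and 11, print per-category counts and
    MAE/bias/std, and return the height and difference arrays."""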
time_keys = list(matchup_dict.keys())
hgt_diff = []
hgt_diff_i = []
hgt_diff_w = []
hgt_diff_scw = []
hgt_diff_mphz = []
hgts = []
hgts_i = []
hgts_w = []
hgts_scw = []
hgts_mphz = []
num_amvs = 0
num_ice = 0
num_water = 0
num_sc_water = 0
num_mphz = 0
hgt_diff_tice = []
hgts_tice = []
num_tice = 0
hgt_diff_cirrus = []
hgts_cirrus = []
num_cirrus = 0
hgt_diff_ovrlap = []
hgts_ovrlap = []
num_ovrlap = 0
for key in time_keys:
amvs = matchup_dict[key]
# num_amvs += amvs.shape[0]
d = amvs[:, 4] - amvs[:, 3]
# keep = np.abs(d) > 70
keep = np.abs(d) >= 0
d = d[keep]
amvs = amvs[keep, :]
num_amvs += amvs.shape[0]
hgt_diff.append(d)
hgts.append(amvs[:, 3])
ice = amvs[:, 10] == 4
hgt_diff_i.append(d[ice])
hgts_i.append(amvs[ice, 3])
num_ice += np.sum(ice)
water = amvs[:, 10] == 1
hgt_diff_w.append(d[water])
hgts_w.append(amvs[water, 3])
num_water += np.sum(water)
        sc_water = amvs[:, 10] == 2
        hgt_diff_scw.append(d[sc_water])
        hgts_scw.append(amvs[sc_water, 3])
        num_sc_water += np.sum(sc_water)
mphz = amvs[:, 10] == 3
hgt_diff_mphz.append(d[mphz])
hgts_mphz.append(amvs[mphz, 3])
num_mphz += np.sum(mphz)
tice = amvs[:, 11] == 5
hgt_diff_tice.append(d[tice])
hgts_tice.append(amvs[tice, 3])
num_tice += np.sum(tice)
cirrus = amvs[:, 11] == 6
hgt_diff_cirrus.append(d[cirrus])
hgts_cirrus.append(amvs[cirrus, 3])
num_cirrus += np.sum(cirrus)
ovrlap = amvs[:, 11] == 7
hgt_diff_ovrlap.append(d[ovrlap])
hgts_ovrlap.append(amvs[ovrlap, 3])
num_ovrlap += np.sum(ovrlap)
hgt_diff = np.concatenate(hgt_diff)
hgt_diff_i = np.concatenate(hgt_diff_i)
hgt_diff_w = np.concatenate(hgt_diff_w)
hgt_diff_scw = np.concatenate(hgt_diff_scw)
hgt_diff_tice = np.concatenate(hgt_diff_tice)
hgt_diff_cirrus = np.concatenate(hgt_diff_cirrus)
hgt_diff_mphz = np.concatenate(hgt_diff_mphz)
hgt_diff_ovrlap = np.concatenate(hgt_diff_ovrlap)
hgts = np.concatenate(hgts)
hgts_i = np.concatenate(hgts_i)
hgts_w = np.concatenate(hgts_w)
hgts_scw = np.concatenate(hgts_scw)
    hgts_tice = np.concatenate(hgts_tice)
    hgts_cirrus = np.concatenate(hgts_cirrus)
    hgts_mphz = np.concatenate(hgts_mphz)
    hgts_ovrlap = np.concatenate(hgts_ovrlap)
print('total: ', num_amvs)
print('ice: ', num_ice, num_ice/num_amvs)
print('water: ', num_water, num_water/num_amvs)
print('sc water: ', num_sc_water, num_sc_water/num_amvs)
print('mixed phase: ', num_mphz, num_mphz/num_amvs)
print('tice: ', num_tice, num_tice/num_amvs)
print('cirrus: ', num_cirrus, num_cirrus/num_amvs)
print('num ovrlap: ', num_ovrlap, num_ovrlap/num_amvs)
print('all MAE/BIAS/STD', np.average(np.abs(hgt_diff)), np.average(hgt_diff), np.std(hgt_diff))
print('ice MAE/BIAS/STD', np.average(np.abs(hgt_diff_i)), np.average(hgt_diff_i), np.std(hgt_diff_i))
print('water MAE/BIAS/STD', np.average(np.abs(hgt_diff_w)), np.average(hgt_diff_w), np.std(hgt_diff_w))
print('sc water MAE/BIAS/STD', np.average(np.abs(hgt_diff_scw)), np.average(hgt_diff_scw), np.std(hgt_diff_scw))
print('tice MAE/BIAS/STD', np.average(np.abs(hgt_diff_tice)), np.average(hgt_diff_tice), np.std(hgt_diff_tice))
print('cirrus MAE/BIAS/STD', np.average(np.abs(hgt_diff_cirrus)), np.average(hgt_diff_cirrus), np.std(hgt_diff_cirrus))
print('mphz MAE/BIAS/STD', np.average(np.abs(hgt_diff_mphz)), np.average(hgt_diff_mphz), np.std(hgt_diff_mphz))
print('ovrlap MAE/BIAS/STD', np.average(np.abs(hgt_diff_ovrlap)), np.average(hgt_diff_ovrlap), np.std(hgt_diff_ovrlap))
return hgts, hgt_diff, hgt_diff_i, hgt_diff_w, hgt_diff_scw, hgt_diff_tice, hgt_diff_cirrus, hgt_diff_mphz, hgt_diff_ovrlap
def get_time_tuple_utc(timestamp):
dt_obj = datetime.datetime.fromtimestamp(timestamp, timezone.utc)
return dt_obj, dt_obj.timetuple()
def get_bounding_goes16_files(timestamp, ch_str, file_time_span=5):
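    """Locate the GOES-16 CONUS (RadC) L1b file for channel `ch_str` whose
    scan covers `timestamp`, plus its temporal neighbors. Returns
    (f_left, t_left, f_center, t_center, f_right, t_right), with Nones when
    no covering file or neighbor is found."""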
dt_obj, time_tup = get_time_tuple_utc(timestamp)
dt_obj_0 = dt_obj - datetime.timedelta(minutes=20)
dt_obj_1 = dt_obj + datetime.timedelta(minutes=20)
yr_dir_0 = str(dt_obj_0.timetuple().tm_year)
date_dir_0 = dt_obj_0.strftime(dir_fmt)
date_str_0 = dt_obj_0.strftime(goes_date_format)
yr_dir_1 = str(dt_obj_1.timetuple().tm_year)
date_dir_1 = dt_obj_1.strftime(dir_fmt)
date_str_1 = dt_obj_1.strftime(goes_date_format)
files_path_0 = goes16_directory + '/' + yr_dir_0 + '/' + date_dir_0 + '/abi' + '/L1b' + '/RadC'
files_path_1 = goes16_directory + '/' + yr_dir_1 + '/' + date_dir_1 + '/abi' + '/L1b' + '/RadC'
flist_0 = glob.glob(files_path_0 + '/OR_ABI-L1b-RadC-??C' + ch_str + '_G16_s' + date_str_0 + '*.nc')
flist_1 = []
if date_str_0 != date_str_1:
flist_1 = glob.glob(files_path_1 + '/OR_ABI-L1b-RadC-??C' + ch_str + '_G16_s' + date_str_1 + '*.nc')
flist = flist_0 + flist_1
if len(flist) == 0:
return None, None, None, None, None, None
ftimes = []
for pname in flist:
fname = os.path.split(pname)[1]
tstr = re.search(dt_str_pat, fname).group()[2:]
dto = datetime.datetime.strptime(tstr, goes_date_format+'%M').replace(tzinfo=timezone.utc)
ftimes.append(dto.timestamp())
tarr = np.array(ftimes)
sidxs = tarr.argsort()
farr = np.array(flist)
farr = farr[sidxs]
ftimes = tarr[sidxs]
ftimes_n = ftimes + file_time_span * 60
iC = -1
for k, t in enumerate(ftimes):
if t <= timestamp < ftimes_n[k]:
iC = k
break
if iC >= 0:
iL = iC - 1
iR = iC + 1
else:
return None, None, None, None, None, None
fList = farr.tolist()
if iL < 0 or iR >= len(ftimes):
return None, None, fList[iC], ftimes[iC], None, None
else:
return fList[iL], ftimes[iL], fList[iC], ftimes[iC], fList[iR], ftimes[iR]
def get_bounding_gfs_files(timestamp):
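    """Locate the GFS F012 forecast files whose valid times bracket
    `timestamp`. Returns (f_below, t_below, f_above, t_above); the second
    pair is None when the timestamp coincides with a valid time."""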
dt_obj, time_tup = get_time_tuple_utc(timestamp)
dt_obj = dt_obj - datetime.timedelta(hours=12)
date_str = dt_obj.strftime(gfs_date_format)
dt_obj0 = datetime.datetime.strptime(date_str, gfs_date_format).replace(tzinfo=timezone.utc)
dt_obj1 = dt_obj0 + datetime.timedelta(days=1)
date_str_1 = dt_obj1.strftime(gfs_date_format)
flist_0 = glob.glob(gfs_directory+'gfs.'+date_str+'??_F012.hdf')
flist_1 = glob.glob(gfs_directory+'gfs.'+date_str_1+'00_F012.hdf')
if len(flist_0) == 0:
return None, None, None, None
filelist = flist_0
if len(flist_1) > 0:
filelist = flist_0 + flist_1
ftimes = []
for pname in filelist: # TODO: make better with regular expressions (someday)
fname = os.path.split(pname)[1]
toks = fname.split('.')
tstr = toks[1].split('_')[0]
dto = datetime.datetime.strptime(tstr, gfs_date_format+'%H').replace(tzinfo=timezone.utc)
dto = dto + datetime.timedelta(hours=12)
ftimes.append(dto.timestamp())
tarr = np.array(ftimes)
sidxs = tarr.argsort()
farr = np.array(filelist)
farr = farr[sidxs]
ftimes = tarr[sidxs]
idxs = np.arange(len(filelist))
above = ftimes >= timestamp
if not above.any():
return None, None, None, None
below = ftimes <= timestamp
if not below.any():
return None, None, None, None
iL = idxs[below].max()
iR = iL + 1
fList = farr.tolist()
if timestamp == ftimes[iL]:
return fList[iL], ftimes[iL], None, None
else:
return fList[iL], ftimes[iL], fList[iR], ftimes[iR]
def get_profile(xr_dataset, fld_name, lons, lats, lon360=True, do_norm=False):
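    """Interpolate a GFS 3D field to the given lon/lat points at every
    pressure level. Assumes the converted GFS files are dimensioned
    (fakeDim0=level, fakeDim1=latitude, fakeDim2=longitude) on a global
    0.5-degree grid with longitudes 0-360."""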
if lon360:
lons = np.where(lons < 0, lons + 360, lons) # convert -180,180 to 0,360
plevs = xr_dataset['pressure levels']
lat_coords = np.linspace(-90, 90, xr_dataset.fakeDim1.size)
lon_coords = np.linspace(0, 359.5, xr_dataset.fakeDim2.size)
fld = xr_dataset[fld_name]
fld = fld.assign_coords(fakeDim2=lon_coords, fakeDim1=lat_coords, fakeDim0=plevs)
dim2 = xr.DataArray(lons, dims='k')
dim1 = xr.DataArray(lats, dims='k')
intrp_fld = fld.interp(fakeDim1=dim1, fakeDim2=dim2, fakeDim0=plevs, method='linear')
intrp_fld = intrp_fld.values
if do_norm:
intrp_fld = normalize(intrp_fld, fld_name)
return intrp_fld
def get_profile_multi(xr_dataset, fld_name_s, lons, lats, lon360=True):
if lon360:
lons = np.where(lons < 0, lons + 360, lons) # convert -180,180 to 0,360
plevs = xr_dataset['pressure levels']
lat_coords = np.linspace(-90, 90, xr_dataset.fakeDim1.size)
lon_coords = np.linspace(0, 359.5, xr_dataset.fakeDim2.size)
fld_s = []
for fld_name in fld_name_s:
fld = xr_dataset[fld_name]
fld = fld.assign_coords(fakeDim2=lon_coords, fakeDim1=lat_coords, fakeDim0=plevs)
fld_s.append(fld)
fld = xr.concat(fld_s, 'fld_dim')
dim2 = xr.DataArray(lons, dims='k')
dim1 = xr.DataArray(lats, dims='k')
intrp_fld = fld.interp(fakeDim1=dim1, fakeDim2=dim2, fakeDim0=plevs, method='linear')
return intrp_fld
def get_scalar(xr_dataset, fld_name, lons, lats, lon360=True):
if lon360:
lons = np.where(lons < 0, lons + 360, lons) # convert -180,180 to 0,360
lat_coords = np.linspace(-90, 90, xr_dataset.fakeDim1.size)
lon_coords = np.linspace(0, 359.5, xr_dataset.fakeDim2.size)
fld = xr_dataset[fld_name]
fld = fld.assign_coords(fakeDim2=lon_coords, fakeDim1=lat_coords)
dim1 = xr.DataArray(lats, dims='k')
dim2 = xr.DataArray(lons, dims='k')
intrp_fld = fld.interp(fakeDim1=dim1, fakeDim2=dim2, method='linear')
return intrp_fld
def convert_file(fpath_hdf4):
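    """Convert a GFS HDF4 file to HDF5 with h4toh5convert, caching the
    result in converted_file_dir; returns the HDF5 path."""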
fname_h4 = os.path.split(fpath_hdf4)[1]
fname_h5 = os.path.splitext(fname_h4)[0] + '.h5'
fpath_h5 = converted_file_dir + '/' + fname_h5
# check if we have it already
if os.path.exists(fpath_h5):
return fpath_h5
subprocess.call([h4_to_h5_path, fpath_hdf4, fpath_h5])
return fpath_h5
def get_interpolated_profile(ds_0, ds_1, time0, time1, fld_name, time, lons, lats, do_norm=False):
    prof_0 = get_profile(ds_0, fld_name, lons, lats)
    prof_1 = get_profile(ds_1, fld_name, lons, lats)
    # get_profile returns plain ndarrays, so interpolate linearly in time
    # with numpy (xr.concat would require DataArrays).
    wgt = (time - time0) / (time1 - time0)
    intrp_prof = prof_0 * (1.0 - wgt) + prof_1 * wgt
    if do_norm:
        intrp_prof = normalize(intrp_prof, fld_name)
    return intrp_prof
def get_interpolated_profile_multi(ds_0, ds_1, time0, time1, fld_name_s, time, lons, lats, do_norm_s):
prof_0 = get_profile_multi(ds_0, fld_name_s, lons, lats)
prof_1 = get_profile_multi(ds_1, fld_name_s, lons, lats)
prof = xr.concat([prof_0, prof_1], 'time')
prof = prof.assign_coords(time=[time0, time1])
intrp_prof = prof.interp(time=time, method='linear')
intrp_prof = intrp_prof.values
intrp_prof = normalize_multi(intrp_prof, fld_name_s, do_norm_s)
return intrp_prof
def get_interpolated_scalar(ds_0, ds_1, time0, time1, fld_name, time, lons, lats, do_norm=False):
vals_0 = get_scalar(ds_0, fld_name, lons, lats)
vals_1 = get_scalar(ds_1, fld_name, lons, lats)
vals = xr.concat([vals_0, vals_1], 'time')
vals = vals.assign_coords(time=[time0, time1])
intrp_vals = vals.interp(time=time, method='linear')
intrp_vals = intrp_vals.values
if do_norm:
intrp_vals = normalize(intrp_vals, fld_name)
return intrp_vals
path_to_reader_dict = {}
MAX_FILES_OPEN = 50
def get_reader(path):
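    """Return a cached PugL1bTools reader for `path`; the whole cache is
    simply cleared once it exceeds MAX_FILES_OPEN entries."""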
if len(path_to_reader_dict) > MAX_FILES_OPEN:
path_to_reader_dict.clear()
rdr = path_to_reader_dict.get(path)
if rdr is None:
rdr = PugL1bTools(path)
path_to_reader_dict[path] = rdr
return rdr
def clear_readers():
path_to_reader_dict.clear()
def get_images(lons, lats, timestamp, channel_list, half_width, step, do_norm=False, daynight='ANY'):
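    """Cut image chips of BT/reflectance centered on each (lon, lat) from
    the GOES-16 file covering `timestamp`, for every channel in
    `channel_list`. Each chip spans 2*half_width pixels subsampled by
    `step`; targets failing the day/night test or falling outside the image
    get a placeholder chip. With TRIPLET/CONV3D, chips from the temporally
    adjacent files are returned as data_l/data_r. Returns
    (data, data_l, data_r, idxs), where idxs indexes targets with valid
    chips."""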
global geoloc_2km, geoloc_1km, geoloc_hkm
dt_obj = datetime.datetime.fromtimestamp(timestamp, timezone.utc)
data = []
data_l = []
data_r = []
idxs = []
l_s = []
c_s = []
for ch_idx, ch in enumerate(channel_list):
img_width = int(2 * half_width[ch_idx] / step[ch_idx])
dum = np.arange(img_width * img_width, dtype=np.float32)
dum = dum.reshape((img_width, img_width))
fL, tL, fC, tC, fR, tR = get_bounding_goes16_files(timestamp, ch)
if (fL is None) or (fR is None) or (fC is None):
return None, None, None, None
# copy from archive to local drive
local_path = fC
if COPY_GOES:
local_path = goes_cache_dir + '/' + os.path.split(fC)[1]
if not os.path.exists(local_path):
subprocess.call(['cp', fC, goes_cache_dir])
subprocess.call(['chmod', 'u+w', local_path])
# Navigation, convert to BT/REFL
try:
pug_l1b_c = get_reader(local_path)
except Exception as exc:
print(exc)
return None, None, None, None
bt_or_refl = None
if pug_l1b_c.bt_or_refl == 'bt':
bt_or_refl = pug_l1b_c.bt
elif pug_l1b_c.bt_or_refl == 'refl':
bt_or_refl = pug_l1b_c.refl
if TRIPLET or CONV3D:
# copy from archive to local drive
local_path_l = fL
if COPY_GOES:
local_path_l = goes_cache_dir + '/' + os.path.split(fL)[1]
if not os.path.exists(local_path_l):
subprocess.call(['cp', fL, goes_cache_dir])
subprocess.call(['chmod', 'u+w', local_path_l])
# Navigation, convert to BT/REFL
try:
pug_l1b_l = get_reader(local_path_l)
except Exception as exc:
print(exc)
return None, None, None, None
if pug_l1b_l.bt_or_refl == 'bt':
bt_or_refl_l = pug_l1b_l.bt
elif pug_l1b_l.bt_or_refl == 'refl':
bt_or_refl_l = pug_l1b_l.refl
# copy from archive to local drive
local_path_r = fR
if COPY_GOES:
local_path_r = goes_cache_dir + '/' + os.path.split(fR)[1]
if not os.path.exists(local_path_r):
subprocess.call(['cp', fR, goes_cache_dir])
subprocess.call(['chmod', 'u+w', local_path_r])
# Navigation, convert to BT/REFL
try:
pug_l1b_r = get_reader(local_path_r)
except Exception as exc:
print(exc)
return None, None, None, None
if pug_l1b_r.bt_or_refl == 'bt':
bt_or_refl_r = pug_l1b_r.bt
elif pug_l1b_r.bt_or_refl == 'refl':
bt_or_refl_r = pug_l1b_r.refl
if daynight != 'ANY':
if step[ch_idx] == 1:
if geoloc_2km is None:
geoloc_2km = pug_l1b_c.geo
glons = geoloc_2km.lon
glats = geoloc_2km.lat
elif step[ch_idx] == 4:
if geoloc_hkm is None:
geoloc_hkm = pug_l1b_c.geo
glons = geoloc_hkm.lon
glats = geoloc_hkm.lat
elif step[ch_idx] == 2:
if geoloc_1km is None:
geoloc_1km = pug_l1b_c.geo
glons = geoloc_1km.lon
glats = geoloc_1km.lat
alt_fnc = get_solar_alt_func(pug_l1b_c, glons, glats, dt_obj)
images = []
images_l = []
images_r = []
for idx, lon in enumerate(lons):
lat = lats[idx]
if ch_idx == 0:
l, c = pug_l1b_c.lat_lon_to_l_c(lat, lon)
l_s.append(l)
c_s.append(c)
else:
l = l_s[idx]
c = c_s[idx]
if daynight == 'NIGHT':
salt = alt_fnc(l, c)
if salt[0] > 3.0:
images.append(dum)
continue
elif daynight == 'DAY':
salt = alt_fnc(l, c)
if salt[0] < 3.0:
images.append(dum)
continue
l_a = l - half_width[ch_idx]
l_b = l + half_width[ch_idx]
c_a = c - half_width[ch_idx]
c_b = c + half_width[ch_idx]
if (l_a >= 0 and l_b < pug_l1b_c.shape[0]) and (c_a >= 0 and c_b < pug_l1b_c.shape[1]):
img = bt_or_refl[l_a:l_b:step[ch_idx], c_a:c_b:step[ch_idx]]
if do_norm:
img = prepare_image(img, ch)
images.append(img)
if ch_idx == 0:
idxs.append(idx)
if TRIPLET or CONV3D:
img = bt_or_refl_l[l_a:l_b:step[ch_idx], c_a:c_b:step[ch_idx]]
if do_norm:
img = prepare_image(img, ch)
images_l.append(img)
img = bt_or_refl_r[l_a:l_b:step[ch_idx], c_a:c_b:step[ch_idx]]
if do_norm:
img = prepare_image(img, ch)
images_r.append(img)
else:
images.append(dum)
if TRIPLET or CONV3D:
images_l.append(dum)
images_r.append(dum)
images = np.array(images)
data.append(images)
if TRIPLET or CONV3D:
images_l = np.array(images_l)
images_r = np.array(images_r)
data_l.append(images_l)
data_r.append(images_r)
# remove file from local cache
if COPY_GOES:
if not CACHE_GOES:
subprocess.call(['rm', local_path])
if TRIPLET or CONV3D:
subprocess.call(['rm', local_path_l])
subprocess.call(['rm', local_path_r])
return np.array(data), np.array(data_l), np.array(data_r), np.array(idxs)
def copy_abi(matchup_dict, channel_list):
for key in matchup_dict.keys():
obs = matchup_dict.get(key)
timestamp = obs[0][0]
for ch_idx, ch in enumerate(channel_list):
            fL, tL, fC, tC, fR, tR = get_bounding_goes16_files(timestamp, ch)
            if fC is None:
                continue
            # copy the file covering this timestamp from archive to local drive
            local_path = goes_cache_dir + '/' + os.path.split(fC)[1]
            if not os.path.exists(local_path):
                subprocess.call(['cp', fC, goes_cache_dir])
                subprocess.call(['chmod', 'u+w', local_path])
def prepare_image(image, ch):
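    """Standardize a chip in place with the per-channel mean/std and zero
    pixels outside the channel's valid range. Note: mutates `image`."""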
from deeplearning.cloudheight import abi_valid_range, abi_mean, abi_std
vld_rng = abi_valid_range.get(ch)
valid = np.logical_and(image >= vld_rng[0], image <= vld_rng[1])
mean = abi_mean.get(ch)
std = abi_std.get(ch)
image -= mean
image /= std
not_valid = np.invert(valid)
image[not_valid] = 0
return image
def normalize(data, param):
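    """Standardize `data` in place with the per-parameter mean/std, zeroing
    values outside the parameter's valid range when one is defined."""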
from deeplearning.cloudheight import mean_std_dict, valid_range_dict
if mean_std_dict.get(param) is None:
return data
mean, std = mean_std_dict.get(param)
data -= mean
data /= std
vld_rng = valid_range_dict.get(param)
if vld_rng is None:
return data
shape = data.shape
data = data.flatten()
valid = np.logical_and(data >= vld_rng[0], data <= vld_rng[1])
not_valid = np.invert(valid)
data[not_valid] = 0
data = np.reshape(data, shape)
return data
def normalize_multi(data, param_s, do_norm_s):
from deeplearning.cloudheight import mean_std_dict, valid_range_dict
for idx, param in enumerate(param_s):
if not do_norm_s[idx]:
continue
if mean_std_dict.get(param) is None:
continue
mean, std = mean_std_dict.get(param)
data[idx] -= mean
data[idx] /= std
vld_rng = valid_range_dict.get(param)
if vld_rng is None:
continue
tmp = data[idx]
shape = tmp.shape
tmp = tmp.flatten()
valid = np.logical_and(tmp >= vld_rng[0], tmp <= vld_rng[1])
not_valid = np.invert(valid)
tmp[not_valid] = 0
tmp = np.reshape(tmp, shape)
data[idx] = tmp
return data
def get_abi_mean_std(matchup_dict, ch_str, valid_range, step=100):
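    """Accumulate subsampled, range-checked BT/reflectance values over the
    matchup times for one ABI channel and return their (mean, std)."""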
keys = matchup_dict.keys()
image_s = []
for key in keys:
obs = matchup_dict.get(key)
if obs is None:
continue
timestamp = obs[0][0]
dt_obj, ttup = get_time_tuple_utc(timestamp)
print(ttup.tm_year, ttup.tm_mon, ttup.tm_mday)
f0, t0, f1, t1, f2, t2 = get_bounding_goes16_files(timestamp, ch_str)
if (f0 is None) or (f1 is None) or (f2 is None):
continue
# copy from archive to local drive
local_path = goes_cache_dir + '/' + os.path.split(f0)[1]
if not os.path.exists(local_path):
subprocess.call(['cp', f0, goes_cache_dir])
subprocess.call(['chmod', 'u+w', local_path])
# Navigation, convert to BT/REFL
try:
pug = PugFile(local_path, 'Rad')
pug_l1b = PugL1bTools(local_path)
except Exception as exc:
print(exc)
continue
        if ch_str == '02':
            # Reflectance statistics are only meaningful in daylight: skip
            # this granule if any corner of the domain is in darkness.
            elems = pug.shape[1]
            lines = pug.shape[0]
            lons = pug.geo.lon
            lats = pug.geo.lat
            corners = [(lats[0, 0], lons[0, 0]),
                       (lats[0, elems - 1], lons[0, elems - 1]),
                       (lats[lines - 1, 0], lons[lines - 1, 0]),
                       (lats[lines - 1, elems - 1], lons[lines - 1, elems - 1])]
            dark = False
            for lat, lon in corners:
                if get_altitude(lat, lon, dt_obj) < 10:
                    print(ttup.tm_year, ttup.tm_mon, ttup.tm_mday)
                    dark = True
                    break
            if dark:
                continue
bt_or_refl = None
if pug.bt_or_refl == 'bt':
bt_or_refl = pug_l1b.bt
elif pug.bt_or_refl == 'refl':
bt_or_refl = pug_l1b.refl
bt_or_refl = bt_or_refl[::step, ::step]
valid = np.logical_and(bt_or_refl >= valid_range[0], bt_or_refl <= valid_range[1])
bt_or_refl = bt_or_refl[valid]
image_s.append(bt_or_refl)
# remove file from local cache
if not CACHE_GOES:
subprocess.call(['rm', local_path])
if len(image_s) == 0:
return None, None
values = np.concatenate(image_s)
mean = np.mean(values)
values -= mean
std = np.std(values)
return mean, std
NUM_VERT_LEVS = 26
def get_gfs_mean_std(matchup_dict, fld_name, valid_range, step=2):
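    """Return per-level (mean, std) column vectors of a GFS 3D field over
    the matchup times, restricted to `valid_range`. Assumes the vertical
    level is the field's last dimension, with NUM_VERT_LEVS levels."""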
keys = matchup_dict.keys()
fld_at_levels = [[] for i in range(NUM_VERT_LEVS)]
for key in keys:
obs = matchup_dict.get(key)
if obs is None:
continue
timestamp = obs[0][0]
_, ttup = get_time_tuple_utc(timestamp)
print(ttup.tm_year, ttup.tm_mon, ttup.tm_mday)
gfs_0, time_0, gfs_1, time_1 = get_bounding_gfs_files(timestamp)
if gfs_0 is None:
continue
gfs_0 = convert_file(gfs_0)
try:
xr_dataset = xr.open_dataset(gfs_0)
except Exception as exc:
print(exc)
            return None, None
fld = xr_dataset[fld_name]
fld_3d = fld.values
for i in range(NUM_VERT_LEVS):
fld_i = fld_3d[::step, ::step, i]
valid = np.logical_and(fld_i >= valid_range[0], fld_i <= valid_range[1])
fld_i = fld_i[valid]
fld_at_levels[i].append(fld_i)
mean = []
std = []
for i in range(NUM_VERT_LEVS):
fld_at_levels[i] = (np.concatenate(fld_at_levels[i])).flatten()
m = np.mean(fld_at_levels[i])
fld_at_levels[i] -= m
mean.append(m)
std.append(np.std(fld_at_levels[i]))
mean = np.array(mean)
std = np.array(std)
mean = np.reshape(mean, (NUM_VERT_LEVS, 1))
std = np.reshape(std, (NUM_VERT_LEVS, 1))
return mean, std
def get_gfs_mean_std_2d(matchup_dict, fld_name, valid_range, step=2):
keys = matchup_dict.keys()
fld_s = []
for key in keys:
obs = matchup_dict.get(key)
if obs is None:
continue
timestamp = obs[0][0]
_, ttup = get_time_tuple_utc(timestamp)
print(ttup.tm_year, ttup.tm_mon, ttup.tm_mday)
gfs_0, time_0, gfs_1, time_1 = get_bounding_gfs_files(timestamp)
if gfs_0 is None:
continue
gfs_0 = convert_file(gfs_0)
try:
xr_dataset = xr.open_dataset(gfs_0)
except Exception as exc:
print(exc)
            return None, None
fld = xr_dataset[fld_name]
fld_2d = fld.values
fld_i = fld_2d[::step, ::step]
valid = np.logical_and(fld_i >= valid_range[0], fld_i <= valid_range[1])
fld_i = fld_i[valid]
fld_s.append(fld_i)
fld_s = (np.concatenate(fld_s)).flatten()
mean = np.mean(fld_s)
fld_s -= mean
std = np.std(fld_s)
return mean, std
PROC_BATCH_SIZE = 10
BATCH_SIZE = 256
def run_mean_std(filename, matchup_dict=None):
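    """Driver: build (or reuse) the matchup dict, take a 10% validation
    split, and write ABI channel and GFS temperature normalization
    statistics to a text file."""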
if matchup_dict is None:
matchup = get_matchups(filename)
matchup_dict = merge(matchup, None)
matchup_dict = matchup_dict_to_nd(matchup_dict)
train_dict, valid_dict = split_matchup(matchup_dict, perc=0.1)
channels = ['14', '08', '02']
valid_range = [[150, 350], [150, 350], [0, 100]]
step = [100, 100, 400]
f = open('/home/rink/stats.txt', 'w')
for ch_idx, ch in enumerate(channels):
mean, std = get_abi_mean_std(valid_dict, channels[ch_idx], valid_range[ch_idx], step[ch_idx])
print(ch, ': ', mean, std)
f.write('channel: %s, %f, %f\n' % (ch, mean, std))
mean, std = get_gfs_mean_std(valid_dict, 'temperature', [150, 350])
f.write('gfs temperature: \n')
for i in range(NUM_VERT_LEVS):
f.write('%f, %f\n' % (mean[i], std[i]))
f.close()
return matchup_dict
def get_solar_alt_func(pug, glons, glats, dt_obj):
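    """Build a cubic interpolant of solar altitude (degrees) sampled every
    100th line/element, callable as f(line, element). Note that
    scipy.interpolate.interp2d is deprecated (removed in SciPy 1.14), so
    this assumes an older SciPy."""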
elems = pug.shape[1]
lines = pug.shape[0]
gx = np.arange(elems)[::100]
gy = np.arange(lines)[::100]
glons = glons[gy]
glons = glons[:, gx]
glats = glats[gy]
glats = glats[:, gx]
glons = glons.flatten()
glats = glats.flatten()
slr_alt = []
for i in range(len(glons)):
alt = get_altitude(glats[i], glons[i], dt_obj)
slr_alt.append(alt)
nd_alt = np.array(slr_alt)
alt_fnc = interp2d(gy, gx, nd_alt, kind='cubic')
return alt_fnc
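# A minimal usage sketch (the path below is hypothetical), assuming a
# matchup text file in the format parsed by get_matchups() above:
#
#     matchup_dict = get_matchup_dict('/path/to/amv_raob_matchups.txt', reduce=4)
#     train_dict, test_dict = split_matchup(matchup_dict, perc=0.2)
#     analyze(test_dict)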