Newer
Older
from metpy.units import units
from metpy.calc import thickness_hydrostatic
LatLonTuple = namedtuple('LatLonTuple', ['lat', 'lon'])
class EarlyStop:
def __init__(self, window_length=3, patience=5):
self.patience = patience
self.cnt = 0
self.cnt_wait = 0
self.window = np.zeros(window_length, dtype=np.single)
self.window.fill(np.nan)
def check_stop(self, value):
self.window[:-1] = self.window[1:]
self.window[-1] = value
if np.any(np.isnan(self.window)):
return False
ave = np.mean(self.window)
if ave < self.min:
self.min = ave
self.cnt_wait = 0
return False
else:
self.cnt_wait += 1
if self.cnt_wait > self.patience:
return True
else:
return False
def get_time_tuple_utc(timestamp):
dt_obj = datetime.datetime.fromtimestamp(timestamp, timezone.utc)
return dt_obj, dt_obj.timetuple()
def get_timestamp(dt_str, format_code='%Y-%m-%d_%H:%M'):
dto = datetime.datetime.strptime(dt_str, format_code).replace(tzinfo=timezone.utc)
ts = dto.timestamp()
return ts
def add_time_range_to_filename(pathname, tstart, tend):
dt_obj, _ = get_time_tuple_utc(tstart)
str_start = dt_obj.strftime('%Y%m%d%H')
dt_obj, _ = get_time_tuple_utc(tend)
str_end = dt_obj.strftime('%Y%m%d%H')
filename = os.path.split(pathname)[1]
w_o_ext, ext = os.path.splitext(filename)
filename = w_o_ext+'_'+str_start+'_'+str_end+ext
path = os.path.split(pathname)[0]
path = path+'/'+filename
return path
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def haversine_np(lon1, lat1, lon2, lat2):
"""
Calculate the great circle distance between two points
on the earth (specified in decimal degrees)
(lon1, lat1) must be broadcastable with (lon2, lat2).
"""
lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
c = 2 * np.arcsin(np.sqrt(a))
km = 6367 * c
return km
def bin_data_by(a, b, bin_ranges):
nbins = len(bin_ranges)
binned_data = []
for i in range(nbins):
rng = bin_ranges[i]
idxs = (b >= rng[0]) & (b < rng[1])
binned_data.append(a[idxs])
return binned_data
def get_bin_ranges(lop, hip, bin_size=100):
bin_ranges = []
delp = hip - lop
nbins = int(delp/bin_size)
for i in range(nbins):
rng = [lop + i*bin_size, lop + i*bin_size + bin_size]
bin_ranges.append(rng)
return bin_ranges
# t must be monotonic increasing
def get_breaks(t, threshold):
t_0 = t[0:t.shape[0]-1]
t_1 = t[1:t.shape[0]]
d = t_1 - t_0
idxs = np.nonzero(d > threshold)
return idxs
def pressure_to_altitude(pres, temp, prof_pres, prof_temp, sfc_pres=None, sfc_temp=None, sfc_elev=0):
raise GenericException("target pressure profile must be monotonic increasing")
raise GenericException("target pressure less than top of pressure profile")
if temp is None:
temp = np.interp(pres, prof_pres, prof_temp)
i_top = np.argmax(np.extract(prof_pres <= pres, prof_pres)) + 1
pres_s = prof_pres.tolist()
temp_s = prof_temp.tolist()
pres_s = [pres] + pres_s[i_top:]
temp_s = [temp] + temp_s[i_top:]
return -1
prof_pres = np.array(pres_s)
prof_temp = np.array(temp_s)
i_bot = prof_pres.shape[0] - 1
pres_s = pres_s + [sfc_pres]
temp_s = temp_s + [sfc_temp]
else:
idx = np.argmax(np.extract(prof_pres < sfc_pres, prof_pres))
if sfc_temp is None:
sfc_temp = np.interp(sfc_pres, prof_pres, prof_temp)
pres_s = prof_pres.tolist()
temp_s = prof_temp.tolist()
pres_s = pres_s[0:idx+1] + [sfc_pres]
temp_s = temp_s[0:idx+1] + [sfc_temp]
prof_pres = np.array(pres_s)
prof_temp = np.array(temp_s)
prof_pres = prof_pres[::-1]
prof_temp = prof_temp[::-1]
prof_pres = prof_pres * units.hectopascal
prof_temp = prof_temp * units.kelvin
sfc_elev = sfc_elev * units.meter
z = thickness_hydrostatic(prof_pres, prof_temp) + sfc_elev
return z
# http://fourier.eng.hmc.edu/e176/lectures/NM/node25.html
def minimize_quadratic(xa, xb, xc, ya, yb, yc):
x_m = xb + 0.5*(((ya-yb)*(xc-xb)*(xc-xb) - (yc-yb)*(xb-xa)*(xb-xa)) / ((ya-yb)*(xc-xb) + (yc-yb)*(xb-xa)))
return x_m
def value_to_index(nda, value):
diff = np.abs(nda - value)
idx = np.argmin(diff)
for k, v in enumerate(value_s):
above = v >= nda
if not above.any():
continue
# array solzen must be degrees, missing values must NaN. For small roughly 50x50km regions only
def is_day(solzen, test_angle=80.0):
solzen = solzen.flatten()
solzen = solzen[np.invert(np.isnan(solzen))]
if len(solzen) == 0 or np.sum(solzen <= test_angle) < len(solzen):
# array solzen must be degrees, missing values must NaN. For small roughly 50x50km regions only
solzen = solzen.flatten()
solzen = solzen[np.invert(np.isnan(solzen))]
if len(solzen) == 0 or np.sum(solzen >= test_angle) < len(solzen):
def check_oblique(satzen, test_angle=70.0):
satzen = satzen.flatten()
satzen = satzen[np.invert(np.isnan(satzen))]
if len(satzen) == 0 or np.sum(satzen <= test_angle) < len(satzen):
return False
else:
return True
def get_grid_values_all(h5f, grid_name, scale_factor_name='scale_factor', add_offset_name='add_offset',
fill_value_name='_FillValue', range_name='actual_range', fill_value=None):
hfds = h5f[grid_name]
attrs = hfds.attrs
if attrs is None:
raise GenericException('No attributes object for: '+grid_name)
grd_vals = hfds[:,]
if fill_value is not None:
grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)
attr = attrs.get(scale_factor_name)
if attr is None:
raise GenericException('Attribute: '+scale_factor_name+' not found for variable: '+grid_name)
scale_factor = attr[0]
grd_vals = grd_vals * scale_factor
if add_offset_name is not None:
attr = attrs.get(add_offset_name)
if attr is None:
raise GenericException('Attribute: '+add_offset_name+' not found for variable: '+grid_name)
add_offset = attr[0]
if range_name is not None:
attr = attrs.get(range_name)
if attr is None:
raise GenericException('Attribute: '+range_name+' not found for variable: '+grid_name)
low = attr[0]
high = attr[1]
grd_vals = np.where(grd_vals < low, np.nan, grd_vals)
grd_vals = np.where(grd_vals > high, np.nan, grd_vals)
elif fill_value_name is not None:
attr = attrs.get(fill_value_name)
if attr is None:
raise GenericException('Attribute: '+fill_value_name+' not found for variable: '+grid_name)
fill_value = attr[0]
grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)
# dt_str_0: start datetime string in format YYYY-MM-DD_HH:MM
# dt_str_1: stop datetime string, if not None num_steps is computed
# return num_steps+1 lists of datetime strings and timestamps (edges of a numpy histogram)
def make_times(dt_str_0, dt_str_1=None, format_code='%Y-%m-%d_%H:%M', num_steps=None, days=None, hours=None, minutes=None, seconds=None):
if days is not None:
inc = 86400*days
elif hours is not None:
inc = 3600*hours
elif minutes is not None:
inc = 60*minutes
else:
inc = seconds
dt_obj_s = []
ts_s = []
dto_0 = datetime.datetime.strptime(dt_str_0, format_code).replace(tzinfo=timezone.utc)
dto_1 = datetime.datetime.strptime(dt_str_1, format_code).replace(tzinfo=timezone.utc)
dt_obj_s.append(dto_0)
ts_s.append(ts_0)
dto_last = dto_0
for k in range(num_steps):
dt_obj = dto_last + datetime.timedelta(seconds=inc)
dt_obj_s.append(dt_obj)
ts_s.append(dt_obj.timestamp())
dto_last = dt_obj
return dt_obj_s, ts_s
def make_histogram(values, edges):
h = np.histogram(values, bins=edges)
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
return h
def normalize(data, param, mean_std_dict, add_noise=False, noise_scale=1.0, seed=None):
if mean_std_dict.get(param) is None:
return data
shape = data.shape
data = data.flatten()
mean, std, lo, hi = mean_std_dict.get(param)
data -= mean
data /= std
if add_noise:
if seed is not None:
np.random.seed(seed)
rnd = np.random.normal(loc=0, scale=noise_scale, size=data.size)
data += rnd
not_valid = np.isnan(data)
data[not_valid] = 0
data = np.reshape(data, shape)
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
return data
# ------------ This code will not be needed when we implement a Fully Connected CNN -----------------------------------
# Example GOES file to retrieve GEOS parameters in MetPy form (CONUS)
exmp_file_conus = '/Users/tomrink/data/OR_ABI-L1b-RadC-M6C14_G16_s20193140811215_e20193140813588_c20193140814070.nc'
# Full Disk
exmp_file_fd = '/Users/tomrink/data/OR_ABI-L1b-RadF-M6C16_G16_s20212521800223_e20212521809542_c20212521809596.nc'
def make_for_full_domain_predict(clvrx_file, name_list=None, domain='FD'):
w_x = 16
w_y = 16
if domain == 'CONUS':
exmpl_ds = xr.open_dataset(exmp_file_conus)
elif domain == 'FD':
exmpl_ds = xr.open_dataset(exmp_file_fd)
mdat = exmpl_ds.metpy.parse_cf('Rad')
geos = mdat.metpy.cartopy_crs
xlen = mdat.x.values.size
ylen = mdat.y.values.size
exmpl_ds.close()
h5f = h5py.File(clvrx_file, 'r')
grd_dct = {name: None for name in name_list}
cnt_a = 0
for didx, ds_name in enumerate(name_list):
gvals = get_grid_values_all(h5f, ds_name)
if gvals is not None:
grd_dct[ds_name] = gvals
cnt_a += 1
if cnt_a > 0 and cnt_a != len(name_list):
raise GenericException('weirdness')
grd_dct_n = {name: [] for name in name_list}
n_x = int(xlen/w_x)
n_y = int(ylen/w_y)
i_0 = 0
j_0 = 0
cc = []
ll = []
for didx, ds_name in enumerate(name_list):
for j in range(4, n_y-4, 1):
j_ul = j_0 + j * w_y
for i in range(4, n_x-4, 1):
i_ul = i_0 + i * w_x
if didx == 0:
ll.append(j_ul)
cc.append(i_ul)
grd_dct_n[ds_name].append(grd_dct[ds_name][j_ul:j_ul+w_y, i_ul:i_ul+w_x])
grd_dct = {name: None for name in name_list}
for didx, ds_name in enumerate(name_list):
grd_dct[ds_name] = np.stack(grd_dct_n[ds_name])
h5f.close()
return grd_dct, ll, cc
# -------------------------------------------------------------------------------------------