Newer
Older
from metpy.units import units
from metpy.calc import thickness_hydrostatic
# Lightweight immutable record with two named fields, `lat` and `lon`.
LatLonTuple = namedtuple('LatLonTuple', ['lat', 'lon'])
class EarlyStop:
    """Early-stopping monitor based on a moving average of recent values.

    The last ``window_length`` values passed to :meth:`check_stop` are kept in
    a rolling window.  Once the window is full, its mean is compared with the
    best (lowest) mean seen so far; ``patience`` consecutive non-improving
    checks are tolerated before a stop is signalled.
    """

    def __init__(self, window_length=3, patience=5):
        """Create a monitor.

        Args:
            window_length: number of most recent values that are averaged.
            patience: number of non-improving checks tolerated; the stop
                signal is raised on the check after that.
        """
        self.patience = patience
        # Best windowed mean seen so far.  Must exist before the first
        # comparison in check_stop; +inf makes the first full window count
        # as an improvement.
        self.min = np.inf
        self.cnt = 0
        self.cnt_wait = 0
        # NaN marks slots that have not been filled yet.
        self.window = np.full(window_length, np.nan, dtype=np.single)

    def check_stop(self, value):
        """Record ``value`` and report whether training should stop.

        Returns:
            ``True`` when the windowed mean has failed to improve more than
            ``patience`` times in a row, otherwise ``False``.  Always
            ``False`` while the window still contains NaN.
        """
        # Shift the window left by one and append the new value.
        self.window[:-1] = self.window[1:]
        self.window[-1] = value

        if np.any(np.isnan(self.window)):
            return False

        ave = np.mean(self.window)
        if ave < self.min:
            # New best: remember it and restart the patience counter.
            self.min = ave
            self.cnt_wait = 0
            return False

        self.cnt_wait += 1
        return self.cnt_wait > self.patience
def get_time_tuple_utc(timestamp):
    """Convert a POSIX timestamp to UTC.

    Returns a pair: the timezone-aware ``datetime`` in UTC and the
    ``time.struct_time`` produced by its ``timetuple()``.
    """
    utc_moment = datetime.datetime.fromtimestamp(timestamp, tz=timezone.utc)
    return utc_moment, utc_moment.timetuple()
def get_timestamp(dt_str, format_code='%Y-%m-%d_%H:%M'):
    """Parse ``dt_str`` with ``format_code``, treat it as UTC, and return its POSIX timestamp."""
    parsed = datetime.datetime.strptime(dt_str, format_code)
    return parsed.replace(tzinfo=timezone.utc).timestamp()
def add_time_range_to_filename(pathname, tstart, tend):
    """Insert a UTC time range into a file name, before its extension.

    ``dir/name.ext`` becomes ``dir/name_<start>_<end>.ext`` where both stamps
    are formatted as ``YYYYMMDDHH``.

    Args:
        pathname: original path; may be a bare file name.
        tstart: POSIX timestamp of the range start.
        tend: POSIX timestamp of the range end.

    Returns:
        The new path as a string.
    """
    directory, filename = os.path.split(pathname)
    stem, ext = os.path.splitext(filename)

    str_start = get_time_tuple_utc(tstart)[0].strftime('%Y%m%d%H')
    str_end = get_time_tuple_utc(tend)[0].strftime('%Y%m%d%H')

    # os.path.join leaves a bare file name relative; unconditional
    # directory + '/' + name would anchor it at the filesystem root.
    return os.path.join(directory, stem + '_' + str_start + '_' + str_end + ext)
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def haversine_np(lon1, lat1, lon2, lat2):
    """
    Great-circle distance in kilometres between two points given in
    decimal degrees, using the haversine formula with a sphere radius
    of 6367 km.

    (lon1, lat1) must be broadcastable with (lon2, lat2).
    """
    lam1 = np.radians(lon1)
    phi1 = np.radians(lat1)
    lam2 = np.radians(lon2)
    phi2 = np.radians(lat2)

    sin_half_dlat = np.sin((phi2 - phi1)/2.0)
    sin_half_dlon = np.sin((lam2 - lam1)/2.0)

    hav = sin_half_dlat**2 + np.cos(phi1) * np.cos(phi2) * sin_half_dlon**2
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return 6367 * central_angle
def bin_data_by(a, b, bin_ranges):
    """Split ``a`` into groups according to which range the matching ``b`` value falls in.

    Each entry of ``bin_ranges`` is indexed as ``[low, high]``; an element is
    selected for that bin when ``low <= b < high``.  Returns one array of
    selected ``a`` values per range, in the order of ``bin_ranges``.
    """
    return [a[(b >= edges[0]) & (b < edges[1])] for edges in bin_ranges]
def get_bin_ranges(lop, hip, bin_size=100):
    """Return consecutive ``[low, high]`` pairs of width ``bin_size`` starting at ``lop``.

    The number of bins is ``int((hip - lop) / bin_size)``, so a trailing
    partial bin is not included.
    """
    count = int((hip - lop)/bin_size)
    return [[lop + k*bin_size, lop + k*bin_size + bin_size] for k in range(count)]
# t must be monotonically increasing
def get_breaks(t, threshold):
    """Locate gaps in ``t``.

    Returns the ``np.nonzero`` result (a tuple of index arrays) for the
    positions ``i`` where ``t[i+1] - t[i]`` exceeds ``threshold``.
    """
    last = t.shape[0] - 1
    steps = t[1:last+1] - t[0:last]
    return np.nonzero(steps > threshold)
def pressure_to_altitude(pres, temp, prof_pres, prof_temp, sfc_pres=None, sfc_temp=None, sfc_elev=0):
# Builds a pressure/temperature column from the supplied profile and passes it to
# MetPy's thickness_hydrostatic, adding sfc_elev to the result.
# NOTE(review): in this view every statement sits at one indentation level and several
# guarding conditions appear to be missing (for the two raises, for `return -1`, and
# for the branch that pairs with `else:`). As shown, the first raise is unconditional.
# Confirm against the full file before changing any logic here.
raise GenericException("target pressure profile must be monotonic increasing")
raise GenericException("target pressure less than top of pressure profile")
# When no temperature is supplied, interpolate the profile temperature at the target pressure.
if temp is None:
temp = np.interp(pres, prof_pres, prof_temp)
# argmax over the extracted profile values that are <= pres, plus one. This is an index
# into prof_pres only when the profile is increasing, as the first error message requires.
i_top = np.argmax(np.extract(prof_pres <= pres, prof_pres)) + 1
pres_s = prof_pres.tolist()
temp_s = prof_temp.tolist()
# Working column: the target level first, then the profile levels from i_top onward.
pres_s = [pres] + pres_s[i_top:]
temp_s = [temp] + temp_s[i_top:]
# NOTE(review): the condition guarding this sentinel return is not visible here.
return -1
prof_pres = np.array(pres_s)
prof_temp = np.array(temp_s)
i_bot = prof_pres.shape[0] - 1
# Append the surface level to the end of the working column.
pres_s = pres_s + [sfc_pres]
temp_s = temp_s + [sfc_temp]
else:
# Otherwise cut the column after the selected level below sfc_pres and end it at the surface.
idx = np.argmax(np.extract(prof_pres < sfc_pres, prof_pres))
if sfc_temp is None:
sfc_temp = np.interp(sfc_pres, prof_pres, prof_temp)
pres_s = prof_pres.tolist()
temp_s = prof_temp.tolist()
pres_s = pres_s[0:idx+1] + [sfc_pres]
temp_s = temp_s[0:idx+1] + [sfc_temp]
prof_pres = np.array(pres_s)
prof_temp = np.array(temp_s)
# Reverse both arrays (presumably so the column runs from the surface upward - TODO confirm).
prof_pres = prof_pres[::-1]
prof_temp = prof_temp[::-1]
# Attach units: pressure in hectopascal, temperature in kelvin, elevation in metres.
prof_pres = prof_pres * units.hectopascal
prof_temp = prof_temp * units.kelvin
sfc_elev = sfc_elev * units.meter
# Hydrostatic thickness of the column, offset by the surface elevation.
z = thickness_hydrostatic(prof_pres, prof_temp) + sfc_elev
return z
# http://fourier.eng.hmc.edu/e176/lectures/NM/node25.html
def minimize_quadratic(xa, xb, xc, ya, yb, yc):
    """Abscissa of the vertex of the parabola through (xa, ya), (xb, yb), (xc, yc).

    No guard is applied for a zero denominator (e.g. collinear points).
    """
    dy_ab = ya - yb
    dy_cb = yc - yb
    dx_cb = xc - xb
    dx_ba = xb - xa

    numerator = dy_ab*dx_cb*dx_cb - dy_cb*dx_ba*dx_ba
    denominator = dy_ab*dx_cb + dy_cb*dx_ba
    return xb + 0.5*(numerator / denominator)
def value_to_index(nda, value):
# Finds the position in nda whose element has the smallest absolute difference from value.
diff = np.abs(nda - value)
idx = np.argmin(diff)
# NOTE(review): no `return idx` is visible in this view. The loop below iterates over
# `value_s`, which is not a parameter of this function, so it presumably belongs to a
# separate definition whose header is missing here. Confirm against the full file.
for k, v in enumerate(value_s):
# Boolean mask of the nda elements that v is greater than or equal to.
above = v >= nda
# Skip values that are smaller than every element of nda.
if not above.any():
continue
# solzen must be an array in degrees, with missing values set to NaN. Intended for small (roughly 50x50 km) regions only.
def is_day(solzen, test_angle=80.0):
# Flatten the input and drop NaN entries before testing.
solzen = solzen.flatten()
solzen = solzen[np.invert(np.isnan(solzen))]
# True when nothing valid remains, or when at least one valid angle exceeds test_angle.
# NOTE(review): the branch bodies are not visible in this view (presumably return False /
# return True, mirroring check_oblique). Confirm against the full file.
if len(solzen) == 0 or np.sum(solzen <= test_angle) < len(solzen):
# solzen must be an array in degrees, with missing values set to NaN. Intended for small (roughly 50x50 km) regions only.
# NOTE(review): a `def` header (presumably the night-time counterpart of is_day) appears
# to be missing before the statements below.
solzen = solzen.flatten()
solzen = solzen[np.invert(np.isnan(solzen))]
# True when nothing valid remains, or when at least one valid angle is below test_angle.
if len(solzen) == 0 or np.sum(solzen >= test_angle) < len(solzen):
def check_oblique(satzen, test_angle=70.0):
    """Return True only if there is at least one non-NaN angle and every non-NaN angle is <= ``test_angle``."""
    valid = satzen.flatten()
    valid = valid[~np.isnan(valid)]
    if valid.size == 0:
        return False
    return bool(np.all(valid <= test_angle))
def _get_required_attr(attrs, attr_name, grid_name):
    """Fetch ``attr_name`` from ``attrs`` or raise GenericException naming the variable."""
    attr = attrs.get(attr_name)
    if attr is None:
        raise GenericException('Attribute: '+attr_name+' not found for variable: '+grid_name)
    return attr


def get_grid_values_all(h5f, grid_name, scale_factor_name='scale_factor', add_offset_name='add_offset',
                        fill_value_name='_FillValue', range_name='actual_range', fill_value=None):
    """Read a whole gridded variable and return it unpacked, with invalid cells set to NaN.

    Processing order:
      1. cells equal to the explicit ``fill_value`` argument (if given) become NaN;
      2. values are multiplied by the scale-factor attribute and the add-offset
         attribute is added (each step is skipped when its name is ``None``);
      3. if ``range_name`` is given, values outside ``[low, high]`` of that
         attribute become NaN; otherwise, if ``fill_value_name`` is given, cells
         equal to that attribute's value become NaN.

    Args:
        h5f: open file-like mapping (e.g. ``h5py.File``) indexed by variable name.
        grid_name: name of the variable to read.
        scale_factor_name, add_offset_name, fill_value_name, range_name:
            attribute names to consult, or ``None`` to skip that step.
        fill_value: optional explicit fill value tested on the raw data.

    Returns:
        The processed array of grid values.

    Raises:
        GenericException: if the variable has no attributes object or a
            requested attribute is missing.
    """
    hfds = h5f[grid_name]
    attrs = hfds.attrs
    if attrs is None:
        raise GenericException('No attributes object for: '+grid_name)

    grd_vals = hfds[:,]

    if fill_value is not None:
        grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)

    if scale_factor_name is not None:
        scale_factor = _get_required_attr(attrs, scale_factor_name, grid_name)[0]
        grd_vals = grd_vals * scale_factor

    if add_offset_name is not None:
        add_offset = _get_required_attr(attrs, add_offset_name, grid_name)[0]
        # The offset must be applied, not just read: unpacked = packed*scale + offset.
        grd_vals = grd_vals + add_offset

    if range_name is not None:
        attr = _get_required_attr(attrs, range_name, grid_name)
        low = attr[0]
        high = attr[1]
        grd_vals = np.where(grd_vals < low, np.nan, grd_vals)
        grd_vals = np.where(grd_vals > high, np.nan, grd_vals)
    elif fill_value_name is not None:
        # NOTE(review): this comparison runs after scaling/offset, so it only
        # matches when the stored fill value is expressed in unpacked units.
        fill_value = _get_required_attr(attrs, fill_value_name, grid_name)[0]
        grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)

    return grd_vals
# dt_str_0: start datetime string in format YYYY-MM-DD_HH:MM
# dt_str_1: stop datetime string; if not None, num_steps is computed from it
# returns two lists, each of length num_steps+1: datetime objects and their timestamps (usable as numpy histogram edges)
def make_times(dt_str_0, dt_str_1=None, format_code='%Y-%m-%d_%H:%M', num_steps=None, days=None, hours=None, minutes=None, seconds=None):
    """Build evenly spaced UTC times starting at ``dt_str_0``.

    The step is taken from the first of ``days``, ``hours``, ``minutes``,
    ``seconds`` that is not ``None``.

    Args:
        dt_str_0: start time string, parsed with ``format_code`` as UTC.
        dt_str_1: optional stop time string.  When given, ``num_steps`` is
            computed as the number of whole steps between start and stop
            and any ``num_steps`` argument is ignored.
        format_code: ``strptime`` format for both strings.
        num_steps: number of steps when ``dt_str_1`` is not given.
        days, hours, minutes, seconds: step size.

    Returns:
        ``(dt_obj_s, ts_s)``: two lists of length ``num_steps + 1`` holding
        the timezone-aware datetimes and their POSIX timestamps.

    Raises:
        ValueError: if neither ``dt_str_1`` nor ``num_steps`` is given, or
            if steps are required but no step size was supplied.
    """
    if days is not None:
        inc = 86400*days
    elif hours is not None:
        inc = 3600*hours
    elif minutes is not None:
        inc = 60*minutes
    else:
        inc = seconds

    if dt_str_1 is None and num_steps is None:
        raise ValueError('either dt_str_1 or num_steps must be given')
    if inc is None and (dt_str_1 is not None or num_steps):
        raise ValueError('one of days, hours, minutes or seconds must be given')

    dto_0 = datetime.datetime.strptime(dt_str_0, format_code).replace(tzinfo=timezone.utc)
    ts_0 = dto_0.timestamp()

    if dt_str_1 is not None:
        dto_1 = datetime.datetime.strptime(dt_str_1, format_code).replace(tzinfo=timezone.utc)
        # Whole steps only: a trailing partial step is dropped.
        num_steps = int((dto_1.timestamp() - ts_0)/inc)

    dt_obj_s = [dto_0]
    ts_s = [ts_0]

    dto_last = dto_0
    for _ in range(num_steps):
        dto_last = dto_last + datetime.timedelta(seconds=inc)
        dt_obj_s.append(dto_last)
        ts_s.append(dto_last.timestamp())

    return dt_obj_s, ts_s
def make_histogram(values, edges):
    """Return ``np.histogram(values, bins=edges)``, i.e. the ``(counts, bin_edges)`` pair."""
    return np.histogram(values, bins=edges)
def normalize(data, param, mean_std_dict, add_noise=False, noise_scale=1.0, seed=None):
    """Standardise ``data`` with the statistics stored for ``param``.

    Args:
        data: numpy array of any shape; it is not modified.
        param: key into ``mean_std_dict``.
        mean_std_dict: maps a parameter name to ``(mean, std, lo, hi)``;
            only ``mean`` and ``std`` are used here.
        add_noise: when true, add zero-mean Gaussian noise after scaling.
        noise_scale: standard deviation of that noise.
        seed: if given, seeds NumPy's global random generator before
            drawing the noise.

    Returns:
        ``data`` itself when ``param`` has no entry in ``mean_std_dict``;
        otherwise a new array of the same shape holding
        ``(data - mean) / std`` (plus noise if requested) with NaN
        replaced by 0.
    """
    if mean_std_dict.get(param) is None:
        return data

    shape = data.shape
    # flatten() returns a copy, so the in-place arithmetic below never
    # touches the caller's array.
    data = data.flatten()

    mean, std, lo, hi = mean_std_dict.get(param)
    data -= mean
    data /= std

    if add_noise:
        if seed is not None:
            np.random.seed(seed)
        rnd = np.random.normal(loc=0, scale=noise_scale, size=data.size)
        data += rnd

    not_valid = np.isnan(data)
    data[not_valid] = 0

    data = np.reshape(data, shape)

    # Return the result on this path too; falling off the end would hand
    # callers None.
    return data
# Pre-built GEOS coordinate reference objects, loaded once at import time.
# Context managers guarantee each file is closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code; these files must be trusted.
with open('/Users/tomrink/geos_crs_goes16_FD.pkl', 'rb') as f:
    geos_goes16_fd = pickle.load(f)

with open('/Users/tomrink/geos_crs_goes16_CONUS.pkl', 'rb') as f:
    geos_goes16_conus = pickle.load(f)

with open('/Users/tomrink/geos_crs_H08_FD.pkl', 'rb') as f:
    geos_h08_fd = pickle.load(f)
def get_cartopy_crs(satellite, domain):
    """Return the projection object and grid geometry for a satellite/domain pair.

    Args:
        satellite: ``'GOES16'`` or ``'H08'``.
        domain: ``'FD'`` or ``'CONUS'`` for GOES16; not consulted for H08,
            which always uses its full-disk geometry.

    Returns:
        ``(geos, xlen, xmin, xmax, ylen, ymin, ymax)``: the pre-loaded CRS
        object, the grid size along x and y, and the x/y extents.

    Raises:
        ValueError: for an unsupported satellite or GOES16 domain.
    """
    if satellite == 'GOES16':
        if domain == 'FD':
            geos = geos_goes16_fd
            xlen = 5424
            xmin = -5433893.0
            # Full-disk extent is symmetric about the origin (same magnitude as
            # xmin/ymin/ymax); this value must be set for the return below.
            xmax = 5433893.0
            ylen = 5424
            ymin = -5433893.0
            ymax = 5433893.0
        elif domain == 'CONUS':
            geos = geos_goes16_conus
            xlen = 2500
            xmin = -3626269.5
            xmax = 1381770.0
            ylen = 1500
            ymin = 1584175.9
            ymax = 4588198.0
        else:
            raise ValueError('unsupported GOES16 domain: ' + str(domain))
    elif satellite == 'H08':
        # NOTE(review): these extents are ~1000x smaller in magnitude than the
        # GOES16 ones, which suggests different units - confirm with callers.
        geos = geos_h08_fd
        xlen = 5500
        xmin = -5498.99990119
        xmax = 5498.99990119
        ylen = 5500
        ymin = -5498.99990119
        ymax = 5498.99990119
    else:
        raise ValueError('unsupported satellite: ' + str(satellite))

    return geos, xlen, xmin, xmax, ylen, ymin, ymax
# ------------ This code will not be needed when we implement a Fully Connected CNN -----------------------------------
# Example GOES file to retrieve GEOS parameters in MetPy form (CONUS)
# NOTE(review): neither example path is referenced by any live code visible in this
# part of the file; both are machine-specific absolute paths.
exmp_file_conus = '/Users/tomrink/data/OR_ABI-L1b-RadC-M6C14_G16_s20193140811215_e20193140813588_c20193140814070.nc'
# Full Disk
exmp_file_fd = '/Users/tomrink/data/OR_ABI-L1b-RadF-M6C16_G16_s20212521800223_e20212521809542_c20212521809596.nc'
def make_for_full_domain_predict(clvrx_file, name_list=None, satellite='GOES16', domain='FD'):
    """Cut every requested grid of a file into a stack of fixed-size tiles.

    The grid dimensions come from ``get_cartopy_crs(satellite, domain)``.
    The domain is divided into ``w_y`` x ``w_x`` tiles and a border of four
    tiles is skipped on every side.

    Args:
        clvrx_file: path of the HDF5 file to read.
        name_list: names of the datasets to read and tile.
        satellite, domain: passed to ``get_cartopy_crs``.

    Returns:
        ``(grd_dct, ll, cc)``: ``grd_dct`` maps each name to the stacked
        tiles, and ``ll`` / ``cc`` hold the upper-left row / column index
        of every tile, in stacking order.

    Raises:
        ValueError: if ``name_list`` is not provided.
        GenericException: if only some of the requested datasets yield values.
    """
    if name_list is None:
        raise ValueError('name_list must be provided')

    # Only the grid dimensions are needed here; the CRS object and extents are unused.
    _, xlen, _, _, ylen, _, _ = get_cartopy_crs(satellite, domain)

    # NOTE(review): w_x / w_y (tile width / height) are not defined inside this
    # block; they are assumed to be defined elsewhere in the module - TODO confirm.
    n_x = int(xlen/w_x)
    n_y = int(ylen/w_y)

    # Upper-left corner (row, column) of every tile, rows outermost.
    origins = [(j * w_y, i * w_x) for j in range(4, n_y-4) for i in range(4, n_x-4)]

    # The context manager closes the file even when reading or tiling raises.
    with h5py.File(clvrx_file, 'r') as h5f:
        grids = {}
        cnt_a = 0
        for ds_name in name_list:
            gvals = get_grid_values_all(h5f, ds_name)
            grids[ds_name] = gvals
            if gvals is not None:
                cnt_a += 1

        if cnt_a > 0 and cnt_a != len(name_list):
            raise GenericException('weirdness')

        grd_dct = {}
        for ds_name in name_list:
            grid = grids[ds_name]
            grd_dct[ds_name] = np.stack([grid[j_ul:j_ul+w_y, i_ul:i_ul+w_x] for j_ul, i_ul in origins])

    # Corner lists are reported only when at least one dataset was tiled.
    has_names = len(name_list) > 0
    ll = [j_ul for j_ul, _ in origins] if has_names else []
    cc = [i_ul for _, i_ul in origins] if has_names else []

    return grd_dct, ll, cc
# -------------------------------------------------------------------------------------------