"""General-purpose helpers: UTC time handling, binning, vertical-profile
utilities, HDF5 grid access, normalization and geostationary-grid tiling."""

import metpy
import numpy as np
import xarray as xr
import datetime
from datetime import timezone
from metpy.units import units
from metpy.calc import thickness_hydrostatic
from collections import namedtuple
import os
import h5py
import pickle

LatLonTuple = namedtuple('LatLonTuple', ['lat', 'lon'])

homedir = os.path.expanduser('~') + '/'


class GenericException(Exception):
    """Module-wide error type; the text is also available as ``.message``."""

    def __init__(self, message):
        # Forward to the base class so ``args``/``str()`` are populated no
        # matter how the exception was constructed.
        super().__init__(message)
        self.message = message


class EarlyStop:
    """Stop criterion based on a moving average of the most recent values.

    ``check_stop`` keeps the last ``window_length`` values; once the window is
    full it compares the window mean with the best (lowest) mean seen so far
    and reports ``True`` after more than ``patience`` consecutive checks
    without a new best.
    """

    def __init__(self, window_length=3, patience=5):
        self.patience = patience
        self.min = np.finfo(np.single).max  # best (lowest) window mean so far
        self.cnt = 0
        self.cnt_wait = 0  # consecutive checks without improvement
        # NaN marks slots that have not been filled yet.
        self.window = np.zeros(window_length, dtype=np.single)
        self.window.fill(np.nan)

    def check_stop(self, value):
        """Record ``value`` and return True when stopping is indicated."""
        # Slide the window left by one and append the newest value.
        self.window[:-1] = self.window[1:]
        self.window[-1] = value

        # Window not full yet (or a NaN was supplied): never stop.
        if np.any(np.isnan(self.window)):
            return False

        ave = np.mean(self.window)
        if ave < self.min:
            self.min = ave
            self.cnt_wait = 0
            return False

        self.cnt_wait += 1
        return self.cnt_wait > self.patience


def get_time_tuple_utc(timestamp):
    """Return ``(aware UTC datetime, its timetuple())`` for a POSIX timestamp."""
    dt_obj = datetime.datetime.fromtimestamp(timestamp, timezone.utc)
    return dt_obj, dt_obj.timetuple()


def get_timestamp(dt_str, format_code='%Y-%m-%d_%H:%M'):
    """Parse ``dt_str`` with ``format_code``, treat it as UTC, return its POSIX timestamp."""
    dto = datetime.datetime.strptime(dt_str, format_code).replace(tzinfo=timezone.utc)
    return dto.timestamp()


def add_time_range_to_filename(pathname, tstart, tend):
    """Insert ``_<start>_<end>`` (UTC, ``YYYYMMDDHH``) before the file extension.

    ``tstart`` and ``tend`` are POSIX timestamps. The directory part of
    ``pathname`` is preserved.
    """
    str_start = get_time_tuple_utc(tstart)[0].strftime('%Y%m%d%H')
    str_end = get_time_tuple_utc(tend)[0].strftime('%Y%m%d%H')

    directory, filename = os.path.split(pathname)
    w_o_ext, ext = os.path.splitext(filename)
    filename = w_o_ext + '_' + str_start + '_' + str_end + ext

    # os.path.join keeps a bare file name relative; plain '/' concatenation
    # would turn 'name.nc' into the root path '/name_..._....nc'.
    return os.path.join(directory, filename)


def haversine_np(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    (lon1, lat1) must be broadcastable with (lon2, lat2).

    Returns the distance in kilometers, using an Earth radius of 6367 km.
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2

    c = 2 * np.arcsin(np.sqrt(a))
    km = 6367 * c
    return km


def bin_data_by(a, b, bin_ranges):
    """Split ``a`` into one array per bin, selecting where ``lo <= b < hi``.

    ``bin_ranges`` is a sequence of ``[lo, hi]`` pairs; ``a`` is indexed with
    a boolean mask computed from ``b``.
    """
    return [a[(b >= rng[0]) & (b < rng[1])] for rng in bin_ranges]


def get_bin_ranges(lop, hip, bin_size=100):
    """Return consecutive ``[lo, lo + bin_size]`` pairs starting at ``lop``.

    The number of bins is ``int((hip - lop) / bin_size)``, so a trailing
    partial bin is dropped.
    """
    nbins = int((hip - lop)/bin_size)
    return [[lop + i*bin_size, lop + i*bin_size + bin_size] for i in range(nbins)]


# t must be monotonic increasing
def get_breaks(t, threshold):
    """Return ``np.nonzero`` of the consecutive differences of ``t`` that exceed ``threshold``.

    An index ``k`` in the result means ``t[k+1] - t[k] > threshold``.
    """
    d = t[1:] - t[:-1]
    return np.nonzero(d > threshold)


def pressure_to_altitude(pres, temp, prof_pres, prof_temp, sfc_pres=None, sfc_temp=None, sfc_elev=0):
    """Hydrostatic thickness of the column below ``pres``, plus ``sfc_elev``.

    The profile is cut at ``pres`` on top and, when ``sfc_pres`` is given, cut
    or extended at the surface on the bottom. The remaining column is passed
    bottom-up to MetPy's ``thickness_hydrostatic`` tagged as hectopascal and
    kelvin; ``sfc_elev`` is tagged as meter.

    Args:
        pres: target pressure; must not be less than ``prof_pres[0]``.
        temp: temperature at ``pres``; interpolated from the profile if None.
        prof_pres: numpy array of profile pressures, strictly increasing.
        prof_temp: numpy array of profile temperatures matching ``prof_pres``.
        sfc_pres: optional surface pressure.
        sfc_temp: optional surface temperature.
        sfc_elev: surface elevation added to the thickness.

    Returns:
        The thickness plus ``sfc_elev`` as a unit-tagged quantity, or the
        plain integer ``-1`` when ``pres`` is greater than ``sfc_pres``.

    Raises:
        GenericException: if ``prof_pres`` is not strictly increasing or
            ``pres`` is less than the first profile pressure.
    """
    if not np.all(np.diff(prof_pres) > 0):
        raise GenericException("target pressure profile must be monotonic increasing")

    if pres < prof_pres[0]:
        raise GenericException("target pressure less than top of pressure profile")

    if temp is None:
        temp = np.interp(pres, prof_pres, prof_temp)

    # Index of the first profile level with pressure strictly greater than pres.
    i_top = np.argmax(np.extract(prof_pres <= pres, prof_pres)) + 1

    pres_s = prof_pres.tolist()
    temp_s = prof_temp.tolist()

    # The target point becomes the new top of the column.
    pres_s = [pres] + pres_s[i_top:]
    temp_s = [temp] + temp_s[i_top:]

    if sfc_pres is not None:
        if pres > sfc_pres:  # incoming pressure below surface
            return -1

        prof_pres = np.array(pres_s)
        prof_temp = np.array(temp_s)
        i_bot = prof_pres.shape[0] - 1

        if sfc_pres > prof_pres[i_bot]:  # surface below profile bottom
            # NOTE(review): sfc_temp is appended as-is here, so a None
            # sfc_temp ends up in the temperature array -- confirm callers
            # always pass sfc_temp in this situation.
            pres_s = pres_s + [sfc_pres]
            temp_s = temp_s + [sfc_temp]
        else:
            # Last level strictly above the surface; deeper levels are dropped.
            idx = np.argmax(np.extract(prof_pres < sfc_pres, prof_pres))
            if sfc_temp is None:
                sfc_temp = np.interp(sfc_pres, prof_pres, prof_temp)
            pres_s = prof_pres.tolist()
            temp_s = prof_temp.tolist()
            pres_s = pres_s[0:idx+1] + [sfc_pres]
            temp_s = temp_s[0:idx+1] + [sfc_temp]

    prof_pres = np.array(pres_s)
    prof_temp = np.array(temp_s)

    # Reverse to bottom-up order (decreasing pressure).
    prof_pres = prof_pres[::-1]
    prof_temp = prof_temp[::-1]

    prof_pres = prof_pres * units.hectopascal
    prof_temp = prof_temp * units.kelvin
    sfc_elev = sfc_elev * units.meter

    z = thickness_hydrostatic(prof_pres, prof_temp) + sfc_elev

    return z


# http://fourier.eng.hmc.edu/e176/lectures/NM/node25.html
def minimize_quadratic(xa, xb, xc, ya, yb, yc):
    """Return the x position of the vertex of the parabola through three points.

    The points are ``(xa, ya)``, ``(xb, yb)`` and ``(xc, yc)``. The
    denominator is zero when the three points are collinear; that case is not
    handled here.
    """
    left = xb - xa
    right = xc - xb
    numer = (ya-yb)*right*right - (yc-yb)*left*left
    denom = (ya-yb)*right + (yc-yb)*left
    return xb + 0.5*(numer / denom)


def value_to_index(nda, value):
    """Return the index of the element of ``nda`` closest to ``value``."""
    return np.argmin(np.abs(nda - value))


def find_bin_index(nda, value_s):
    """For each value, return the largest index ``i`` with ``nda[i] <= value``.

    The result is ``-1`` for a value that is below every element of ``nda`` or
    not below any element (i.e. at or beyond the maximum).
    Presumably ``nda`` holds ascending bin edges -- confirm against callers.

    Returns:
        int32 array with one entry per element of ``value_s``.
    """
    idxs = np.arange(nda.shape[0])
    left_idx = np.full(value_s.shape[0], -1, dtype=np.int32)

    for k, v in enumerate(value_s):
        at_or_below = v >= nda
        if at_or_below.any() and (v < nda).any():
            left_idx[k] = idxs[at_or_below].max()

    return left_idx


def _valid_angles(angles):
    """Return ``angles`` flattened to 1-D with NaN entries removed."""
    flat = angles.flatten()
    return flat[~np.isnan(flat)]


# array solzen must be degrees, missing values must NaN. For small roughly 50x50km regions only
def is_day(solzen, test_angle=80.0):
    """True when at least one value is valid and every valid value is <= ``test_angle``."""
    valid = _valid_angles(solzen)
    return bool(valid.size > 0 and np.all(valid <= test_angle))


# array solzen must be degrees, missing values must NaN. For small roughly 50x50km regions only
def is_night(solzen, test_angle=100.0):
    """True when at least one value is valid and every valid value is >= ``test_angle``."""
    valid = _valid_angles(solzen)
    return bool(valid.size > 0 and np.all(valid >= test_angle))


def check_oblique(satzen, test_angle=70.0):
    """True when at least one value is valid and every valid value is <= ``test_angle``."""
    valid = _valid_angles(satzen)
    return bool(valid.size > 0 and np.all(valid <= test_angle))


def _require_attr(attrs, attr_name, grid_name):
    """Return attribute ``attr_name`` from ``attrs`` or raise GenericException if absent."""
    attr = attrs.get(attr_name)
    if attr is None:
        raise GenericException('Attribute: '+attr_name+' not found for variable: '+grid_name)
    return attr


def get_grid_values_all(h5f, grid_name, scale_factor_name='scale_factor', add_offset_name='add_offset',
                        fill_value_name='_FillValue', range_name='actual_range', fill_value=None):
    """Read dataset ``grid_name`` from ``h5f``, apply scale/offset and mask invalid values with NaN.

    Steps, each skipped when its ``*_name`` argument is None:
      1. values equal to the explicit ``fill_value`` argument become NaN;
      2. multiply by the first element of the scale-factor attribute;
      3. add the first element of the add-offset attribute;
      4. values outside ``[range[0], range[1]]`` become NaN; only if
         ``range_name`` is None, values equal to the first element of the
         fill-value attribute become NaN instead.

    Raises:
        GenericException: if a requested attribute is missing.
    """
    hfds = h5f[grid_name]
    attrs = hfds.attrs
    if attrs is None:
        raise GenericException('No attributes object for: '+grid_name)

    grd_vals = hfds[:,]
    if fill_value is not None:
        grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)

    if scale_factor_name is not None:
        scale_factor = _require_attr(attrs, scale_factor_name, grid_name)[0]
        grd_vals = grd_vals * scale_factor

    if add_offset_name is not None:
        add_offset = _require_attr(attrs, add_offset_name, grid_name)[0]
        grd_vals = grd_vals + add_offset

    if range_name is not None:
        attr = _require_attr(attrs, range_name, grid_name)
        low = attr[0]
        high = attr[1]
        grd_vals = np.where(grd_vals < low, np.nan, grd_vals)
        grd_vals = np.where(grd_vals > high, np.nan, grd_vals)
    elif fill_value_name is not None:
        # NOTE(review): this comparison runs after scale/offset were applied,
        # so it matches the attribute value against scaled data -- confirm
        # that this is intended.
        fill_value = _require_attr(attrs, fill_value_name, grid_name)[0]
        grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)

    return grd_vals


# dt_str_0: start datetime string in format YYYY-MM-DD_HH:MM
# dt_str_1: stop datetime string, if not None num_steps is computed
# format_code: default '%Y-%m-%d_%H:%M'
# num_steps with increment of days, hours, minutes or seconds
# dt_str_1 and num_steps cannot both be None
# return num_steps+1 lists of UTC datetime objects and timestamps (edges of a numpy histogram)
def make_times(dt_str_0, dt_str_1=None, format_code='%Y-%m-%d_%H:%M', num_steps=None, days=None, hours=None, minutes=None, seconds=None):
    """Build evenly spaced UTC datetimes and their POSIX timestamps.

    The increment is taken from the first of ``days``, ``hours``, ``minutes``,
    ``seconds`` that is not None. When ``dt_str_1`` is given, ``num_steps`` is
    recomputed as ``int((t1 - t0) / increment)``.

    Returns:
        ``(datetimes, timestamps)``, two lists of length ``num_steps + 1``
        starting at ``dt_str_0``.

    Raises:
        GenericException: if no increment is given, or if both ``dt_str_1``
            and ``num_steps`` are None.
    """
    if days is not None:
        inc = 86400*days
    elif hours is not None:
        inc = 3600*hours
    elif minutes is not None:
        inc = 60*minutes
    else:
        inc = seconds

    if inc is None:
        raise GenericException('one of days, hours, minutes or seconds must be given')

    dto_0 = datetime.datetime.strptime(dt_str_0, format_code).replace(tzinfo=timezone.utc)

    if dt_str_1 is not None:
        dto_1 = datetime.datetime.strptime(dt_str_1, format_code).replace(tzinfo=timezone.utc)
        num_steps = int((dto_1.timestamp() - dto_0.timestamp())/inc)
    elif num_steps is None:
        raise GenericException('dt_str_1 and num_steps cannot both be None')

    # timedelta arithmetic is exact at microsecond resolution, so multiplying
    # the step gives the same instants as adding it repeatedly.
    step = datetime.timedelta(seconds=inc)
    dt_obj_s = [dto_0 + k*step for k in range(num_steps + 1)]
    ts_s = [dt_obj.timestamp() for dt_obj in dt_obj_s]

    return dt_obj_s, ts_s


def make_histogram(values, edges):
    """Return ``np.histogram(values, bins=edges)``, i.e. ``(counts, bin_edges)``."""
    return np.histogram(values, bins=edges)


def normalize(data, param, mean_std_dict, add_noise=False, noise_scale=1.0, seed=None):
    """Standardize ``data`` with the statistics stored for ``param``.

    ``mean_std_dict[param]`` must unpack to four items ``(mean, std, lo, hi)``;
    only ``mean`` and ``std`` are used. If ``param`` has no entry, ``data`` is
    returned unchanged. NaN values in the result are replaced by 0.

    Args:
        data: numpy array; it is copied, never modified in place.
        param: key into ``mean_std_dict``.
        mean_std_dict: mapping from parameter name to its statistics.
        add_noise: if True, add Gaussian noise after standardizing.
        noise_scale: standard deviation of the added noise.
        seed: if not None, reseeds NumPy's global random generator.

    Returns:
        Array with the same shape as ``data``.
    """
    stats = mean_std_dict.get(param)
    if stats is None:
        return data

    shape = data.shape
    data = data.flatten()  # flatten() copies, so the caller's array is untouched
    if data.dtype.kind not in 'fc':
        # In-place true division cannot store its result in an integer or
        # boolean array; promote such input to float first.
        data = data.astype(np.float64)

    mean, std, _, _ = stats
    data -= mean
    data /= std

    if add_noise:
        if seed is not None:
            np.random.seed(seed)
        data += np.random.normal(loc=0, scale=noise_scale, size=data.size)

    data[np.isnan(data)] = 0

    return np.reshape(data, shape)


# NOTE(security): pickle.load can execute arbitrary code contained in the
# file; these files must come from a trusted source.
with open(homedir+'geos_crs_goes16_FD.pkl', 'rb') as f:
    geos_goes16_fd = pickle.load(f)

with open(homedir+'geos_crs_goes16_CONUS.pkl', 'rb') as f:
    geos_goes16_conus = pickle.load(f)

with open(homedir+'geos_crs_H08_FD.pkl', 'rb') as f:
    geos_h08_fd = pickle.load(f)


def get_cartopy_crs(satellite, domain):
    """Return the stored projection object and grid geometry for a satellite/domain.

    Supported combinations are ``'GOES16'`` with ``'FD'`` or ``'CONUS'``, and
    ``'H08'`` (for which ``domain`` is not consulted).

    Returns:
        ``(geos, xlen, xmin, xmax, ylen, ymin, ymax)``.

    Raises:
        GenericException: for an unsupported satellite or GOES16 domain.
    """
    if satellite == 'GOES16':
        if domain == 'FD':
            return geos_goes16_fd, 5424, -5433893.0, 5433893.0, 5424, -5433893.0, 5433893.0
        if domain == 'CONUS':
            return geos_goes16_conus, 2500, -3626269.5, 1381770.0, 1500, 1584175.9, 4588198.0
        raise GenericException('Unsupported domain for GOES16: ' + str(domain))

    if satellite == 'H08':
        # NOTE(review): these extents are about 1000x smaller than the GOES16
        # ones -- possibly a different length unit; confirm.
        return geos_h08_fd, 5500, -5498.99990119, 5498.99990119, 5500, -5498.99990119, 5498.99990119

    raise GenericException('Unsupported satellite: ' + str(satellite))


# ------------ This code will not be needed when we implement a Fully Connected CNN -----------------------------------
# Example GOES file to retrieve GEOS parameters in MetPy form (CONUS)
exmp_file_conus = '/Users/tomrink/data/OR_ABI-L1b-RadC-M6C14_G16_s20193140811215_e20193140813588_c20193140814070.nc'
# Full Disk
exmp_file_fd = '/Users/tomrink/data/OR_ABI-L1b-RadF-M6C16_G16_s20212521800223_e20212521809542_c20212521809596.nc'


def make_for_full_domain_predict(h5f, name_list=None, satellite='GOES16', domain='FD'):
    """Cut every dataset in ``name_list`` into 16x16 tiles covering the domain interior.

    Tiles lie on a regular 16-pixel lattice; the four outermost tile rows and
    columns on every side are skipped.

    Args:
        h5f: open HDF5 file object, indexable by dataset name.
        name_list: dataset names to read with ``get_grid_values_all``.
        satellite, domain: passed to ``get_cartopy_crs`` for the grid size.

    Returns:
        ``(grd_dct, ll, cc)``: ``grd_dct`` maps each name to the stacked tiles
        (tile index first); ``ll`` and ``cc`` hold the first (row) and second
        (column) array index of each tile's upper-left corner, in tile order.

    Raises:
        GenericException: if ``name_list`` is None, or if only some of the
            datasets could be read.
    """
    if name_list is None:
        raise GenericException('name_list must be provided')

    w_x = 16
    w_y = 16

    # keep for reference
    # if domain == 'CONUS':
    #     exmpl_ds = xr.open_dataset(exmp_file_conus)
    # elif domain == 'FD':
    #     exmpl_ds = xr.open_dataset(exmp_file_fd)
    # mdat = exmpl_ds.metpy.parse_cf('Rad')
    # geos = mdat.metpy.cartopy_crs
    # xlen = mdat.x.values.size
    # ylen = mdat.y.values.size
    # exmpl_ds.close()

    # Only the grid dimensions are needed here.
    _, xlen, _, _, ylen, _, _ = get_cartopy_crs(satellite, domain)

    #h5f = h5py.File(clvrx_file, 'r')
    grids = {name: None for name in name_list}

    n_found = 0
    for ds_name in name_list:
        gvals = get_grid_values_all(h5f, ds_name)
        if gvals is not None:
            grids[ds_name] = gvals
            n_found += 1

    if n_found > 0 and n_found != len(name_list):
        raise GenericException('weirdness')

    n_x = int(xlen/w_x)
    n_y = int(ylen/w_y)

    # Upper-left (row, column) of every tile; identical for all datasets, so
    # computed once.
    origins = [(j * w_y, i * w_x) for j in range(4, n_y-4) for i in range(4, n_x-4)]

    # With an empty name_list no tiles are produced, so no origins are reported.
    ll = [j_ul for j_ul, _ in origins] if name_list else []
    cc = [i_ul for _, i_ul in origins] if name_list else []

    grd_dct = {name: None for name in name_list}
    for ds_name in name_list:
        grid = grids[ds_name]
        grd_dct[ds_name] = np.stack([grid[j_ul:j_ul+w_y, i_ul:i_ul+w_x] for j_ul, i_ul in origins])

    #h5f.close()

    return grd_dct, ll, cc
# -------------------------------------------------------------------------------------------