util.py 16.51 KiB
import metpy
import numpy as np
import xarray as xr
import datetime
from datetime import timezone
from metpy.units import units
from metpy.calc import thickness_hydrostatic
from collections import namedtuple
import os
import h5py
import pickle
LatLonTuple = namedtuple('LatLonTuple', ['lat', 'lon'])
homedir = os.path.expanduser('~') + '/'
class GenericException(Exception):
def __init__(self, message):
self.message = message
class EarlyStop:
def __init__(self, window_length=3, patience=5):
self.patience = patience
self.min = np.finfo(np.single).max
self.cnt = 0
self.cnt_wait = 0
self.window = np.zeros(window_length, dtype=np.single)
self.window.fill(np.nan)
def check_stop(self, value):
self.window[:-1] = self.window[1:]
self.window[-1] = value
if np.any(np.isnan(self.window)):
return False
ave = np.mean(self.window)
if ave < self.min:
self.min = ave
self.cnt_wait = 0
return False
else:
self.cnt_wait += 1
if self.cnt_wait > self.patience:
return True
else:
return False
def get_time_tuple_utc(timestamp):
dt_obj = datetime.datetime.fromtimestamp(timestamp, timezone.utc)
return dt_obj, dt_obj.timetuple()
def get_timestamp(dt_str, format_code='%Y-%m-%d_%H:%M'):
dto = datetime.datetime.strptime(dt_str, format_code).replace(tzinfo=timezone.utc)
ts = dto.timestamp()
return ts
def add_time_range_to_filename(pathname, tstart, tend):
dt_obj, _ = get_time_tuple_utc(tstart)
str_start = dt_obj.strftime('%Y%m%d%H')
dt_obj, _ = get_time_tuple_utc(tend)
str_end = dt_obj.strftime('%Y%m%d%H')
filename = os.path.split(pathname)[1]
w_o_ext, ext = os.path.splitext(filename)
filename = w_o_ext+'_'+str_start+'_'+str_end+ext
path = os.path.split(pathname)[0]
path = path+'/'+filename
return path
def haversine_np(lon1, lat1, lon2, lat2):
"""
Calculate the great circle distance between two points
on the earth (specified in decimal degrees)
(lon1, lat1) must be broadcastable with (lon2, lat2).
"""
lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
dlon = lon2 - lon1
dlat = lat2 - lat1
a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
c = 2 * np.arcsin(np.sqrt(a))
km = 6367 * c
return km
def bin_data_by(a, b, bin_ranges):
nbins = len(bin_ranges)
binned_data = []
for i in range(nbins):
rng = bin_ranges[i]
idxs = (b >= rng[0]) & (b < rng[1])
binned_data.append(a[idxs])
return binned_data
def bin_data_by_edges(a, b, edges):
nbins = len(edges) - 1
binned_data = []
for i in range(nbins):
idxs = (b >= edges[i]) & (b < edges[i+1])
binned_data.append(a[idxs])
return binned_data
def get_bin_ranges(lop, hip, bin_size=100):
bin_ranges = []
delp = hip - lop
nbins = int(delp/bin_size)
for i in range(nbins):
rng = [lop + i*bin_size, lop + i*bin_size + bin_size]
bin_ranges.append(rng)
return bin_ranges
# t must be monotonic increasing
def get_breaks(t, threshold):
t_0 = t[0:t.shape[0]-1]
t_1 = t[1:t.shape[0]]
d = t_1 - t_0
idxs = np.nonzero(d > threshold)
return idxs
def pressure_to_altitude(pres, temp, prof_pres, prof_temp, sfc_pres=None, sfc_temp=None, sfc_elev=0):
if not np.all(np.diff(prof_pres) > 0):
raise GenericException("target pressure profile must be monotonic increasing")
if pres < prof_pres[0]:
raise GenericException("target pressure less than top of pressure profile")
if temp is None:
temp = np.interp(pres, prof_pres, prof_temp)
i_top = np.argmax(np.extract(prof_pres <= pres, prof_pres)) + 1
pres_s = prof_pres.tolist()
temp_s = prof_temp.tolist()
pres_s = [pres] + pres_s[i_top:]
temp_s = [temp] + temp_s[i_top:]
if sfc_pres is not None:
if pres > sfc_pres: # incoming pressure below surface
return -1
prof_pres = np.array(pres_s)
prof_temp = np.array(temp_s)
i_bot = prof_pres.shape[0] - 1
if sfc_pres > prof_pres[i_bot]: # surface below profile bottom
pres_s = pres_s + [sfc_pres]
temp_s = temp_s + [sfc_temp]
else:
idx = np.argmax(np.extract(prof_pres < sfc_pres, prof_pres))
if sfc_temp is None:
sfc_temp = np.interp(sfc_pres, prof_pres, prof_temp)
pres_s = prof_pres.tolist()
temp_s = prof_temp.tolist()
pres_s = pres_s[0:idx+1] + [sfc_pres]
temp_s = temp_s[0:idx+1] + [sfc_temp]
prof_pres = np.array(pres_s)
prof_temp = np.array(temp_s)
prof_pres = prof_pres[::-1]
prof_temp = prof_temp[::-1]
prof_pres = prof_pres * units.hectopascal
prof_temp = prof_temp * units.kelvin
sfc_elev = sfc_elev * units.meter
z = thickness_hydrostatic(prof_pres, prof_temp) + sfc_elev
return z
# http://fourier.eng.hmc.edu/e176/lectures/NM/node25.html
def minimize_quadratic(xa, xb, xc, ya, yb, yc):
x_m = xb + 0.5*(((ya-yb)*(xc-xb)*(xc-xb) - (yc-yb)*(xb-xa)*(xb-xa)) / ((ya-yb)*(xc-xb) + (yc-yb)*(xb-xa)))
return x_m
def value_to_index(nda, value):
diff = np.abs(nda - value)
idx = np.argmin(diff)
return idx
def find_bin_index(nda, value_s):
idxs = np.arange(nda.shape[0])
iL_s = np.zeros(value_s.shape[0])
iL_s[:,] = -1
for k, v in enumerate(value_s):
above = v >= nda
if not above.any():
continue
below = v < nda
if not below.any():
continue
iL = idxs[above].max()
iL_s[k] = iL
return iL_s.astype(np.int32)
# array solzen must be degrees, missing values must NaN. For small roughly 50x50km regions only
def is_day(solzen, test_angle=80.0):
solzen = solzen.flatten()
solzen = solzen[np.invert(np.isnan(solzen))]
if len(solzen) == 0 or np.sum(solzen <= test_angle) < len(solzen):
return False
else:
return True
# array solzen must be degrees, missing values must NaN. For small roughly 50x50km regions only
def is_night(solzen, test_angle=100.0):
solzen = solzen.flatten()
solzen = solzen[np.invert(np.isnan(solzen))]
if len(solzen) == 0 or np.sum(solzen >= test_angle) < len(solzen):
return False
else:
return True
def check_oblique(satzen, test_angle=70.0):
satzen = satzen.flatten()
satzen = satzen[np.invert(np.isnan(satzen))]
if len(satzen) == 0 or np.sum(satzen <= test_angle) < len(satzen):
return False
else:
return True
def get_grid_values_all(h5f, grid_name, scale_factor_name='scale_factor', add_offset_name='add_offset',
fill_value_name='_FillValue', range_name='actual_range', fill_value=None):
hfds = h5f[grid_name]
attrs = hfds.attrs
if attrs is None:
raise GenericException('No attributes object for: '+grid_name)
grd_vals = hfds[:,]
if fill_value is not None:
grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)
if scale_factor_name is not None:
attr = attrs.get(scale_factor_name)
if attr is None:
raise GenericException('Attribute: '+scale_factor_name+' not found for variable: '+grid_name)
scale_factor = attr[0]
grd_vals = grd_vals * scale_factor
if add_offset_name is not None:
attr = attrs.get(add_offset_name)
if attr is None:
raise GenericException('Attribute: '+add_offset_name+' not found for variable: '+grid_name)
add_offset = attr[0]
grd_vals = grd_vals + add_offset
if range_name is not None:
attr = attrs.get(range_name)
if attr is None:
raise GenericException('Attribute: '+range_name+' not found for variable: '+grid_name)
low = attr[0]
high = attr[1]
grd_vals = np.where(grd_vals < low, np.nan, grd_vals)
grd_vals = np.where(grd_vals > high, np.nan, grd_vals)
elif fill_value_name is not None:
attr = attrs.get(fill_value_name)
if attr is None:
raise GenericException('Attribute: '+fill_value_name+' not found for variable: '+grid_name)
fill_value = attr[0]
grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)
return grd_vals
# dt_str_0: start datetime string in format YYYY-MM-DD_HH:MM
# dt_str_1: stop datetime string, if not None num_steps is computed
# format_code: default '%Y-%m-%d_%H:%M'
# num_steps with increment of days, hours, minutes or seconds
# dt_str_1 and num_steps cannot both be None
# return num_steps+1 lists of datetime strings and timestamps (edges of a numpy histogram)
def make_times(dt_str_0, dt_str_1=None, format_code='%Y-%m-%d_%H:%M', num_steps=None, days=None, hours=None, minutes=None, seconds=None):
if days is not None:
inc = 86400*days
elif hours is not None:
inc = 3600*hours
elif minutes is not None:
inc = 60*minutes
else:
inc = seconds
dt_obj_s = []
ts_s = []
dto_0 = datetime.datetime.strptime(dt_str_0, format_code).replace(tzinfo=timezone.utc)
ts_0 = dto_0.timestamp()
if dt_str_1 is not None:
dto_1 = datetime.datetime.strptime(dt_str_1, format_code).replace(tzinfo=timezone.utc)
ts_1 = dto_1.timestamp()
num_steps = int((ts_1 - ts_0)/inc)
dt_obj_s.append(dto_0)
ts_s.append(ts_0)
dto_last = dto_0
for k in range(num_steps):
dt_obj = dto_last + datetime.timedelta(seconds=inc)
dt_obj_s.append(dt_obj)
ts_s.append(dt_obj.timestamp())
dto_last = dt_obj
return dt_obj_s, ts_s
def make_histogram(values, edges):
h = np.histogram(values, bins=edges)
return h
def normalize(data, param, mean_std_dict, add_noise=False, noise_scale=1.0, seed=None):
if mean_std_dict.get(param) is None:
return data
shape = data.shape
data = data.flatten()
mean, std, lo, hi = mean_std_dict.get(param)
data -= mean
data /= std
if add_noise:
if seed is not None:
np.random.seed(seed)
rnd = np.random.normal(loc=0, scale=noise_scale, size=data.size)
data += rnd
not_valid = np.isnan(data)
data[not_valid] = 0
data = np.reshape(data, shape)
return data
f = open(homedir+'geos_crs_goes16_FD.pkl', 'rb')
geos_goes16_fd = pickle.load(f)
f.close()
f = open(homedir+'geos_crs_goes16_CONUS.pkl', 'rb')
geos_goes16_conus = pickle.load(f)
f.close()
f = open(homedir+'geos_crs_H08_FD.pkl', 'rb')
geos_h08_fd = pickle.load(f)
f.close()
def get_cartopy_crs(satellite, domain):
if satellite == 'GOES16':
if domain == 'FD':
geos = geos_goes16_fd
xlen = 5424
xmin = -5433893.0
xmax = 5433893.0
ylen = 5424
ymin = -5433893.0
ymax = 5433893.0
elif domain == 'CONUS':
geos = geos_goes16_conus
xlen = 2500
xmin = -3626269.5
xmax = 1381770.0
ylen = 1500
ymin = 1584175.9
ymax = 4588198.0
elif satellite == 'H08':
geos = geos_h08_fd
xlen = 5500
xmin = -5498.99990119
xmax = 5498.99990119
ylen = 5500
ymin = -5498.99990119
ymax = 5498.99990119
return geos, xlen, xmin, xmax, ylen, ymin, ymax
def get_grid_values(h5f, grid_name, j_c, i_c, half_width, num_j=None, num_i=None, scale_factor_name='scale_factor', add_offset_name='add_offset',
fill_value_name='_FillValue', range_name='actual_range', fill_value=None):
hfds = h5f[grid_name]
attrs = hfds.attrs
if attrs is None:
raise GenericException('No attributes object for: '+grid_name)
ylen, xlen = hfds.shape
if half_width is not None:
j_l = j_c-half_width
i_l = i_c-half_width
if j_l < 0 or i_l < 0:
return None
j_r = j_c+half_width+1
i_r = i_c+half_width+1
if j_r >= ylen or i_r >= xlen:
return None
else:
j_l = j_c
j_r = j_c + num_j + 1
i_l = i_c
i_r = i_c + num_i + 1
grd_vals = hfds[j_l:j_r, i_l:i_r]
if fill_value is not None:
grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)
if scale_factor_name is not None:
attr = attrs.get(scale_factor_name)
if attr is None:
raise GenericException('Attribute: '+scale_factor_name+' not found for dataset: '+grid_name)
if np.isscalar(attr):
scale_factor = attr
else:
scale_factor = attr[0]
grd_vals = grd_vals * scale_factor
if add_offset_name is not None:
attr = attrs.get(add_offset_name)
if attr is None:
raise GenericException('Attribute: '+add_offset_name+' not found for dataset: '+grid_name)
if np.isscalar(attr):
add_offset = attr
else:
add_offset = attr[0]
grd_vals = grd_vals + add_offset
if range_name is not None:
attr = attrs.get(range_name)
if attr is None:
raise GenericException('Attribute: '+range_name+' not found for dataset: '+grid_name)
low = attr[0]
high = attr[1]
grd_vals = np.where(grd_vals < low, np.nan, grd_vals)
grd_vals = np.where(grd_vals > high, np.nan, grd_vals)
elif fill_value_name is not None:
attr = attrs.get(fill_value_name)
if attr is None:
raise GenericException('Attribute: '+fill_value_name+' not found for dataset: '+grid_name)
if np.isscalar(attr):
fill_value = attr
else:
fill_value = attr[0]
grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)
return grd_vals
# Example GOES file to retrieve GEOS parameters in MetPy form (CONUS)
exmp_file_conus = '/Users/tomrink/data/OR_ABI-L1b-RadC-M6C14_G16_s20193140811215_e20193140813588_c20193140814070.nc'
# Full Disk
exmp_file_fd = '/Users/tomrink/data/OR_ABI-L1b-RadF-M6C16_G16_s20212521800223_e20212521809542_c20212521809596.nc'
# keep for reference
# if domain == 'CONUS':
# exmpl_ds = xr.open_dataset(exmp_file_conus)
# elif domain == 'FD':
# exmpl_ds = xr.open_dataset(exmp_file_fd)
# mdat = exmpl_ds.metpy.parse_cf('Rad')
# geos = mdat.metpy.cartopy_crs
# xlen = mdat.x.values.size
# ylen = mdat.y.values.size
# exmpl_ds.close()
# Taiwan domain:
# lon, lat = 120.955098, 23.834310
# elem, line = (1789, 1505)
# # UR from Taiwan
# lon, lat = 135.0, 35.0
# elem_ur, line_ur = (2499, 995)
taiwan_x0 = 1079
taiwan_y0 = 995
taiwan_lenx = 1420
taiwan_leny = 1020
# geos.transform_point(135.0, 35.0, ccrs.PlateCarree(), False)
# geos.transform_point(106.61, 13.97, ccrs.PlateCarree(), False)
taiwain_extent = [-3342, -502, 1470, 3510] # GEOS coordinates, not line, elem
# ------------ This code will not be needed when we implement a Fully Connected CNN -----------------------------------
def make_for_full_domain_predict(h5f, name_list=None, satellite='GOES16', domain='FD'):
w_x = 16
w_y = 16
i_0 = 0
j_0 = 0
geos, xlen, xmin, xmax, ylen, ymin, ymax = get_cartopy_crs(satellite, domain)
if satellite == 'H08':
xlen = taiwan_lenx
ylen = taiwan_leny
i_0 = taiwan_x0
j_0 = taiwan_y0
grd_dct = {name: None for name in name_list}
cnt_a = 0
for didx, ds_name in enumerate(name_list):
gvals = get_grid_values(h5f, ds_name, j_0, i_0, None, num_j=ylen, num_i=xlen)
if gvals is not None:
grd_dct[ds_name] = gvals
cnt_a += 1
if cnt_a > 0 and cnt_a != len(name_list):
raise GenericException('weirdness')
grd_dct_n = {name: [] for name in name_list}
n_x = int(xlen/w_x)
n_y = int(ylen/w_y)
ll = [j_0 + j*w_y for j in range(n_y-1)]
cc = [i_0 + i*w_x for i in range(n_x-1)]
for ds_name in name_list:
for j in range(n_y-1):
j_ul = j * w_y
for i in range(n_x-1):
i_ul = i * w_x
grd_dct_n[ds_name].append(grd_dct[ds_name][j_ul:j_ul+w_y, i_ul:i_ul+w_x])
# TODO: replace this with numpy meshgrid and flatten
# ll = []
# cc = []
# for j in range(n_y-1):
# j_y = j_0 + j*w_y
# for i in range(n_x-1):
# i_x = i_0 + i*w_x
# ll.append(j_y)
# cc.append(i_x)
grd_dct = {name: None for name in name_list}
for didx, ds_name in enumerate(name_list):
grd_dct[ds_name] = np.stack(grd_dct_n[ds_name])
return grd_dct, ll, cc
# -------------------------------------------------------------------------------------------