import datetime, os
from datetime import timezone
import glob
import numpy as np
import xarray as xr
from netCDF4 import Dataset
from aeolus.geos_nav import GEOSNavigation, get_navigation
from aeolus.datasource import get_datasource
from util.util import haversine_np, LatLonTuple, GenericException
from amv.intercompare import best_fit, bin_data_by, get_press_bin_ranges, spd_dir_from_uv, uv_from_spd_dir, \
    direction_difference, run_best_fit_gfs
import math
from metpy.units import units
from util.gfs_reader import get_vert_profile_s
from amv.intercompare import get_raob_dict_cdf
from util.line_plot import do_plot

amv_file_duration = 60  # minutes
half_width = 50  # search box centered on AEOLUS profile (FGF coordinates)
num_elems = 5424
num_lines = 5424


def get_amvs(amv_ds, timestamp, filepath=None):
    if filepath is None:
        filepath, ftime, f_idx = amv_ds.get_file(timestamp)
    if filepath is None:
        return None

    amv_params = amv_ds.get_parameters()
    # TODO: Need to generalize this better
    amv_params = [amv_ds.lat_name, amv_ds.lon_name, amv_ds.elem_name, amv_ds.line_name] + amv_params

    ds = Dataset(filepath)

    param_s = []
    vld = None
    for name in amv_params:
        if name is None:
            continue
        param = ds[name][:]
        if vld is None:
            vld = np.invert(param.mask)
        else:
            vld = np.logical_and(vld, np.invert(param.mask))
        param_s.append(param.data)

    # filter
    qc_name = amv_ds.get_qc_params()
    if qc_name is not None:
        qc_param = ds[qc_name][:].data
        good = amv_ds.filter(qc_param)
        vld = np.logical_and(vld, good)

    param_nd = np.vstack(param_s)
    param_nd = param_nd[:, vld]
    param_nd = np.transpose(param_nd, axes=[1, 0])

    if amv_ds.elem_name is None:
        nav = amv_ds.get_navigation()
        cc, ll = nav.earth_to_lc_s(param_nd[:, 1], param_nd[:, 0])
        tmp_nd = np.insert(param_nd, 2, cc, axis=1)
        param_nd = np.insert(tmp_nd, 3, ll, axis=1)

    return param_nd


# raob_dict: (lat,lon) -> profiles
# raob_time: nominal time
# amv_ds: AMV data source
# return dict: raob -> tuple (amv_lon, amv_lat, elem, line, amv_pres, amv_spd, amv_dir)
def match_amvs_to_raobs(raob_dict, raob_time, amv_ds, filepath=None):
    nav = amv_ds.get_navigation()
    amv_params = amv_ds.get_parameters()
    match_dict = {}

    if filepath is None:
        filepath, ftime, f_idx = amv_ds.get_file(raob_time)
    if filepath is None:
        return None

    ds = Dataset(filepath)

    amv_lons = ds[amv_ds.lon_name][:].data
    amv_lats = ds[amv_ds.lat_name][:].data
    if amv_ds.elem_name is None or amv_ds.line_name is None:
        cc, ll = nav.earth_to_lc_s(amv_lons, amv_lats)
    else:
        cc = ds[amv_ds.elem_name][:].data
        ll = ds[amv_ds.line_name][:].data

    param_s = []
    param_s.append(amv_lons)
    param_s.append(amv_lats)
    param_s.append(cc)
    param_s.append(ll)

    vld = None
    for param in amv_params:
        data = ds[param][:]
        if vld is None:
            vld = np.invert(data.mask)
        else:
            vld = np.logical_and(vld, np.invert(data.mask))
        if param == 'V_3D':
            param_s.append(data.data[0])
            param_s.append(data.data[1])
        else:
            param_s.append(data.data)

    # filter
    qc_name = amv_ds.get_qc_params()
    if qc_name is not None:
        qc_param = ds[qc_name][:].data
        good = amv_ds.filter(qc_param)
        vld = np.logical_and(vld, good)

    param_nd = np.vstack(param_s)
    param_nd = param_nd[:, vld]
    cc = param_nd[2, :]
    ll = param_nd[3, :]
    ds.close()

    keys = list(raob_dict.keys())
    for key in keys:
        raob = raob_dict.get(key)
        if raob is None:
            continue
        lat = key.lat
        lon = key.lon
        c_rng, l_rng = get_search_box(nav, lon, lat)
        if c_rng is None:
            continue
        in_cc = np.logical_and(cc > c_rng[0], cc < c_rng[1])
        in_ll = np.logical_and(ll > l_rng[0], ll < l_rng[1])
        in_box = np.logical_and(in_cc, in_ll)
        num_amvs = np.sum(in_box)
        if num_amvs == 0:
            continue
        match_dict[key] = param_nd[:, in_box]

    return match_dict


def create_file2(filename, raob_to_amv_dct, raob_dct, amv_files):
    keys = list(raob_to_amv_dct.keys())
    num_amvs = []
    num_levs = []
    times = []

    namvs = 0
    nlevs = 0
    for key in keys:
        param_nd = raob_to_amv_dct.get(key)
        num_amvs.append(param_nd.shape[1])
        namvs += param_nd.shape[1]

        prof = raob_dct.get(key)
        num_levs.append(prof.shape[0])
        nlevs += prof.shape[0]

        # TODO: need a time
        times.append(0.0)

    # amv_per_alus = len(aeolus_to_amv_dct)

    rootgrp = Dataset(filename, 'w', format='NETCDF4')
    dim_amvs = rootgrp.createDimension('amvs', size=namvs)
    dim_alus = rootgrp.createDimension('raobs', size=nlevs)
    dim_num_aeolus_prof = rootgrp.createDimension('num_raobs', size=len(raob_to_amv_dct))

    nc4_vars = []
    out_params = amv_files.get_out_parameters()
    meta_dict = amv_files.get_meta_dict()

    for pidx, param in enumerate(out_params):
        u, t = meta_dict.get(param)
        var = rootgrp.createVariable(param, t, ['amvs'])
        if u is not None:
            var.units = u
        nc4_vars.append(var)

    dist = rootgrp.createVariable('dist_to_raob', 'f4', ['amvs'])
    dist.units = 'km'

    num_amvs_per_raob = rootgrp.createVariable('num_amvs_per_raob', 'i4', ['num_raobs'])
    num_levs_per_raob = rootgrp.createVariable('num_levs_per_raob', 'i4', ['num_raobs'])
    prof_time = rootgrp.createVariable('time', 'f4', ['num_raobs'])

    # ---- Profile variables ---------------
    prf_lon = rootgrp.createVariable('raob_longitude', 'f4', ['num_raobs'])
    prf_lon.units = 'degrees east'
    prf_lat = rootgrp.createVariable('raob_latitude', 'f4', ['num_raobs'])
    prf_lat.units = 'degrees north'
    prof_time.units = 'seconds since 1970-01-01 00:00:00'

    prf_azm = rootgrp.createVariable('prof_azm', 'f4', ['raobs'])
    prf_azm.units = 'degree'
    prf_spd = rootgrp.createVariable('prof_spd', 'f4', ['raobs'])
    prf_spd.units = 'm s-1'
    prf_prs = rootgrp.createVariable('prof_pres', 'f4', ['raobs'])
    prf_prs.units = 'hPa'
    prf_tmp = rootgrp.createVariable('prof_temp', 'f4', ['raobs'])
    prf_tmp.units = 'K'
    # --------------------------------------

    i_a = 0
    i_c = 0
    for idx, key in enumerate(keys):
        namvs = num_amvs[idx]
        nlevs = num_levs[idx]
        i_b = i_a + namvs
        i_d = i_c + nlevs

        prof = raob_dct.get(key)
        prf_azm[i_c:i_d] = prof[:, 2]
        prf_spd[i_c:i_d] = prof[:, 3]
        prf_prs[i_c:i_d] = prof[:, 0]
        prf_tmp[i_c:i_d] = prof[:, 1]
        i_c += nlevs

        plat = key.lat
        plon = key.lon
        prf_lat[idx::] = plat
        prf_lon[idx::] = plon

        param_nd = raob_to_amv_dct.get(key)
        for pidx, param in enumerate(out_params):
            nc4_vars[pidx][i_a:i_b] = param_nd[pidx, :]
        dist[i_a:i_b] = haversine_np(plon, plat, param_nd[0, :], param_nd[1, :])
        i_a += namvs

    num_amvs_per_raob[:] = num_amvs
    num_levs_per_raob[:] = num_levs
    prof_time[:] = times

    rootgrp.close()


def run_best_fit_all(amv_dir, source, product_dir, product, raob_path, gfs_path, full_domain=False):
    amv_files = get_datasource(amv_dir, source)
    prd_files = get_datasource(product_dir, product)
    gfs_files = get_datasource(gfs_path, 'GFS')
    raob_ds = get_datasource(raob_path, 'RAOB')
    raob_files = raob_ds.flist

    out_list = []
    amvs_list = []
    bfs_list = []
    rb_list = []
    bfs_gfs_list = []
    prd_list = []

    if not full_domain:
        for k, file in enumerate(raob_files):
            raob_dct = get_raob_dict_cdf(file)
            ts = raob_ds.ftimes[k, 0]
            m_d = match_amvs_to_raobs(raob_dct, ts, amv_files)
            if m_d is None:
                continue
            gfs_file = gfs_files.get_file(ts)[0]
            if gfs_file is None:
                continue
            bf_dct = run_best_fit(m_d, raob_dct, gfs_file)
            prd_dct = get_product_at_locs(m_d, ts, prd_files)
            out_list.append((bf_dct, prd_dct))

        for tup in out_list:
            ab_dct = tup[0]
            pr_dct = tup[1]

            keys = list(ab_dct.keys())
            for key in keys:
                tup = ab_dct.get(key)
                amvs_list.append(tup[0])
                bfs_list.append(tup[1])
                rb_list.append(tup[2])
                bfs_gfs_list.append(tup[3])

            keys = list(pr_dct.keys())
            for key in keys:
                prd_list.append(pr_dct.get(key))

        amvs = np.concatenate(amvs_list)
        bfs = np.concatenate(bfs_list)
        rbm = np.concatenate(rb_list)
        bfs_gfs = np.concatenate(bfs_gfs_list)
        prd = np.concatenate(prd_list)

        tup = (amvs, bfs, prd, bfs_gfs, rbm)
        return tup
    else:
        for k, file in enumerate(raob_files):
            raob_dct = get_raob_dict_cdf(file)
            ts = raob_ds.ftimes[k, 0]
            amvs = get_amvs(amv_files, ts)
            if amvs is None:
                continue
            gfs_file = gfs_files.get_file(ts)[0]
            if gfs_file is None:
                continue
            bfs = run_best_fit_gfs(amvs, gfs_file, amv_lat_idx=0, amv_lon_idx=1, amv_prs_idx=4,
                                   amv_spd_idx=5, amv_dir_idx=6)
            alons = amvs[:, 0]
            alats = amvs[:, 1]
            prds = get_product_at_lat_lons(prd_files, ts, alons, alats, filepath=None)
            out_list.append((amvs, np.array(bfs), prds))

        for tup in out_list:
            amvs_list.append(tup[0])
            bfs_list.append(tup[1])
            prd_list.append(tup[2])

        amvs = np.concatenate(amvs_list)
        bfs = np.concatenate(bfs_list)
        prd = np.concatenate(prd_list)

        tup = (amvs, bfs, prd)
        return tup


def get_product_at_locs(raob_to_amv_dct, ts, files, filepath=None):
    keys = list(raob_to_amv_dct.keys())
    m_dct = {}
    nav = files.get_navigation()
    the_params = files.get_parameters()
    num_params = len(the_params)

    if filepath is None:
        filepath, ftime, f_idx = files.get_file(ts)
    ds = Dataset(filepath)

    param_s = []
    for pstr in the_params:
        data = ds[pstr][:]
        data = data.data
        param_s.append(data)
    param_nd = np.stack(param_s)

    for key in keys:
        amvs = raob_to_amv_dct.get(key)
        num_amvs = amvs.shape[1]
        alons = amvs[0, :]
        alats = amvs[1, :]
        cc, ll = nav.earth_to_lc_s(alons, alats)
        # np.float was removed from NumPy; use the explicit float64 dtype
        aaa = np.zeros((num_params, num_amvs), dtype=np.float64)
        for k in range(num_amvs):
            aaa[:, k] = param_nd[:, ll[k], cc[k]]
        m_dct[key] = np.transpose(aaa, axes=[1, 0])

    ds.close()
    return m_dct


def get_product_at_locs1x1(raob_to_amv_dct, ts, files, filepath=None):
    keys = list(raob_to_amv_dct.keys())
    m_dct = {}
    nav = files.get_navigation()
    the_params = files.get_parameters()
    num_params = len(the_params)

    if filepath is None:
        filepath, ftime, f_idx = files.get_file(ts)
    ds = Dataset(filepath)

    var_s = []
    for pstr in the_params:
        var = ds[pstr]
        var_s.append(var)

    for key in keys:
        amvs = raob_to_amv_dct.get(key)
        num_amvs = amvs.shape[1]
        alons = amvs[0, :]
        alats = amvs[1, :]
        cc, ll = nav.earth_to_lc_s(alons, alats)
        aaa = np.zeros((num_params, num_amvs), dtype=np.float64)
        for vidx, var in enumerate(var_s):
            for k in range(num_amvs):
                aaa[vidx, k] = var[ll[k], cc[k]].data
        m_dct[key] = np.transpose(aaa, axes=[1, 0])

    ds.close()
    return m_dct


def get_product_at_lat_lons(files, ts, lons, lats, filepath=None):
    nav = files.get_navigation()
    the_params = files.get_parameters()
    num_params = len(the_params)

    if filepath is None:
        filepath, ftime, f_idx = files.get_file(ts)
    ds = Dataset(filepath)

    var_s = []
    for pstr in the_params:
        var = ds[pstr]
        var_s.append(var)

    num = lons.shape[0]
    cc, ll = nav.earth_to_lc_s(lons, lats)
    aaa = np.zeros((num_params, num), dtype=np.float64)
    for vidx, var in enumerate(var_s):
        for k in range(num):
            aaa[vidx, k] = var[ll[k], cc[k]].data

    return np.transpose(aaa, axes=[1, 0])


def run_best_fit(raob_to_amv_dct, raob_dct, gfs_filename=None):
    out_dct = {}
    keys = list(raob_to_amv_dct.keys())

    do_gfs_best_fit = False
    gfs_press = None
    gfs_spd = None
    gfs_dir = None

    if gfs_filename is not None:
        locs = np.array(keys)
        do_gfs_best_fit = True
        xr_dataset = xr.open_dataset(gfs_filename)
        gfs_press = xr_dataset['pressure levels']
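        # Note (added comment): the GFS pressure levels and the extracted wind profiles are all
        # reversed (::-1) in the same way below, so the levels stay aligned; presumably this puts
        # the model profile into the vertical ordering that best_fit expects.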
gfs_press = gfs_press.values gfs_press = gfs_press[::-1] lon_idx = LatLonTuple._fields.index('lon') lat_idx = LatLonTuple._fields.index('lat') uv_wind = get_vert_profile_s(xr_dataset, ['u-wind', 'v-wind'], locs[:, lon_idx], locs[:, lat_idx], method='nearest') uv_wind = uv_wind.values wspd, wdir = spd_dir_from_uv(uv_wind[0, :, :], uv_wind[1, :, :]) wspd = wspd.magnitude wdir = wdir.magnitude gfs_spd = wspd[:, ::-1] gfs_dir = wdir[:, ::-1] for key_idx, key in enumerate(keys): raob = raob_dct.get(key) raob_prs = raob[:, 0] raob_spd = raob[:, 3] raob_dir = raob[:, 2] amvs = raob_to_amv_dct.get(key) num_amvs = amvs.shape[1] bf_list = [] raob_match_list = [] bf_gfs_list = [] for i in range(num_amvs): amv_lon = amvs[0, i] amv_lat = amvs[1, i] amv_prs = amvs[4, i] amv_spd = amvs[5, i] amv_dir = amvs[6, i] bf = best_fit(amv_spd, amv_dir, amv_prs, amv_lat, amv_lon, raob_spd, raob_dir, raob_prs) bf_list.append(bf) pdiff = np.abs(amv_prs - raob_prs) lev_idx = np.argmin(pdiff) if raob_spd[lev_idx] < 0 or raob_dir[lev_idx] < 0: tup = (raob_spd[lev_idx], raob_dir[lev_idx], raob_prs[lev_idx], -9) elif pdiff[lev_idx] > 60.0: tup = (raob_spd[lev_idx], raob_dir[lev_idx], raob_prs[lev_idx], -9) else: uv = uv_from_spd_dir(raob_spd[lev_idx], raob_dir[lev_idx]) tup = (uv[0].magnitude, uv[1].magnitude, raob_prs[lev_idx], 0) raob_match_list.append(tup) if do_gfs_best_fit: bf = best_fit(amv_spd, amv_dir, amv_prs, amv_lat, amv_lon, gfs_spd[key_idx], gfs_dir[key_idx], gfs_press) bf_gfs_list.append(bf) out_dct[key] = (np.transpose(amvs, axes=[1, 0]), np.array(bf_list), np.array(raob_match_list), np.array(bf_gfs_list)) return out_dct def analyze2(amvs, bfs, raob_match, bfs_gfs, amv_prod): good_amvs = amvs num_good = good_amvs.shape[0] didx = 6 sidx = 5 pidx = 4 print('Number of AMVs: {0:d}'.format(num_good)) spd_min = good_amvs[:, sidx].min() spd_max = good_amvs[:, sidx].max() print('spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(spd_min, spd_max, np.average(good_amvs[:, sidx]))) p_min = good_amvs[:, pidx].min() p_max = good_amvs[:, pidx].max() print('pres min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(p_min, p_max, np.average(good_amvs[:, pidx]))) low = good_amvs[:, pidx] >= 700 mid = np.logical_and(good_amvs[:, pidx] < 700, good_amvs[:, pidx] > 400) hgh = good_amvs[:, pidx] <= 400 n_low = np.sum(low) n_mid = np.sum(mid) n_hgh = np.sum(hgh) print('% low: {0:.2f}'.format(100.0*(n_low/num_good))) print('% mid: {0:.2f}'.format(100.0*(n_mid/num_good))) print('% hgh: {0:.2f}'.format(100.0*(n_hgh/num_good))) print('---------------------------') print('Low Spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[low, sidx].min(), good_amvs[low, sidx].max(), good_amvs[low,sidx].mean())) print('Low Press min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[low, pidx].min(), good_amvs[low, pidx].max(), good_amvs[low, pidx].mean())) print('---------------------------') print('Mid Spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[mid, sidx].min(), good_amvs[mid, sidx].max(), good_amvs[mid, sidx].mean())) print('Mid Press min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[mid, pidx].min(), good_amvs[mid, pidx].max(), good_amvs[mid, pidx].mean())) print('---------------------------') print('Hgh Spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[hgh, sidx].min(), good_amvs[hgh, sidx].max(), good_amvs[hgh, sidx].mean())) print('Hgh Press min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[hgh, pidx].min(), good_amvs[hgh, pidx].max(), good_amvs[hgh, pidx].mean())) # Comparison to Level of Best Fit 
(LBF) ------------------------------------------------------------------ # -------------------------------------------------------------------------------------------------------- bin_size = 100.0 bin_ranges = get_press_bin_ranges(100, 1000, bin_size=bin_size) vld_bf = bfs[:, 3] == 0 keep_idxs = vld_bf amv_p = good_amvs[keep_idxs, pidx] bf_p = bfs[keep_idxs, 2] diff = amv_p - bf_p mad = np.average(np.abs(diff)) bias = np.average(diff) print('********************************************************') print('Number of good best fits to RAOB: ', bf_p.shape[0]) print('press, MAD: {0:.2f}'.format(mad)) print('press, bias: {0:.2f}'.format(bias)) pd_std = np.std(diff) pd_mean = np.mean(diff) print('press bias/rms: {0:.2f} {1:.2f} '.format(pd_mean, np.sqrt(pd_mean**2 + pd_std**2))) print('------------------------------------------') bin_pres = bin_data_by(diff, amv_p, bin_ranges) amv_spd = good_amvs[keep_idxs, sidx] amv_dir = good_amvs[keep_idxs, didx] bf_spd, bf_dir = spd_dir_from_uv(bfs[keep_idxs, 0], bfs[keep_idxs, 1]) diff = amv_spd * units('m/s') - bf_spd diff = diff.magnitude spd_mad = np.average(np.abs(diff)) spd_bias = np.average(diff) print('spd, MAD: {0:.2f}'.format(spd_mad)) print('spd, bias: {0:.2f}'.format(spd_bias)) spd_mean = np.mean(diff) spd_std = np.std(diff) print('spd MAD/bias/rms: {0:.2f} {1:.2f} {2:.2f}'.format(np.average(np.abs(diff)), spd_mean, np.sqrt(spd_mean**2 + spd_std**2))) print('-----------------') bin_spd = bin_data_by(diff, amv_p, bin_ranges) dir_diff = direction_difference(amv_dir, bf_dir.magnitude) print('dir, MAD: {0:.2f}'.format(np.average(np.abs(dir_diff)))) print('dir bias: {0:.2f}'.format(np.average(dir_diff))) print('-------------------------------------') bin_dir = bin_data_by(dir_diff, amv_p, bin_ranges) amv_u, amv_v = uv_from_spd_dir(good_amvs[keep_idxs, sidx], good_amvs[keep_idxs, didx]) u_diffs = amv_u - (bfs[keep_idxs, 0] * units('m/s')) v_diffs = amv_v - (bfs[keep_idxs, 1] * units('m/s')) vd = np.sqrt(u_diffs**2 + v_diffs**2) vd_mean = np.mean(vd) vd_std = np.std(vd) print('VD bias/rms: {0:.2f} {1:.2f}'.format(vd_mean, np.sqrt(vd_mean**2 + vd_std**2))) print('******************************************************') x_values = [] num_pres = [] num_spd = [] num_dir = [] print('level num cases hgt MAD/bias spd MAD/bias dir MAD/bias') print('-------------------------------------------------------------------') for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) num_pres.append(bin_pres[i].shape[0]) num_spd.append(bin_spd[i].shape[0]) num_dir.append(bin_dir[i].shape[0]) print('{0:d} {1:d} {2:.2f}/{3:.2f} {4:.2f}/{5:.2f} {6:.2f}/{7:.2f}' .format(int(x_values[i]), num_pres[i], np.average(np.abs(bin_pres[i])), np.average(bin_pres[i]), np.average(np.abs(bin_spd[i])), np.average(bin_spd[i]), np.average(np.abs(bin_dir[i])), np.average(bin_dir[i]))) # Comparison to Level of Best Fit (LBF) GFS ------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------------------ bfs_raob = bfs bfs = bfs_gfs vld_bf = bfs[:, 3] == 0 keep_idxs = vld_bf amv_p = good_amvs[keep_idxs, pidx] bf_p = bfs[keep_idxs, 2] diff = amv_p - bf_p mad = np.average(np.abs(diff)) bias = np.average(diff) print('********************************************************') print('Number of good best fits to GFS: ', bf_p.shape[0]) print('press, MAD: {0:.2f}'.format(mad)) print('press, bias: {0:.2f}'.format(bias)) pd_std = np.std(diff) pd_mean = np.mean(diff) print('press 
bias/rms: {0:.2f} {1:.2f} '.format(pd_mean, np.sqrt(pd_mean**2 + pd_std**2))) print('------------------------------------------') bin_pres = bin_data_by(diff, amv_p, bin_ranges) amv_spd = good_amvs[keep_idxs, sidx] amv_dir = good_amvs[keep_idxs, didx] bf_spd, bf_dir = spd_dir_from_uv(bfs[keep_idxs, 0], bfs[keep_idxs, 1]) diff = amv_spd * units('m/s') - bf_spd diff = diff.magnitude spd_mad = np.average(np.abs(diff)) spd_bias = np.average(diff) print('spd, MAD: {0:.2f}'.format(spd_mad)) print('spd, bias: {0:.2f}'.format(spd_bias)) spd_mean = np.mean(diff) spd_std = np.std(diff) print('spd MAD/bias/rms: {0:.2f} {1:.2f} {2:.2f}'.format(np.average(np.abs(diff)), spd_mean, np.sqrt(spd_mean**2 + spd_std**2))) print('-----------------') bin_spd = bin_data_by(diff, amv_p, bin_ranges) dir_diff = direction_difference(amv_dir, bf_dir.magnitude) print('dir, MAD: {0:.2f}'.format(np.average(np.abs(dir_diff)))) print('dir bias: {0:.2f}'.format(np.average(dir_diff))) print('-------------------------------------') bin_dir = bin_data_by(dir_diff, amv_p, bin_ranges) amv_u, amv_v = uv_from_spd_dir(good_amvs[keep_idxs, sidx], good_amvs[keep_idxs, didx]) u_diffs = amv_u - (bfs[keep_idxs, 0] * units('m/s')) v_diffs = amv_v - (bfs[keep_idxs, 1] * units('m/s')) vd = np.sqrt(u_diffs**2 + v_diffs**2) vd_mean = np.mean(vd) vd_std = np.std(vd) print('VD bias/rms: {0:.2f} {1:.2f}'.format(vd_mean, np.sqrt(vd_mean**2 + vd_std**2))) print('******************************************************') x_values = [] num_pres = [] num_spd = [] num_dir = [] print('level num cases hgt MAD/bias spd MAD/bias dir MAD/bias') print('-------------------------------------------------------------------') for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) num_pres.append(bin_pres[i].shape[0]) num_spd.append(bin_spd[i].shape[0]) num_dir.append(bin_dir[i].shape[0]) print('{0:d} {1:d} {2:.2f}/{3:.2f} {4:.2f}/{5:.2f} {6:.2f}/{7:.2f}' .format(int(x_values[i]), num_pres[i], np.average(np.abs(bin_pres[i])), np.average(bin_pres[i]), np.average(np.abs(bin_spd[i])), np.average(bin_spd[i]), np.average(np.abs(bin_dir[i])), np.average(bin_dir[i]))) # Direct comparison to RAOB profile --------------------------------------------------------------- # ------------------------------------------------------------------------------------------------- vld = raob_match[:, 3] == 0 keep_idxs = vld amv_p = good_amvs[keep_idxs, pidx] bf_p = raob_match[keep_idxs, 2] diff = amv_p - bf_p mad = np.average(np.abs(diff)) bias = np.average(diff) print('********************************************************') print('Number of good raob matches: ', bf_p.shape[0]) print('press, MAD: {0:.2f}'.format(mad)) print('press, bias: {0:.2f}'.format(bias)) pd_std = np.std(diff) pd_mean = np.mean(diff) print('press bias/rms: {0:.2f} {1:.2f} '.format(pd_mean, np.sqrt(pd_mean**2 + pd_std**2))) print('------------------------------------------') bin_pres = bin_data_by(diff, amv_p, bin_ranges) amv_spd = good_amvs[keep_idxs, sidx] amv_dir = good_amvs[keep_idxs, didx] bf_spd, bf_dir = raob_match[keep_idxs, 0], raob_match[keep_idxs, 1] #diff = amv_spd * units('m/s') - bf_spd #diff = diff.magnitude diff = amv_spd - bf_spd spd_mad = np.average(np.abs(diff)) spd_bias = np.average(diff) print('spd, MAD: {0:.2f}'.format(spd_mad)) print('spd, bias: {0:.2f}'.format(spd_bias)) spd_mean = np.mean(diff) spd_std = np.std(diff) print('spd MAD/bias/rms: {0:.2f} {1:.2f} {2:.2f}'.format(np.average(np.abs(diff)), spd_mean, np.sqrt(spd_mean**2 + spd_std**2))) 
print('-----------------') bin_spd = bin_data_by(diff, amv_p, bin_ranges) dir_diff = direction_difference(amv_dir, bf_dir) print('dir, MAD: {0:.2f}'.format(np.average(np.abs(dir_diff)))) print('dir bias: {0:.2f}'.format(np.average(dir_diff))) print('-------------------------------------') bin_dir = bin_data_by(dir_diff, amv_p, bin_ranges) amv_u, amv_v = uv_from_spd_dir(good_amvs[keep_idxs, sidx], good_amvs[keep_idxs, didx]) bf_u, bf_v = uv_from_spd_dir(raob_match[keep_idxs, 0], raob_match[keep_idxs, 1]) u_diffs = amv_u - bf_u v_diffs = amv_v - bf_v vd = np.sqrt(u_diffs**2 + v_diffs**2) vd_mean = np.mean(vd) vd_std = np.std(vd) print('VD bias/rms: {0:.2f} {1:.2f}'.format(vd_mean, np.sqrt(vd_mean**2 + vd_std**2))) print('******************************************************') x_values = [] num_pres = [] num_spd = [] num_dir = [] print('level num cases hgt MAD/bias spd MAD/bias dir MAD/bias') print('-------------------------------------------------------------------') for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) num_pres.append(bin_pres[i].shape[0]) num_spd.append(bin_spd[i].shape[0]) num_dir.append(bin_dir[i].shape[0]) print('{0:d} {1:d} {2:.2f}/{3:.2f} {4:.2f}/{5:.2f} {6:.2f}/{7:.2f}' .format(int(x_values[i]), num_pres[i], np.average(np.abs(bin_pres[i])), np.average(bin_pres[i]), np.average(np.abs(bin_spd[i])), np.average(bin_spd[i]), np.average(np.abs(bin_dir[i])), np.average(bin_dir[i]))) # Comparison to Level of Best Fits (LBF) GFS to RAOB ------------------------------------------------------------------ # ------------------------------------------------------------------------------------------------------------ bfs = bfs_raob vld_bf = bfs[:, 3] == 0 vld_bf_gfs = bfs_gfs[:, 3] == 0 keep_idxs = np.logical_and(vld_bf, vld_bf_gfs) amv_p = good_amvs[keep_idxs, pidx] bf_p = bfs[keep_idxs, 2] bf_p_gfs = bfs_gfs[keep_idxs, 2] diff = bf_p - bf_p_gfs mad = np.average(np.abs(diff)) bias = np.average(diff) print('********************************************************') print('Number of good best fits to GFS: ', bf_p.shape[0]) print('press, MAD: {0:.2f}'.format(mad)) print('press, bias: {0:.2f}'.format(bias)) pd_std = np.std(diff) pd_mean = np.mean(diff) print('press bias/rms: {0:.2f} {1:.2f} '.format(pd_mean, np.sqrt(pd_mean**2 + pd_std**2))) print('------------------------------------------') bin_pres = bin_data_by(diff, amv_p, bin_ranges) amv_spd = good_amvs[keep_idxs, sidx] amv_dir = good_amvs[keep_idxs, didx] bf_gfs_spd, bf_gfs_dir = spd_dir_from_uv(bfs_gfs[keep_idxs, 0], bfs_gfs[keep_idxs, 1]) bf_spd, bf_dir = spd_dir_from_uv(bfs[keep_idxs, 0], bfs[keep_idxs, 1]) diff = bf_spd - bf_gfs_spd diff = diff.magnitude spd_mad = np.average(np.abs(diff)) spd_bias = np.average(diff) print('spd, MAD: {0:.2f}'.format(spd_mad)) print('spd, bias: {0:.2f}'.format(spd_bias)) spd_mean = np.mean(diff) spd_std = np.std(diff) print('spd MAD/bias/rms: {0:.2f} {1:.2f} {2:.2f}'.format(np.average(np.abs(diff)), spd_mean, np.sqrt(spd_mean**2 + spd_std**2))) print('-----------------') bin_spd = bin_data_by(diff, amv_p, bin_ranges) dir_diff = direction_difference(bf_dir.magnitude, bf_gfs_dir.magnitude) print('dir, MAD: {0:.2f}'.format(np.average(np.abs(dir_diff)))) print('dir bias: {0:.2f}'.format(np.average(dir_diff))) print('-------------------------------------') bin_dir = bin_data_by(dir_diff, amv_p, bin_ranges) u_diffs = bfs[keep_idxs, 0] - bfs_gfs[keep_idxs, 0] v_diffs = bfs[keep_idxs, 1] - bfs_gfs[keep_idxs, 1] vd = np.sqrt(u_diffs**2 + v_diffs**2) vd_mean = np.mean(vd) vd_std 
= np.std(vd) print('VD bias/rms: {0:.2f} {1:.2f}'.format(vd_mean, np.sqrt(vd_mean**2 + vd_std**2))) print('******************************************************') x_values = [] num_pres = [] num_spd = [] num_dir = [] print('level num cases hgt MAD/bias spd MAD/bias dir MAD/bias') print('-------------------------------------------------------------------') for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) num_pres.append(bin_pres[i].shape[0]) num_spd.append(bin_spd[i].shape[0]) num_dir.append(bin_dir[i].shape[0]) print('{0:d} {1:d} {2:.2f}/{3:.2f} {4:.2f}/{5:.2f} {6:.2f}/{7:.2f}' .format(int(x_values[i]), num_pres[i], np.average(np.abs(bin_pres[i])), np.average(bin_pres[i]), np.average(np.abs(bin_spd[i])), np.average(bin_spd[i]), np.average(np.abs(bin_dir[i])), np.average(bin_dir[i]))) #return bin_ranges, bin_pres, bin_spd, bin_dir def compare_amvs_bestfit_driver(amvs, bfs, prd, bfs_gfs, rbm, bin_size=200): thin = np.logical_and(prd[:, 2] > 0, prd[:, 2] < 1) thick = np.logical_and(prd[:, 2] >= 1, prd[:, 2] < 6) deep = np.logical_and(prd[:, 2] >= 6, prd[:, 2] <= 20) amvs_thin = amvs[thin, :] amvs_thick = amvs[thick, :] amvs_deep = amvs[deep, :] bfs_thin = bfs[thin, :] bfs_thick = bfs[thick, :] bfs_deep = bfs[deep, :] bfs_gfs_thin = bfs_gfs[thin, :] bfs_gfs_thick = bfs_gfs[thick, :] bfs_gfs_deep = bfs_gfs[deep, :] rbm_thin = rbm[thin, :] rbm_thick = rbm[thick, :] rbm_deep = rbm[deep, :] # values_pres = [] # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thin, bfs_thin, bin_size=bin_size) # values_pres.append(bin_pres) # # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thick, bfs_thick, bin_size=bin_size) # values_pres.append(bin_pres) # # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs, bfs, bin_size=bin_size) # values_pres.append(bin_pres) # # values_spd = [] # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thin, bfs_gfs_thin, bin_size=bin_size) # values_spd.append(bin_spd) # # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thick, bfs_gfs_thick, bin_size=bin_size) # values_spd.append(bin_spd) # # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs, bfs_gfs, bin_size=bin_size) # values_spd.append(bin_spd) values_prs = [] values_spd = [] values_dir = [] bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thin, bfs_thin, bin_size=bin_size) values_prs.append(bin_pres) values_spd.append(bin_spd) values_dir.append(bin_dir) bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thick, bfs_thick, bin_size=bin_size) values_prs.append(bin_pres) values_spd.append(bin_spd) values_dir.append(bin_dir) bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs, bfs, bin_size=bin_size) values_prs.append(bin_pres) values_spd.append(bin_spd) values_dir.append(bin_dir) return bin_ranges, values_prs, values_spd, values_dir def compare_amvs_bestfit_driver2(amvs, bfs, prd, bfs_gfs, rbm, bin_size=200): good = np.logical_and(prd[:, 0] != 5, prd[:, 0] != 0) ice = np.logical_and(good, prd[:, 0] == 4) not_ice = np.logical_and(good, prd[:, 0] != 4) amvs_ice = amvs[ice, :] amvs_not_ice = amvs[not_ice, :] bfs_ice = bfs[ice, :] bfs_not_ice = bfs[not_ice, :] # bfs_gfs_ice = bfs_gfs[ice, :] # bfs_gfs_not_ice = bfs_gfs[not_ice, :] # # rbm_ice = rbm[ice, :] # rbm_not_ice = rbm[thick, :] # values_pres = [] # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thin, bfs_thin, bin_size=bin_size) # values_pres.append(bin_pres) # # bin_ranges, 
bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thick, bfs_thick, bin_size=bin_size) # values_pres.append(bin_pres) # # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs, bfs, bin_size=bin_size) # values_pres.append(bin_pres) # # values_spd = [] # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thin, bfs_gfs_thin, bin_size=bin_size) # values_spd.append(bin_spd) # # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_thick, bfs_gfs_thick, bin_size=bin_size) # values_spd.append(bin_spd) # # bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs, bfs_gfs, bin_size=bin_size) # values_spd.append(bin_spd) values_prs = [] values_spd = [] values_dir = [] bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_ice, bfs_ice, bin_size=bin_size) values_prs.append(bin_pres) values_spd.append(bin_spd) values_dir.append(bin_dir) bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs_not_ice, bfs_not_ice, bin_size=bin_size) values_prs.append(bin_pres) values_spd.append(bin_spd) values_dir.append(bin_dir) bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs, bfs, bin_size=bin_size) values_prs.append(bin_pres) values_spd.append(bin_spd) values_dir.append(bin_dir) print('***************************************') return bin_ranges, values_prs, values_spd, values_dir def compare_amvs_bestfit(amvs, bfs, bin_size=200): good_amvs = amvs num_good = good_amvs.shape[0] didx = 6 sidx = 5 pidx = 4 print('Number of AMVs: {0:d}'.format(num_good)) spd_min = good_amvs[:, sidx].min() spd_max = good_amvs[:, sidx].max() print('spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(spd_min, spd_max, np.average(good_amvs[:, sidx]))) p_min = good_amvs[:, pidx].min() p_max = good_amvs[:, pidx].max() print('pres min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(p_min, p_max, np.average(good_amvs[:, pidx]))) low = good_amvs[:, pidx] >= 700 mid = np.logical_and(good_amvs[:, pidx] < 700, good_amvs[:, pidx] > 400) hgh = good_amvs[:, pidx] <= 400 n_low = np.sum(low) n_mid = np.sum(mid) n_hgh = np.sum(hgh) print('% low: {0:.2f}'.format(100.0*(n_low/num_good))) print('% mid: {0:.2f}'.format(100.0*(n_mid/num_good))) print('% hgh: {0:.2f}'.format(100.0*(n_hgh/num_good))) print('---------------------------') print('Low Spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[low, sidx].min(), good_amvs[low, sidx].max(), good_amvs[low,sidx].mean())) print('Low Press min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[low, pidx].min(), good_amvs[low, pidx].max(), good_amvs[low, pidx].mean())) print('---------------------------') print('Mid Spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[mid, sidx].min(), good_amvs[mid, sidx].max(), good_amvs[mid, sidx].mean())) print('Mid Press min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[mid, pidx].min(), good_amvs[mid, pidx].max(), good_amvs[mid, pidx].mean())) print('---------------------------') print('Hgh Spd min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[hgh, sidx].min(), good_amvs[hgh, sidx].max(), good_amvs[hgh, sidx].mean())) print('Hgh Press min/max/mean: {0:.2f} {1:.2f} {2:.2f}'.format(good_amvs[hgh, pidx].min(), good_amvs[hgh, pidx].max(), good_amvs[hgh, pidx].mean())) # Comparison to Level of Best Fit (LBF) ------------------------------------------------------------------ # -------------------------------------------------------------------------------------------------------- bin_ranges = get_press_bin_ranges(100, 1000, bin_size=bin_size) bin_ranges = [] 
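    # NOTE (added comment): the evenly spaced bins from get_press_bin_ranges above are immediately
    # discarded and replaced by the hand-chosen pressure bins (hPa) appended below.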
bin_ranges.append([50, 300]) bin_ranges.append([300, 400]) bin_ranges.append([400, 500]) bin_ranges.append([500, 600]) bin_ranges.append([600, 700]) bin_ranges.append([700, 800]) bin_ranges.append([800, 1000]) vld_bf = bfs[:, 3] == 0 keep_idxs = vld_bf amv_p = good_amvs[keep_idxs, pidx] bf_p = bfs[keep_idxs, 2] diff = amv_p - bf_p mad = np.average(np.abs(diff)) bias = np.average(diff) print('********************************************************') print('Number of good best fits: ', bf_p.shape[0]) print('press, MAD: {0:.2f}'.format(mad)) print('press, bias: {0:.2f}'.format(bias)) pd_std = np.std(diff) pd_mean = np.mean(diff) print('press bias/rms: {0:.2f} {1:.2f} '.format(pd_mean, np.sqrt(pd_mean**2 + pd_std**2))) print('------------------------------------------') bin_pres = bin_data_by(diff, amv_p, bin_ranges) amv_spd = good_amvs[keep_idxs, sidx] amv_dir = good_amvs[keep_idxs, didx] bf_spd, bf_dir = spd_dir_from_uv(bfs[keep_idxs, 0], bfs[keep_idxs, 1]) #bf_spd, bf_dir = bfs[keep_idxs, 0], bfs[keep_idxs, 1] diff = amv_spd * units('m/s') - bf_spd diff = diff.magnitude #diff = amv_spd - bf_spd spd_mad = np.average(np.abs(diff)) spd_bias = np.average(diff) print('spd, MAD: {0:.2f}'.format(spd_mad)) print('spd, bias: {0:.2f}'.format(spd_bias)) spd_mean = np.mean(diff) spd_std = np.std(diff) print('spd MAD/bias/rms: {0:.2f} {1:.2f} {2:.2f}'.format(np.average(np.abs(diff)), spd_mean, np.sqrt(spd_mean**2 + spd_std**2))) print('-----------------') bin_spd = bin_data_by(diff, amv_p, bin_ranges) dir_diff = direction_difference(amv_dir, bf_dir.magnitude) #dir_diff = direction_difference(amv_dir, bf_dir) print('dir, MAD: {0:.2f}'.format(np.average(np.abs(dir_diff)))) print('dir bias: {0:.2f}'.format(np.average(dir_diff))) print('-------------------------------------') bin_dir = bin_data_by(dir_diff, amv_p, bin_ranges) amv_u, amv_v = uv_from_spd_dir(good_amvs[keep_idxs, sidx], good_amvs[keep_idxs, didx]) u_diffs = amv_u - (bfs[keep_idxs, 0] * units('m/s')) v_diffs = amv_v - (bfs[keep_idxs, 1] * units('m/s')) vd = np.sqrt(u_diffs**2 + v_diffs**2) vd_mean = np.mean(vd) vd_std = np.std(vd) print('VD bias/rms: {0:.2f} {1:.2f}'.format(vd_mean, np.sqrt(vd_mean**2 + vd_std**2))) print('******************************************************') x_values = [] num_pres = [] num_spd = [] num_dir = [] print('level num cases hgt MAD/bias spd MAD/bias dir MAD/bias') print('-------------------------------------------------------------------') for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) num_pres.append(bin_pres[i].shape[0]) num_spd.append(bin_spd[i].shape[0]) num_dir.append(bin_dir[i].shape[0]) print('{0:d} {1:d} {2:.2f}/{3:.2f} {4:.2f}/{5:.2f} {6:.2f}/{7:.2f}' .format(int(x_values[i]), num_pres[i], np.average(np.abs(bin_pres[i])), np.average(bin_pres[i]), np.average(np.abs(bin_spd[i])), np.average(bin_spd[i]), np.average(np.abs(bin_dir[i])), np.average(bin_dir[i]))) return bin_ranges, bin_pres, bin_spd, bin_dir def make_plot(bin_ranges, bin_values): bin_vals_r = bin_values[0] bin_vals_g = bin_values[1] bin_vals = bin_values[2] x_values = [] num_vals_r = [] num_vals_g = [] num_vals = [] mad = [] bias = [] std = [] mad_r = [] bias_r = [] std_r = [] mad_g = [] bias_g = [] std_g = [] num_r = 0 num_g = 0 num = 0 for i in range(len(bin_ranges)): num_r += bin_vals_r[i].shape[0] num_g += bin_vals_g[i].shape[0] num += bin_vals[i].shape[0] for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) # num_vals_r.append((bin_vals_r[i].shape[0])/num_r) # 
num_vals_g.append((bin_vals_g[i].shape[0])/num_g) # num_vals.append((bin_vals[i].shape[0])/num) num_vals_r.append(np.log10(bin_vals_r[i].shape[0])) num_vals_g.append(np.log10(bin_vals_g[i].shape[0])) num_vals.append(np.log10(bin_vals[i].shape[0])) mad_r.append(np.average(np.abs(bin_vals_r[i]))) bias_r.append(np.average(bin_vals_r[i])) std_r.append(np.std(bin_vals_r[i])) mad_g.append(np.average(np.abs(bin_vals_g[i]))) bias_g.append(np.average(bin_vals_g[i])) std_g.append(np.std(bin_vals_g[i])) mad.append(np.average(np.abs(bin_vals[i]))) bias.append(np.average(bin_vals[i])) std.append(np.std(bin_vals[i])) title='ACHA - RAOB BestFit' x_axis_label = 'STD (deg)' do_plot(x_values, [std_r, std_g, std], ['ICE', 'NOT ICE', 'ALL'], ['blue', 'red', 'black'], title=title, x_axis_label=x_axis_label, y_axis_label='hPa', invert=True, flip=True) x_axis_label = 'BIAS (deg)' do_plot(x_values, [bias_r, bias_g, bias], ['ICE', 'NOT ICE', 'ALL'], ['blue', 'red', 'black'], title=title, x_axis_label=x_axis_label, y_axis_label='hPa', invert=True, flip=True) x_axis_label = 'MAD (deg)' do_plot(x_values, [mad_r, mad_g, mad], ['ICE', 'NOT ICE', 'ALL'], ['blue', 'red', 'black'], title=title, x_axis_label=x_axis_label, y_axis_label='hPa', invert=True, flip=True) do_plot(x_values, [num_vals_r, num_vals_g, num_vals], ['ICE: '+str(num_r), 'NOT ICE: '+str(num_g), 'ALL: '+str(num)], ['blue', 'red', 'black'], title=title, x_axis_label='log(Count)', y_axis_label='hPa', invert=True, flip=True) def make_plot2(bin_pres_r, bin_ranges): x_values = [] num_pres_r = [] num_pres_g = [] num_spd = [] num_dir = [] pres_mad_r = [] pres_bias_r = [] pres_mad_g = [] pres_bias_g = [] spd_mad_r = [] spd_bias_r = [] spd_mad_g = [] spd_bias_g = [] num_r = 0 for i in range(len(bin_ranges)): num_r += bin_pres_r[i].shape[0] for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) num_pres_r.append((bin_pres_r[i].shape[0])/num_r) pres_mad_r.append(np.average(np.abs(bin_pres_r[i]))) pres_bias_r.append(np.average(bin_pres_r[i])) #spd_mad_r.append(np.average(np.abs(bin_spd_r[i]))) #spd_bias_r.append(np.average(bin_spd_r[i])) do_plot(x_values, [pres_mad_r], ['RAOB'], ['blue'], title='ACHA - BestFit', x_axis_label='MAD', y_axis_label='hPa', invert=True, flip=True) #do_plot(x_values, [pres_mad_r, pres_mad_g], ['RAOB', 'GFS'], ['blue', 'red'], title='ACHA - BestFit', x_axis_label='MAD', y_axis_label='hPa', invert=True, flip=True) #do_plot(x_values, [spd_mad_r, spd_mad_g], ['RAOB', 'GFS'], ['blue', 'red'], title='ACHA - BestFit', x_axis_label='MAE (m/s)', y_axis_label='hPa', invert=True, flip=True) #do_plot(x_values, [pres_bias_r, pres_bias_g], ['RAOB', 'GFS'], ['blue', 'red'], title='ACHA - BestFit', x_axis_label='BIAS', y_axis_label='hPa', invert=True, flip=True) #do_plot(x_values, [spd_bias_r, spd_bias_g], ['RAOB', 'GFS'], ['blue', 'red'], title='ACHA - BestFit', x_axis_label='BIAS (m/s)', y_axis_label='hPa', invert=True, flip=True) #do_plot(x_values, [num_pres_r, num_pres_g], ['RAOB:'+str(num_r), 'GFS:'+str(num_g)], ['blue', 'red'], x_axis_label='Normalized Count', y_axis_label='hPa', invert=True, flip=True) def make_plot3(amvs_list, bfs_list): num = len(amvs_list) bin_ranges = get_press_bin_ranges(100, 1000, bin_size=100) y_values = [] x_values = None line_labels = [] colors = [] for k in range(num): amvs = amvs_list[k] bfs = bfs_list[k] bin_ranges, bin_pres, bin_spd, bin_dir = compare_amvs_bestfit(amvs, bfs, bin_size=100) #diff = amvs[:, 4] - amvs[:, 7] #bin_pres = bin_data_by(diff, amvs[:, 4], bin_ranges) pres_mad = [] x_values = [] 
for i in range(len(bin_ranges)): x_values.append(np.average(bin_ranges[i])) #num_pres_r.append((bin_pres[i].shape[0]) / num_r) pres_mad.append(np.average(np.abs(bin_pres[i]))) #pres_bias.append(np.average(bin_pres[i])) line_labels.append(None) colors.append(None) y_values.append(pres_mad) do_plot(x_values, y_values, line_labels, colors, title='Daily ACHA - BestFit RAOB', x_axis_label='MAD', y_axis_label='hPa', invert=True, flip=True) # imports the S4 NOAA output # filename: full path as a string, '/home/user/filename' # returns a dict: time -> list of profiles (a profile is a list of levels) def get_aeolus_time_dict(filename, lon360=False, do_sort=True): time_dict = {} with open(filename) as file: while True: prof_hdr = file.readline() if not prof_hdr: break toks = prof_hdr.split() yr = int(float(toks[0])) mon = int(float(toks[1])) dy = int(float(toks[2])) hr = int(float(toks[3])) mn = int(float(toks[4])) ss = int(float(toks[5])) lon = float(toks[6]) lat = float(toks[7]) nlevs = int(toks[8]) if lon360: if lon < 0: lon += 360.0 else: if lon > 180.0: lon -= 360.0 dto = datetime.datetime(year=yr, month=mon, day=dy, hour=hr, minute=mn, second=ss) dto = dto.replace(tzinfo=timezone.utc) timestamp = dto.timestamp() prof = [] if time_dict.get(timestamp, -1) == -1: prof_s = [] prof_s.append(prof) time_dict[timestamp] = prof_s else: prof_s = time_dict.get(timestamp) prof_s.append(prof) for k in range(nlevs): line = file.readline() toks = line.split() lvlidx = int(toks[0]) hhh = float(toks[1]) * 1000.0 hht = float(toks[2]) * 1000.0 hhb = float(toks[3]) * 1000.0 err = float(toks[4]) azm = float(toks[5]) ws = float(toks[6]) len = float(toks[7]) tup = (lat, lon, hhh, hht, hhb, azm, ws) prof.append(tup) file.close() if do_sort: keys = np.array(list(time_dict.keys())) keys.sort() keys = keys.tolist() sorted_time_dict = {} for key in keys: sorted_time_dict[key] = time_dict.get(key) time_dict = sorted_time_dict return time_dict # make each profile at a timestamp a numpy array def time_dict_to_nd_0(time_dict): keys = list(time_dict.keys()) for key in keys: vals = time_dict[key] if vals is not None: nda = np.array(vals[0]) # only take one profile per second time_dict[key] = nda return time_dict # # make each profile at a timestamp a numpy array def time_dict_to_nd(time_dict): keys = list(time_dict.keys()) for key in keys: prof_s = time_dict[key] if prof_s is not None: for i in range(len(prof_s)): nda = np.stack(prof_s[i]) prof_s[i] = nda return time_dict def time_dict_to_nd_2(time_dict): keys = list(time_dict.keys()) for key in keys: vals = time_dict[key] if vals is not None: time_dict[key] = np.stack(vals) return time_dict def concat(t_dct_0, t_dct_1): keys_0 = list(t_dct_0.keys()) nda_0 = np.array(keys_0) keys_1 = list(t_dct_1.keys()) nda_1 = np.array(keys_1) comm_keys, comm0, comm1 = np.intersect1d(nda_0, nda_1, return_indices=True) comm_keys = comm_keys.tolist() for key in comm_keys: t_dct_1.pop(key) t_dct_0.update(t_dct_1) return t_dct_0 def get_aeolus_time_dict_s(files_path, lon360=False, do_sort=True, chan='mie'): ftimes = [] fnames = glob.glob(files_path + chan + '1day.out.*') time_dct = {} for pathname in fnames: fname = os.path.split(pathname)[1] toks = fname.split('.') dstr = toks[2] dto = datetime.datetime.strptime(dstr, '%Y-%m-%d').replace(tzinfo=timezone.utc) ts = dto.timestamp() ftimes.append(ts) time_dct[ts] = pathname sorted_filenames = [] ftimes.sort() for t in ftimes: sorted_filenames.append(time_dct.get(t)) dct_s = [] for fname in sorted_filenames: a_dct = get_aeolus_time_dict(fname, 
                                      lon360=lon360, do_sort=do_sort)
        dct_s.append(a_dct)

    t_dct = dct_s[0]
    for dct in dct_s[1:]:
        concat(t_dct, dct)

    return t_dct


def time_dict_to_cld_layers(time_dict):
    time_dict_layers = {}
    keys = list(time_dict.keys())

    for key in keys:
        prof_s = time_dict[key]
        layers = []

        prof = prof_s[0]
        if len(prof) == 1:
            tup = prof[0]
            layers.append((tup[0], tup[1], tup[3], tup[4]))
            time_dict_layers[key] = layers
            continue

        top = -9999.9
        last_bot = -9999.9
        tup = None
        for i in range(len(prof)):
            tup = prof[i]
            if i == 0:
                top = tup[3]
                bot = tup[4]
                last_bot = bot
            else:
                if math.fabs(last_bot - tup[3]) > 10.0:
                    layers.append((tup[0], tup[1], top, last_bot))
                    top = tup[3]
                last_bot = tup[4]
        layers.append((tup[0], tup[1], top, tup[4]))
        time_dict_layers[key] = layers

    return time_dict_layers


def get_cloud_layers_dict(filename, lon360=False):
    a_d = get_aeolus_time_dict(filename, lon360=lon360)
    c_d = time_dict_to_cld_layers(a_d)
    cld_lyr_dct = time_dict_to_nd_2(c_d)
    return cld_lyr_dct


def get_cloud_layers_dict_s(aeolus_files_dir, lon360=False):
    a_d = get_aeolus_time_dict_s(aeolus_files_dir, lon360=lon360, do_sort=True, chan='mie')
    cld_lyr_dct = time_dict_to_cld_layers(a_d)
    cld_lyr_dct = time_dict_to_nd_2(cld_lyr_dct)
    return cld_lyr_dct


def match_aeolus_to_clavrx(aeolus_dict, clvrx_files):
    nav = get_navigation()
    # clvrx_params = clvrx_files.get_parameters()
    clvrx_params = ['cld_height_acha', 'cld_press_acha', 'cld_temp_acha']
    match_dict = {}

    keys = list(aeolus_dict.keys())
    last_f_idx = -1
    cnt = 0
    param_nd = None

    for key in keys:
        fname, ftime, f_idx = clvrx_files.get_file_containing_time(key)
        if f_idx is None:
            continue

        prof_s = aeolus_dict.get(key)
        if prof_s is None:
            continue

        if f_idx != last_f_idx:
            last_f_idx = f_idx
            ds = Dataset(fname)
            param_s = []
            for param in clvrx_params:
                param_s.append(ds[param][:, :])
            param_nd = np.stack(param_s, axis=0)
            ds.close()

        for prof in prof_s:
            lat = prof[0, 0]
            lon = prof[0, 1]
            cc, ll = nav.earth_to_lc(lon, lat)
            if cc is None or ll is None:
                continue
            # c_rng, l_rng = get_search_box(nav, lon, lat)
            # if c_rng is None:
            #     continue
            # Slice into a new array; overwriting param_nd here would clobber the full
            # CLAVR-x grid for subsequent profiles that fall in the same file.
            box_nd = param_nd[:, cc-4:cc+5, ll-4:ll+5]
            match_dict[cnt] = (key, cc, ll, f_idx, prof, box_nd)
            cnt += 1

    return match_dict


def create_file_2(match_dct, filename, clvrx_params=['cld_height_acha', 'cld_press_acha', 'cld_temp_acha']):
    grd_x_len = 7
    grd_y_len = 7
    num_aparams = 7
    num_aprofs = len(match_dct)
    max_num_alevels = 0

    alons = []
    alats = []
    atimes = []
    elems = []
    lines = []

    # scan to get max num levels
    keys = list(match_dct.keys())
    for key in keys:
        tup = match_dct.get(key)
        prof = tup[4]
        lat = prof[0, 0]
        lon = prof[0, 1]
        alons.append(lon)
        alats.append(lat)
        atimes.append(tup[0])
        elems.append(tup[1])
        lines.append(tup[2])
        nlevs = prof.shape[0]
        if nlevs > max_num_alevels:
            max_num_alevels = nlevs

    alons = np.array(alons)
    alats = np.array(alats)
    atimes = np.array(atimes)
    elems = np.array(elems)
    lines = np.array(lines)

    # the top level group for the output file
    rootgrp = Dataset(filename, 'w', format='NETCDF4')

    dim_amvs = rootgrp.createDimension('num_aeolus_params', size=num_aparams)
    dim_alevs = rootgrp.createDimension('max_num_aeolus_levels', size=max_num_alevels)
    dim_num_aeolus_prof = rootgrp.createDimension('num_aeolus_profs', size=num_aprofs)
    dim_grd_x = rootgrp.createDimension('grd_x_len', size=grd_x_len)
    dim_grd_y = rootgrp.createDimension('grd_y_len', size=grd_y_len)

    prf_time = rootgrp.createVariable('time', 'f4', ['num_aeolus_profs'])

    # ---- Profile variables ---------------
    prf_lon = rootgrp.createVariable('prof_longitude', 'f4', ['num_aeolus_profs'])
    prf_lon.units =
'degrees east' prf_lat = rootgrp.createVariable('prof_latitude', 'f4', ['num_aeolus_profs']) prf_lat.units = 'degrees north' prf_time.units = 'seconds since 1970-01-1 00:00:00' prf_elem = rootgrp.createVariable('FD_elem', 'f4', ['num_aeolus_profs']) prf_line = rootgrp.createVariable('FD_line', 'f4', ['num_aeolus_profs']) prf_azm = rootgrp.createVariable('prof_azm', 'f4', ['num_aeolus_profs', 'max_num_aeolus_levels']) prf_azm.units = 'degree' prf_spd = rootgrp.createVariable('prof_spd', 'f4', ['num_aeolus_profs', 'max_num_aeolus_levels']) prf_spd.units = 'm s-1' prf_hht = rootgrp.createVariable('prof_hht', 'f4', ['num_aeolus_profs', 'max_num_aeolus_levels']) prf_hht.units = 'meter' prf_hhb = rootgrp.createVariable('prof_hhb', 'f4', ['num_aeolus_profs', 'max_num_aeolus_levels']) prf_hhb.units = 'meter' # ----- Product variables ---------------- nc4_vars = [] #out_params = amv_files.get_out_parameters() #meta_dict = amv_files.get_meta_dict() for pidx, param in enumerate(clvrx_params): #u, t = meta_dict.get(param) u = None t = 'f4' var = rootgrp.createVariable(param, t, ['num_aeolus_profs', 'grd_y_len', 'grd_x_len']) if u is not None: var.units = u nc4_vars.append(var) # Write data to file... for idx, key in enumerate(keys): tup = match_dct.get(key) prof = tup[4] param_nd = tup[5] for pidx, param in enumerate(clvrx_params): nc4_vars[pidx][idx, :, :] = param_nd[pidx, :, :] prf_spd[idx,:] = prof[:,6] prf_azm[idx,:] = prof[:,5] prf_hht[idx,:] = prof[:,3] prf_hhb[idx,:] = prof[:,4] prf_lon[:] = alons prf_lat[:] = alats prf_elem[:] = elems prf_line[:] = lines prf_time[:] = atimes rootgrp.close() def get_search_box(nav, lon, lat): cc, ll = nav.earth_to_lc(lon, lat) if cc is None: return None, None c_rng = [cc - half_width, cc + half_width] l_rng = [ll - half_width, ll + half_width] if c_rng[0] < 0: c_rng[0] = 0 if l_rng[0] < 0: l_rng[0] = 0 if c_rng[1] >= num_elems: c_rng[1] = num_elems - 1 if l_rng[1] >= num_lines: l_rng[1] = num_lines - 1 return c_rng, l_rng # aeolus_dict: time -> profiles # amv_files_path: directory containing AMVs, '/home/user/amvdir/' # return dict: aeolus time -> tuple (amv_lon, amv_lat, amv_pres, amv_spd, amv_dir) def match_amvs_to_aeolus(aeolus_dict, amv_files_path, amv_source='OPS', band='14', amv_files=None): nav = amv_files.get_navigation() amv_params = amv_files.get_parameters() match_dict = {} keys = list(aeolus_dict.keys()) last_f_idx = -1 for key in keys: fname, ftime, f_idx = amv_files.get_file_containing_time(key) if f_idx is None: continue profs = aeolus_dict.get(key) if profs is None: continue layers = profs lat = layers[0, 0] lon = layers[0, 1] c_rng, l_rng = get_search_box(nav, lon, lat) if c_rng is None: continue if f_idx != last_f_idx: last_f_idx = f_idx ds = Dataset(fname) amv_lons = ds[amv_files.lon_name][:] amv_lats = ds[amv_files.lat_name][:] cc = ds[amv_files.elem_name][:] ll = ds[amv_files.line_name][:] # cc, ll = nav.earth_to_lc_s(amv_lons, amv_lats) param_s = [] param_s.append(amv_lons) param_s.append(amv_lats) param_s.append(cc) param_s.append(ll) for param in amv_params: if param == 'V_3D': param_s.append(ds[param][:, 0]) param_s.append(ds[param][:, 1]) else: param_s.append(ds[param][:]) ds.close() in_cc = np.logical_and(cc > c_rng[0], cc < c_rng[1]) in_ll = np.logical_and(ll > l_rng[0], ll < l_rng[1]) in_box = np.logical_and(in_cc, in_ll) num_amvs = np.sum(in_box) if num_amvs == 0: continue # dist = haversine_np(lon, lat, amv_lons[in_box], amv_lats[in_box]) param_nd = np.vstack(param_s) param_nd = param_nd[:, in_box] match_dict[key] = param_nd 
return match_dict # full path as string filename to create, '/home/user/newfilename' # aeolus_to_amv_dct: output from match_amvs_to_aeolus # aeolus_dct: output from get_aeolus_time_dict # amv_files: container representing specific AMV product info def create_file(filename, aeolus_to_amv_dct, aeolus_dct, amv_files, cld_lyr=False): keys = list(aeolus_to_amv_dct.keys()) num_amvs = [] num_levs = [] times = [] namvs = 0 nlevs = 0 for key in keys: param_nd = aeolus_to_amv_dct.get(key) num_amvs.append(param_nd.shape[1]) namvs += param_nd.shape[1] prof = aeolus_dct.get(key) num_levs.append(prof.shape[0]) nlevs += prof.shape[0] times.append(key) amv_per_alus = len(aeolus_to_amv_dct) rootgrp = Dataset(filename, 'w', format='NETCDF4') dim_amvs = rootgrp.createDimension('amvs', size=namvs) dim_alus = rootgrp.createDimension('profs', size=nlevs) dim_num_aeolus_prof = rootgrp.createDimension('num_aeolus_profs', size=len(aeolus_to_amv_dct)) nc4_vars = [] out_params = amv_files.get_out_parameters() meta_dict = amv_files.get_meta_dict() for pidx, param in enumerate(out_params): u, t = meta_dict.get(param) var = rootgrp.createVariable(param, t, ['amvs']) if u is not None: var.units = u nc4_vars.append(var) dist = rootgrp.createVariable('dist_to_prof', 'f4', ['amvs']) dist.units = 'km' num_amvs_per_prof = rootgrp.createVariable('num_amvs_per_prof', 'i4', ['num_aeolus_profs']) num_levs_per_prof = rootgrp.createVariable('num_levs_per_prof', 'i4', ['num_aeolus_profs']) prof_time = rootgrp.createVariable('time', 'f4', ['num_aeolus_profs']) # ---- Profile variables --------------- prf_lon = rootgrp.createVariable('prof_longitude', 'f4', ['num_aeolus_profs']) prf_lon.units = 'degrees east' prf_lat = rootgrp.createVariable('prof_latitude', 'f4', ['num_aeolus_profs']) prf_lat.units = 'degrees north' prof_time.units = 'seconds since 1970-01-1 00:00:00' if not cld_lyr: prf_azm = rootgrp.createVariable('prof_azm', 'f4', ['profs']) prf_azm.units = 'degree' prf_spd = rootgrp.createVariable('prof_spd', 'f4', ['profs']) prf_spd.units = 'm s-1' prf_hht = rootgrp.createVariable('prof_hht', 'f4', ['profs']) prf_hht.units = 'meter' prf_hhb = rootgrp.createVariable('prof_hhb', 'f4', ['profs']) prf_hhb.units = 'meter' # -------------------------------------- i_a = 0 i_c = 0 for idx, key in enumerate(keys): namvs = num_amvs[idx] nlevs = num_levs[idx] i_b = i_a + namvs i_d = i_c + nlevs prof = aeolus_dct.get(key) if not cld_lyr: prf_hht[i_c:i_d] = prof[:, 3] prf_hhb[i_c:i_d] = prof[:, 4] prf_azm[i_c:i_d] = prof[:, 5] prf_spd[i_c:i_d] = prof[:, 6] else: prf_hht[i_c:i_d] = prof[:, 2] prf_hhb[i_c:i_d] = prof[:, 3] i_c += nlevs plat = prof[0, 0] plon = prof[0, 1] prf_lat[idx::] = plat prf_lon[idx::] = plon param_nd = aeolus_to_amv_dct.get(key) for pidx, param in enumerate(out_params): nc4_vars[pidx][i_a:i_b] = param_nd[pidx, :] dist[i_a:i_b] = haversine_np(plon, plat, param_nd[0, :], param_nd[1, :]) i_a += namvs num_amvs_per_prof[:] = num_amvs num_levs_per_prof[:] = num_levs prof_time[:] = times rootgrp.close() # aeolus_files_dir: S4 NOAA txt output files # amv_files_dir: G16/17 AMV product files # outfile: pathname for the Netcdf match file def create_amv_to_aeolus_match_file(aeolus_files_dir, amv_files_dir, outfile=None, amv_source='OPS', band='14', chan='mie'): if chan == 'mie': a_d = get_cloud_layers_dict_s(aeolus_files_dir) else: a_d = get_aeolus_time_dict_s(aeolus_files_dir, chan=chan) a_d = time_dict_to_nd_0(a_d) amv_files = None if amv_source == 'CARR': amv_files = get_datasource(amv_files_dir, 60, 'CARR', band=band) m_d = 
    match_amvs_to_aeolus(a_d, amv_files_dir, amv_source, band, amv_files)

    if outfile is not None:
        cld_lyr = False
        if chan == 'mie':
            cld_lyr = True
        create_file(outfile, m_d, a_d, amv_files, cld_lyr=cld_lyr)


# match_file: pathname for the product file
# dt_str_0: start time (YYYY-MM-DD_HH:MM)
# dt_str_1: end time (YYYY-MM-DD_HH:MM)
# amv_var_names: list of amv parameters (see match_file ncdump) to subset
# returns: Xarray.DataArray
#   amvs[nprofs, max_num_amvs_per_prof, num_of_params], profs[nprofs, max_num_levs_per_prof, num_of_params],
#   prof_locs[nprofs, (lon, lat)]
def subset_by_time(match_file, dt_str_0, dt_str_1, amv_var_names):
    rootgrp = Dataset(match_file, 'r', format='NETCDF4')
    all_dims = rootgrp.dimensions
    t_var = rootgrp['time']

    n_profs = len(all_dims['num_aeolus_profs'])
    n_amvs_per_prof = rootgrp['num_amvs_per_prof'][:]
    n_levs_per_prof = rootgrp['num_levs_per_prof'][:]

    a_nc_vars = []
    for vname in amv_var_names:
        a_nc_vars.append(rootgrp[vname])
    nvars = len(a_nc_vars)

    mf_vars = list(rootgrp.variables.keys())
    p_lon_v = rootgrp['prof_longitude']
    p_lat_v = rootgrp['prof_latitude']

    p_vars = []
    p_var_names = []
    if 'prof_hhb' in mf_vars:
        p_vars.append(rootgrp['prof_hhb'])
        p_var_names.append('layer_bot')
    if 'prof_hht' in mf_vars:
        p_vars.append(rootgrp['prof_hht'])
        p_var_names.append('layer_top')
    if 'prof_spd' in mf_vars:
        p_vars.append(rootgrp['prof_spd'])
        p_var_names.append('speed')
    if 'prof_azm' in mf_vars:
        p_vars.append(rootgrp['prof_azm'])
        p_var_names.append('azimuth')
    npvars = len(p_vars)

    dto = datetime.datetime.strptime(dt_str_0, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
    t_0 = dto.timestamp()

    dto = datetime.datetime.strptime(dt_str_1, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
    t_1 = dto.timestamp()

    if t_1 < t_0:  # swap so that t_0 <= t_1
        t_0, t_1 = t_1, t_0

    times = t_var[:]
    time_idxs = np.arange(n_profs)
    valid = np.logical_and(times >= t_0, times < t_1)
    time_idxs = time_idxs[valid]
    n_times = time_idxs.shape[0]

    lons = p_lon_v[:]
    lats = p_lat_v[:]

    prf_idx_start = np.sum(n_levs_per_prof[0:time_idxs[0]])
    amv_idx_start = np.sum(n_amvs_per_prof[0:time_idxs[0]])

    mx_namvs = np.max(n_amvs_per_prof[time_idxs[0]:time_idxs[0]+n_times])
    mx_nlevs = np.max(n_levs_per_prof[time_idxs[0]:time_idxs[0]+n_times])

    amvs = np.zeros((n_times, mx_namvs, nvars))
    profs = np.zeros((n_times, mx_nlevs, npvars))
    amvs.fill(-999.0)
    profs.fill(-999.0)

    accum_prf = prf_idx_start
    accum_amv = amv_idx_start

    for idx, t_i in enumerate(time_idxs):
        n_amvs = n_amvs_per_prof[t_i]
        n_levs = n_levs_per_prof[t_i]
        a = accum_amv
        b = accum_amv + n_amvs
        c = accum_prf
        d = accum_prf + n_levs

        for k in range(nvars):
            amvs[idx, 0:n_amvs, k] = a_nc_vars[k][a:b]
        for k in range(npvars):
            profs[idx, 0:n_levs, k] = p_vars[k][c:d]

        accum_amv += n_amvs
        accum_prf += n_levs

    coords = {'num_profs': times[time_idxs], 'num_params': p_var_names}
    prof_da = xr.DataArray(profs, coords=coords, dims=['num_profs', 'max_num_levels', 'num_params'])

    coords = {'num_profs': times[time_idxs], 'num_params': amv_var_names}
    amvs_da = xr.DataArray(amvs, coords=coords, dims=['num_profs', 'max_num_amvs', 'num_params'])

    prof_locs_da = xr.DataArray(np.column_stack([lons[time_idxs], lats[time_idxs],
                                                 n_amvs_per_prof[time_idxs], n_levs_per_prof[time_idxs]]),
                                coords=[times[time_idxs], ['longitude', 'latitude', 'num_amvs', 'num_levels']],
                                dims=['num_profs', 'space'])

    return prof_da, amvs_da, prof_locs_da


def analyze(prof_da, amvs_da, prof_locs_da, dist_threshold=5.0):
    # sort amvs by distance to profile
    dst =
amvs_da.sel(num_params='dist_to_prof') s_i = np.argsort(dst, axis=1) s_i = s_i.values for k in range(amvs_da.shape[0]): amvs_da[k] = amvs_da[k, s_i[k]] # sort profiles by level highest to lowest top = prof_da.sel(num_params='layer_top') s_i = np.argsort(top, axis=1) s_i = s_i.values for k in range(prof_da.shape[0]): prof_da[k] = prof_da[k, s_i[k, ::-1]] # analyze single cloud layer profiles one_lyr = prof_locs_da.sel(space='num_levels') == 1 one_lyr_profs = prof_da[one_lyr] one_lyr_amvs = amvs_da[one_lyr] print('number of one layer profs: ', one_lyr_profs.shape[0]) hgt_vld = one_lyr_amvs.sel(num_params='H_3D') > 0 hgt_vld = hgt_vld.values has_vld = hgt_vld.sum(1) > 0 one_lyr_amvs = one_lyr_amvs[has_vld] one_lyr_profs = one_lyr_profs[has_vld] print('number of one layer profs with at least one valid AMV height', one_lyr_profs.shape[0]) # compare profile highest cloud layer top to closest within 5km dst = one_lyr_amvs.sel(num_params='dist_to_prof') close = (np.logical_and(dst > 0.0, dst < dist_threshold)).values close = close.sum(1) > 0 one_lyr_amvs = one_lyr_amvs[close] one_lyr_profs = one_lyr_profs[close] num_one_lyr_profs = one_lyr_profs.shape[0] print('number of one layer profs with at least one AMV within threshold: ', num_one_lyr_profs) cnt = 0 prof_bot = one_lyr_profs.sel(num_params='layer_bot') prof_top = one_lyr_profs.sel(num_params='layer_top') for k in range(num_one_lyr_profs): dst = one_lyr_amvs[k, :, ].sel(num_params='dist_to_prof') b = np.logical_and(dst > 0.0, dst < dist_threshold) h_3d = one_lyr_amvs[k, :].sel(num_params='H_3D') h_3d = h_3d[b] vld = h_3d > 0 h_3d = h_3d[vld] if len(h_3d) > 0: in_lyr = np.logical_and(h_3d > prof_bot[k, 0], h_3d < prof_top[k, 0]) cnt += np.sum(in_lyr) print('fraction hits single cloud layer: ', cnt/num_one_lyr_profs) # Do calculations for single and multi-layer combined hgt_vld = amvs_da.sel(num_params='H_3D') > 0 hgt_vld = hgt_vld.values has_vld = hgt_vld.sum(1) > 0 amvs_da = amvs_da[has_vld] prof_da = prof_da[has_vld] prof_locs_da = prof_locs_da[has_vld] print('number of profs with at least one valid AMV height', prof_da.shape[0]) # compare profile highest cloud layer top to closest within 5km dst = amvs_da.sel(num_params='dist_to_prof') close = (np.logical_and(dst > 0.0, dst < dist_threshold)).values close = close.sum(1) > 0 amvs_da = amvs_da[close] prof_da = prof_da[close] prof_locs_da = prof_locs_da[close] num_profs = prof_da.shape[0] print('number of profs with at least one AMV within 5km: ', num_profs) cnt = 0 prof_bot = prof_da.sel(num_params='layer_bot') prof_top = prof_da.sel(num_params='layer_top') for k in range(num_profs): dst = amvs_da[k, :, ].sel(num_params='dist_to_prof') b = np.logical_and(dst > 0.0, dst < dist_threshold) h_3d = amvs_da[k, :].sel(num_params='H_3D') h_3d = h_3d[b] vld = h_3d > 0 h_3d = h_3d[vld] if len(h_3d) > 0: nlevs = prof_locs_da[k].values.astype(int)[3] for j in range(nlevs): in_lyr = np.logical_and(h_3d > prof_bot[k, j], h_3d < prof_top[k, j]) cnt += np.sum(in_lyr) print('fraction hits multi layer: ', cnt/num_profs) return one_lyr_profs, one_lyr_amvs
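

# ----------------------------------------------------------------------------------------------
# Example driver: a minimal sketch of how the pieces above appear intended to fit together.
# It is not part of the original module; every path, the 'OPS'/'ACHA' source tags, the date
# range, and the AMV variable-name list (other than 'dist_to_prof', which create_file writes,
# and 'H_3D', which analyze() selects) are placeholders to be replaced with local settings.
if __name__ == '__main__':
    # RAOB and GFS best-fit comparison over matched AMVs (run_best_fit_all feeds analyze2)
    amvs, bfs, prd, bfs_gfs, rbm = run_best_fit_all('/path/to/amvs/', 'OPS',
                                                    '/path/to/products/', 'ACHA',
                                                    '/path/to/raobs/', '/path/to/gfs/')
    analyze2(amvs, bfs, rbm, bfs_gfs, prd)

    # AEOLUS-to-AMV/CLAVR-x match file, then a time subset and the cloud-layer hit analysis
    create_amv_to_aeolus_match_file('/path/to/aeolus_txt/', '/path/to/amvs/',
                                    outfile='/path/to/aeolus_amv_match.nc',
                                    amv_source='CARR', band='14', chan='mie')
    prof_da, amvs_da, prof_locs_da = subset_by_time('/path/to/aeolus_amv_match.nc',
                                                    '2019-10-01_00:00', '2019-10-02_00:00',
                                                    ['Lon', 'Lat', 'H_3D', 'dist_to_prof'])
    analyze(prof_da, amvs_da, prof_locs_da, dist_threshold=5.0)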