import os import h5py import numpy as np import xarray as xr from netCDF4 import Dataset from aeolus.datasource import get_datasource, CLAVRx_CALIPSO, get_parameters_caliop_clavrx from util.util import haversine_np from amv.intercompare import * amv_file_duration = 60 # minutes half_width = 30 # search box centered on CALIOP profile (FGF coordinates) num_elems = 5424 num_lines = 5424 amv_fmwk_exmpl_file = '/ships19/cloud/scratch/4TH_AMV_INTERCOMPARISON/FMWK2_AMV/GOES16_ABI_2KM_FD_2019293_0020_34_WINDS_AMV_EN-14CT.nc' amv_ops_exmpl_file = '/ships19/cloud/scratch/AMV_FAST_BIAS/OPS_AMVS/OR_ABI-L2-DMWF-M6C14_G16_s20201050000166_e20201050009474_c20201050023226.nc' #caliop_clvrx_exmpl_file = '/data/Personal/rink/clavrx_calipso/g16_s20201050200_06kmCLay.matchup.calipso.h5' caliop_clvrx_exmpl_file = '/data/Personal/rink/4TH_AMV_INTERCOMPARISON/clavrx_calipso/g16_s20192930150_06kmCLay.matchup.calipso.h5' def get_search_box(cc, ll): c_rng = [cc - half_width, cc + half_width] l_rng = [ll - half_width, ll + half_width] if c_rng[0] < 0: c_rng[0] = 0 if l_rng[0] < 0: l_rng[0] = 0 if c_rng[1] >= num_elems: c_rng[1] = num_elems - 1 if l_rng[1] >= num_lines: l_rng[1] = num_lines - 1 return c_rng, l_rng def match_calipso_clavrx_to_amvs(calipso_clavrx_ds, calipso_clavrx_file, amv_files): nav = amv_files.get_navigation() amv_params = amv_files.get_parameters() all_params = [amv_files.lon_name, amv_files.lat_name, amv_files.elem_name, amv_files.line_name] + amv_params match_dict = {} coords = {'num_params': all_params} dims = ['num_params', 'num_amvs'] nom_time = calipso_clavrx_ds.get_datetime(calipso_clavrx_file).timestamp() calipso_clavrx_nc4 = Dataset(calipso_clavrx_file) amv_fname, ftime, f_idx = amv_files.get_file(nom_time) if f_idx is None: return None match_dict[nom_time] = [] # amv_ds = Dataset(amv_fname) # amv_lons = amv_ds[amv_files.lon_name][:] # amv_lats = amv_ds[amv_files.lat_name][:] # if amv_files.elem_name is not None: # amv_cc = amv_ds[amv_files.elem_name][:] # amv_ll = amv_ds[amv_files.line_name][:] # else: # amv_cc, amv_ll = nav.earth_to_lc_s(amv_lons, amv_lats) # # param_s = [] # param_s.append(amv_lons) # param_s.append(amv_lats) # param_s.append(amv_cc) # param_s.append(amv_ll) # for param in amv_params: # if param == 'V_3D': # param_s.append(amv_ds[param][:, 0]) # param_s.append(amv_ds[param][:, 1]) # else: # param_s.append(amv_ds[param][:]) # # amv_ds.close() # -------------------------------------------------------------- amvs_nd = get_amv_nd(amv_fname, delimiter=',') amvs_nd = filter_amvs(amvs_nd) amv_lons = amvs_nd[:, amv_lon_idx] amv_lats = amvs_nd[:, amv_lat_idx] amv_cc, amv_ll = nav.earth_to_lc_s(amv_lons, amv_lats) param_s = [] param_s.append(amv_lons) param_s.append(amv_lats) param_s.append(amv_cc) param_s.append(amv_ll) param_s.append(amvs_nd[:, amv_pres_idx]) param_s.append(amvs_nd[:, amv_spd_idx]) param_s.append(amvs_nd[:, amv_dir_idx]) # ----------------------------------------------------------------- calipso_clavrx_params = get_parameters_caliop_clavrx(filename='/data/Personal/rink/4TH_AMV_INTERCOMPARISON/clavrx_calipso/g16_s20192930150_06kmCLay.matchup.calipso.h5') coords_a = {'num_calipso_clavrx_params': calipso_clavrx_params} dims_a = ['num_calipso_clavrx_params'] calipso_clavrx_data = [] for pname in calipso_clavrx_params: calipso_clavrx_data.append(calipso_clavrx_nc4[pname][:]) lat_s = calipso_clavrx_nc4['latitude'][:] lon_s = calipso_clavrx_nc4['longitude'][:] xs = calipso_clavrx_nc4['x'][:] ys = calipso_clavrx_nc4['y'][:] clvr_xy_s = zip(xs, ys) for idx, clvr_xy in enumerate(clvr_xy_s): lon = lon_s[idx] lat = lat_s[idx] cc, ll = clvr_xy c_rng, l_rng = get_search_box(cc, ll) in_cc = np.logical_and(amv_cc > c_rng[0], amv_cc < c_rng[1]) in_ll = np.logical_and(amv_ll > l_rng[0], amv_ll < l_rng[1]) in_box = np.logical_and(in_cc, in_ll) num_amvs = np.sum(in_box) if num_amvs == 0: continue dist = haversine_np(lon, lat, amv_lons[in_box], amv_lats[in_box]) s_idxs = np.argsort(dist) param_nd = np.vstack(param_s) param_nd = param_nd[:, in_box] param_nd = param_nd[:, s_idxs] dist_to_amvs = dist[s_idxs] amvs_da = xr.DataArray(param_nd, coords=coords, dims=dims) data_nd = np.vstack(calipso_clavrx_data) data_da = xr.DataArray(data_nd[:, idx], coords=coords_a, dims=dims_a) match_dict[nom_time].append((cc, ll, lon, lat, f_idx, data_da, amvs_da, dist_to_amvs)) return match_dict def create_file(match_dct, output_path, target_filepath, caliop_clavrx_params, amv_params, amv_filenames): num_aprofs = 0 max_num_amvs = 0 alons = [] alats = [] atimes = [] elems = [] lines = [] fidxs = [] amvs_cnt = [] filename = os.path.split(target_filepath)[1] w_o_ext, ext = os.path.splitext(filename) filename = w_o_ext+'.amvs'+'.nc' output_filepath = output_path+filename amv_file_s = np.array(amv_filenames, dtype='object') # scan to get max num levels, amvs keys = list(match_dct.keys()) for key in keys: tup_s = match_dct.get(key) for tup in tup_s: num_aprofs += 1 atimes.append(key) elems.append(tup[0]) lines.append(tup[1]) alons.append(tup[2]) alats.append(tup[3]) fidxs.append(tup[4]) amvs = tup[6] num_amvs = amvs.shape[1] amvs_cnt.append(num_amvs) if num_amvs > max_num_amvs: max_num_amvs = num_amvs alons = np.array(alons) alats = np.array(alats) atimes = np.array(atimes) elems = np.array(elems) lines = np.array(lines) fidxs = np.array(fidxs) amvs_cnt = np.array(amvs_cnt) # Sample file to retrieve and copy variable attributes rg_exmpl = Dataset(amv_ops_exmpl_file) caliop_clavrx_exmpl = Dataset(caliop_clvrx_exmpl_file, 'r') # the top level group for the output file rootgrp = Dataset(output_filepath, 'w', format='NETCDF4') dim_amvs = rootgrp.createDimension('max_num_amvs', size=max_num_amvs) dim_num_aeolus_prof = rootgrp.createDimension('num_caliop_profs', size=num_aprofs) dim_num_files = rootgrp.createDimension('num_amv_files', size=amv_file_s.shape[0]) prf_time = rootgrp.createVariable('time', 'f4', ['num_caliop_profs']) amv_file_names = rootgrp.createVariable('amv_file_names', str, ['num_amv_files']) # ---- CALIOP variables --------------- prf_lon = rootgrp.createVariable('caliop_longitude', 'f4', ['num_caliop_profs']) prf_lon.units = 'degrees east' prf_lat = rootgrp.createVariable('caliop_latitude', 'f4', ['num_caliop_profs']) prf_lat.units = 'degrees north' prf_time.units = 'seconds since 1970-01-1 00:00:00' prf_elem = rootgrp.createVariable('FD_elem', 'f4', ['num_caliop_profs']) prf_line = rootgrp.createVariable('FD_line', 'f4', ['num_caliop_profs']) amvs_cnt_var = rootgrp.createVariable('num_amvs_per_match', 'i4', ['num_caliop_profs']) # ----- CALIOP+CLAVRx variables ------ nc4_vars_cc = [] var_s = caliop_clavrx_exmpl.variables for pidx, param in enumerate(caliop_clavrx_params): v = var_s[param] attr_s = v.ncattrs() if '_FillValue' in attr_s: var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs'], fill_value=v.getncattr('_FillValue')) else: var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs']) # copy attributes from example to new output variable of the same name for attr in attr_s: if attr != '_FillValue': var.setncattr(attr, v.getncattr(attr)) nc4_vars_cc.append(var) # ----- AMV variables ---------------- amv_lon = rootgrp.createVariable('amv_longitude', 'f4', ['num_caliop_profs', 'max_num_amvs']) amv_lon.units = 'degrees east' amv_lat = rootgrp.createVariable('amv_latitude', 'f4', ['num_caliop_profs', 'max_num_amvs']) amv_lat.units = 'degrees north' amv_elem = rootgrp.createVariable('amv_elem', 'f4', ['num_caliop_profs', 'max_num_amvs']) amv_line = rootgrp.createVariable('amv_line', 'f4', ['num_caliop_profs', 'max_num_amvs']) amv_fidx = rootgrp.createVariable('amv_file_index', 'i4', ['num_caliop_profs']) dst_to_amvs = rootgrp.createVariable('distance_to_amvs', 'f4', ['num_caliop_profs', 'max_num_amvs']) nc4_vars = [] var_s = rg_exmpl.variables for pidx, param in enumerate(amv_params): v = var_s[param] attr_s = v.ncattrs() if '_FillValue' in attr_s: var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs', 'max_num_amvs'], fill_value=v.getncattr('_FillValue')) else: var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs', 'max_num_amvs']) # copy attributes from example to new output variable of the same name for attr in attr_s: if attr != '_FillValue': var.setncattr(attr, v.getncattr(attr)) nc4_vars.append(var) # Write data to file --------------------- prf_lon[:] = alons prf_lat[:] = alats prf_elem[:] = elems prf_line[:] = lines prf_time[:] = atimes amvs_cnt_var[:] = amvs_cnt amv_fidx[:] = fidxs amv_file_names[:] = amv_file_s idx = 0 for key in keys: tup_s = match_dct.get(key) for tup in tup_s: data_nd = tup[5] amvs_nd = tup[6] dist = tup[7] for pidx, param in enumerate(caliop_clavrx_params): nda = data_nd[pidx] nc4_vars_cc[pidx][idx] = nda nda = amvs_nd[0,] cnt = nda.shape[0] amv_lon[idx, 0:cnt] = nda nda = amvs_nd[1,] cnt = nda.shape[0] amv_lat[idx, 0:cnt] = nda nda = amvs_nd[2,] cnt = nda.shape[0] amv_elem[idx, 0:cnt] = nda nda = amvs_nd[3,] cnt = nda.shape[0] amv_line[idx, 0:cnt] = nda dst_to_amvs[idx, 0:cnt] = dist for pidx, param in enumerate(amv_params): nda = amvs_nd[pidx+4,] cnt = nda.shape[0] nc4_vars[pidx][idx, 0:cnt] = nda idx += 1 rg_exmpl.close() rootgrp.close() def run_caliop_clavrx_amv_match(output_path, path_to_caliop_clavrx, path_to_amvs, amv_source='OPS', band='14'): caliop_clavrx_params = get_parameters_caliop_clavrx() caliop_clavrx_ds = CLAVRx_CALIPSO(path_to_caliop_clavrx) amv_ds = get_datasource(path_to_amvs, amv_source, file_time_span=20, band=band) amv_params = amv_ds.get_parameters() amv_filenames = amv_ds.flist for file in caliop_clavrx_ds.flist: match_dict = match_calipso_clavrx_to_amvs(caliop_clavrx_ds, file, amv_ds) if match_dict is None: continue create_file(match_dict, output_path, file, caliop_clavrx_params, amv_params, amv_filenames) print('Done processing: ', file) def analyze(filename): h5f = h5py.File(filename, 'r') cc_ctt = h5f['closest_calipso_top_temperature'][:] cc_ctt = np.where(cc_ctt == -9999.0, np.nan, cc_ctt) cc_ctt += 273.0 cc_ctp = h5f['closest_calipso_top_pressure'][:] cc_ctp = np.where(cc_ctp == -9999.0, np.nan, cc_ctp) lats = h5f['caliop_latitude'][:] lons = h5f['caliop_longitude'][:] acha_ctp = h5f['cld_press_acha'][:] acha_ctp = np.where(acha_ctp == -999.0, np.nan, acha_ctp) amv_press = h5f['pressure'][:, 0:3] amv_press = np.where(amv_press == -999.0, np.nan, amv_press) amv_press_avg = np.nanmean(amv_press, axis=1) return cc_ctp, acha_ctp, amv_press_avg, lats