Skip to content
Snippets Groups Projects
caliop_clavrx_amv.py 12.50 KiB
import os

import h5py
import numpy as np
import xarray as xr
from netCDF4 import Dataset
from aeolus.datasource import get_datasource, CLAVRx_CALIPSO, get_parameters_caliop_clavrx
from util.util import haversine_np
from amv.intercompare import *

amv_file_duration = 60  # minutes
half_width = 30  # search box centered on CALIOP profile (FGF coordinates)
num_elems = 5424
num_lines = 5424

amv_fmwk_exmpl_file = '/ships19/cloud/scratch/4TH_AMV_INTERCOMPARISON/FMWK2_AMV/GOES16_ABI_2KM_FD_2019293_0020_34_WINDS_AMV_EN-14CT.nc'
amv_ops_exmpl_file = '/ships19/cloud/scratch/AMV_FAST_BIAS/OPS_AMVS/OR_ABI-L2-DMWF-M6C14_G16_s20201050000166_e20201050009474_c20201050023226.nc'
#caliop_clvrx_exmpl_file = '/data/Personal/rink/clavrx_calipso/g16_s20201050200_06kmCLay.matchup.calipso.h5'
#caliop_clvrx_exmpl_file = '/data/Personal/rink/4TH_AMV_INTERCOMPARISON/clavrx_calipso/g16_s20192930150_06kmCLay.matchup.calipso.h5'
caliop_clvrx_exmpl_file = '/data/Personal/rink/AMS_2022/caliop_clavrx/g16_s20192931130_06kmCLay.matchup.calipso.h5'


def get_search_box(cc, ll):

    c_rng = [cc - half_width, cc + half_width]
    l_rng = [ll - half_width, ll + half_width]

    if c_rng[0] < 0:
        c_rng[0] = 0

    if l_rng[0] < 0:
        l_rng[0] = 0

    if c_rng[1] >= num_elems:
        c_rng[1] = num_elems - 1

    if l_rng[1] >= num_lines:
        l_rng[1] = num_lines - 1

    return c_rng, l_rng


def match_calipso_clavrx_to_amvs(calipso_clavrx_ds, calipso_clavrx_file, amv_files):
    nav = amv_files.get_navigation()
    amv_params = amv_files.get_parameters()
    all_params = [amv_files.lon_name, amv_files.lat_name, amv_files.elem_name, amv_files.line_name] + amv_params
    match_dict = {}
    coords = {'num_params': all_params}
    dims = ['num_params', 'num_amvs']

    nom_time = calipso_clavrx_ds.get_datetime(calipso_clavrx_file).timestamp()
    calipso_clavrx_nc4 = Dataset(calipso_clavrx_file)

    amv_fname, ftime, f_idx = amv_files.get_file(nom_time)
    if f_idx is None:
        return None

    match_dict[nom_time] = []

    # amv_ds = Dataset(amv_fname)
    # amv_lons = amv_ds[amv_files.lon_name][:]
    # amv_lats = amv_ds[amv_files.lat_name][:]
    # if amv_files.elem_name is not None:
    #     amv_cc = amv_ds[amv_files.elem_name][:]
    #     amv_ll = amv_ds[amv_files.line_name][:]
    # else:
    #     amv_cc, amv_ll = nav.earth_to_lc_s(amv_lons, amv_lats)
    #
    # param_s = []
    # param_s.append(amv_lons)
    # param_s.append(amv_lats)
    # param_s.append(amv_cc)
    # param_s.append(amv_ll)
    # for param in amv_params:
    #     if param == 'V_3D':
    #         param_s.append(amv_ds[param][:, 0])
    #         param_s.append(amv_ds[param][:, 1])
    #     else:
    #         param_s.append(amv_ds[param][:])
    #
    # amv_ds.close()

    # --------------------------------------------------------------
    amvs_nd = get_amv_nd(amv_fname, delimiter=',')
    amvs_nd = filter_amvs(amvs_nd)
    amv_lons = amvs_nd[:, amv_lon_idx]
    amv_lats = amvs_nd[:, amv_lat_idx]
    amv_cc, amv_ll = nav.earth_to_lc_s(amv_lons, amv_lats)

    param_s = []
    param_s.append(amv_lons)
    param_s.append(amv_lats)
    param_s.append(amv_cc)
    param_s.append(amv_ll)

    param_s.append(amvs_nd[:, amv_pres_idx])
    param_s.append(amvs_nd[:, amv_spd_idx])
    param_s.append(amvs_nd[:, amv_dir_idx])

    # -----------------------------------------------------------------
    # exmpl_file = '/data/Personal/rink/4TH_AMV_INTERCOMPARISON/clavrx_calipso/g16_s20192930150_06kmCLay.matchup.calipso.h5'
    exmpl_file = '/data/Personal/rink/AMS_2022/caliop_clavrx/g16_s20192931130_06kmCLay.matchup.calipso.h5'

    calipso_clavrx_params = get_parameters_caliop_clavrx(filename=exmpl_file)
    coords_a = {'num_calipso_clavrx_params': calipso_clavrx_params}
    dims_a = ['num_calipso_clavrx_params']
    calipso_clavrx_data = []
    for pname in calipso_clavrx_params:
        calipso_clavrx_data.append(calipso_clavrx_nc4[pname][:])
    lat_s = calipso_clavrx_nc4['latitude'][:]
    lon_s = calipso_clavrx_nc4['longitude'][:]
    if calipso_clavrx_ds.variables.get('x') is None:
        xs, yx = nav.earth_to_lc_s(amv_lons, amv_lats)
    else:
        xs = calipso_clavrx_nc4['x'][:]
        ys = calipso_clavrx_nc4['y'][:]
    clvr_xy_s = zip(xs, ys)

    for idx, clvr_xy in enumerate(clvr_xy_s):
        lon = lon_s[idx]
        lat = lat_s[idx]
        cc, ll = clvr_xy

        c_rng, l_rng = get_search_box(cc, ll)

        in_cc = np.logical_and(amv_cc > c_rng[0], amv_cc < c_rng[1])
        in_ll = np.logical_and(amv_ll > l_rng[0], amv_ll < l_rng[1])
        in_box = np.logical_and(in_cc, in_ll)

        num_amvs = np.sum(in_box)
        if num_amvs == 0:
            continue
        dist = haversine_np(lon, lat, amv_lons[in_box], amv_lats[in_box])
        s_idxs = np.argsort(dist)
        param_nd = np.vstack(param_s)
        param_nd = param_nd[:, in_box]
        param_nd = param_nd[:, s_idxs]
        dist_to_amvs = dist[s_idxs]
        amvs_da = xr.DataArray(param_nd, coords=coords, dims=dims)

        data_nd = np.vstack(calipso_clavrx_data)
        data_da = xr.DataArray(data_nd[:, idx], coords=coords_a, dims=dims_a)

        match_dict[nom_time].append((cc, ll, lon, lat, f_idx, data_da, amvs_da, dist_to_amvs))

    return match_dict


def create_file(match_dct, output_path, target_filepath, caliop_clavrx_params, amv_params, amv_filenames):
    num_aprofs = 0
    max_num_amvs = 0
    alons = []
    alats = []
    atimes = []
    elems = []
    lines = []
    fidxs = []
    amvs_cnt = []

    filename = os.path.split(target_filepath)[1]
    w_o_ext, ext = os.path.splitext(filename)
    filename = w_o_ext+'.amvs'+'.nc'

    output_filepath = output_path+filename

    amv_file_s = np.array(amv_filenames, dtype='object')

    # scan to get max num levels, amvs
    keys = list(match_dct.keys())
    for key in keys:
        tup_s = match_dct.get(key)
        for tup in tup_s:
            num_aprofs += 1
            atimes.append(key)
            elems.append(tup[0])
            lines.append(tup[1])
            alons.append(tup[2])
            alats.append(tup[3])
            fidxs.append(tup[4])

            amvs = tup[6]
            num_amvs = amvs.shape[1]
            amvs_cnt.append(num_amvs)
            if num_amvs > max_num_amvs:
                max_num_amvs = num_amvs

    alons = np.array(alons)
    alats = np.array(alats)
    atimes = np.array(atimes)
    elems = np.array(elems)
    lines = np.array(lines)
    fidxs = np.array(fidxs)
    amvs_cnt = np.array(amvs_cnt)

    # Sample file to retrieve and copy variable attributes
    rg_exmpl = Dataset(amv_ops_exmpl_file)
    caliop_clavrx_exmpl = Dataset(caliop_clvrx_exmpl_file, 'r')

    # the top level group for the output file
    rootgrp = Dataset(output_filepath, 'w', format='NETCDF4')

    dim_amvs = rootgrp.createDimension('max_num_amvs', size=max_num_amvs)
    dim_num_aeolus_prof = rootgrp.createDimension('num_caliop_profs', size=num_aprofs)
    dim_num_files = rootgrp.createDimension('num_amv_files', size=amv_file_s.shape[0])

    prf_time = rootgrp.createVariable('time', 'f4', ['num_caliop_profs'])
    amv_file_names = rootgrp.createVariable('amv_file_names', str, ['num_amv_files'])

    # ---- CALIOP variables ---------------
    prf_lon = rootgrp.createVariable('caliop_longitude', 'f4', ['num_caliop_profs'])
    prf_lon.units = 'degrees east'
    prf_lat = rootgrp.createVariable('caliop_latitude', 'f4', ['num_caliop_profs'])
    prf_lat.units = 'degrees north'
    prf_time.units = 'seconds since 1970-01-1 00:00:00'
    prf_elem = rootgrp.createVariable('FD_elem', 'f4', ['num_caliop_profs'])
    prf_line = rootgrp.createVariable('FD_line', 'f4', ['num_caliop_profs'])

    amvs_cnt_var = rootgrp.createVariable('num_amvs_per_match', 'i4', ['num_caliop_profs'])

    # ----- CALIOP+CLAVRx variables ------
    nc4_vars_cc = []
    var_s = caliop_clavrx_exmpl.variables
    for pidx, param in enumerate(caliop_clavrx_params):
        v = var_s[param]
        attr_s = v.ncattrs()
        if '_FillValue' in attr_s:
            var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs'], fill_value=v.getncattr('_FillValue'))
        else:
            var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs'])
        # copy attributes from example to new output variable of the same name
        for attr in attr_s:
            if attr != '_FillValue':
                var.setncattr(attr, v.getncattr(attr))
        nc4_vars_cc.append(var)

    # ----- AMV variables ----------------
    amv_lon = rootgrp.createVariable('amv_longitude', 'f4', ['num_caliop_profs', 'max_num_amvs'])
    amv_lon.units = 'degrees east'
    amv_lat = rootgrp.createVariable('amv_latitude', 'f4', ['num_caliop_profs', 'max_num_amvs'])
    amv_lat.units = 'degrees north'
    amv_elem = rootgrp.createVariable('amv_elem', 'f4', ['num_caliop_profs', 'max_num_amvs'])
    amv_line = rootgrp.createVariable('amv_line', 'f4', ['num_caliop_profs', 'max_num_amvs'])
    amv_fidx = rootgrp.createVariable('amv_file_index', 'i4', ['num_caliop_profs'])
    dst_to_amvs = rootgrp.createVariable('distance_to_amvs', 'f4', ['num_caliop_profs', 'max_num_amvs'])

    nc4_vars = []
    var_s = rg_exmpl.variables
    for pidx, param in enumerate(amv_params):
        v = var_s[param]
        attr_s = v.ncattrs()
        if '_FillValue' in attr_s:
            var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs', 'max_num_amvs'], fill_value=v.getncattr('_FillValue'))
        else:
            var = rootgrp.createVariable(param, v.dtype, ['num_caliop_profs', 'max_num_amvs'])
        # copy attributes from example to new output variable of the same name
        for attr in attr_s:
            if attr != '_FillValue':
                var.setncattr(attr, v.getncattr(attr))
        nc4_vars.append(var)

    # Write data to file  ---------------------
    prf_lon[:] = alons
    prf_lat[:] = alats
    prf_elem[:] = elems
    prf_line[:] = lines
    prf_time[:] = atimes
    amvs_cnt_var[:] = amvs_cnt
    amv_fidx[:] = fidxs

    amv_file_names[:] = amv_file_s

    idx = 0
    for key in keys:
        tup_s = match_dct.get(key)
        for tup in tup_s:
            data_nd = tup[5]
            amvs_nd = tup[6]
            dist = tup[7]

            for pidx, param in enumerate(caliop_clavrx_params):
                nda = data_nd[pidx]
                nc4_vars_cc[pidx][idx] = nda

            nda = amvs_nd[0,]
            cnt = nda.shape[0]
            amv_lon[idx, 0:cnt] = nda

            nda = amvs_nd[1,]
            cnt = nda.shape[0]
            amv_lat[idx, 0:cnt] = nda

            nda = amvs_nd[2,]
            cnt = nda.shape[0]
            amv_elem[idx, 0:cnt] = nda

            nda = amvs_nd[3,]
            cnt = nda.shape[0]
            amv_line[idx, 0:cnt] = nda

            dst_to_amvs[idx, 0:cnt] = dist

            for pidx, param in enumerate(amv_params):
                nda = amvs_nd[pidx+4,]
                cnt = nda.shape[0]
                nc4_vars[pidx][idx, 0:cnt] = nda
            idx += 1

    rg_exmpl.close()
    rootgrp.close()


def run_caliop_clavrx_amv_match(output_path, path_to_caliop_clavrx, path_to_amvs, amv_source='OPS', band='14'):
    # exmpl_file = '/data/Personal/rink/4TH_AMV_INTERCOMPARISON/clavrx_calipso/g16_s20192930150_06kmCLay.matchup.calipso.h5'
    exmpl_file = '/data/Personal/rink/AMS_2022/caliop_clavrx/g16_s20192931130_06kmCLay.matchup.calipso.h5'
    caliop_clavrx_params = get_parameters_caliop_clavrx(filename=exmpl_file)

    caliop_clavrx_ds = CLAVRx_CALIPSO(path_to_caliop_clavrx)

    amv_ds = get_datasource(path_to_amvs, amv_source, file_time_span=20, band=band)
    amv_params = amv_ds.get_parameters()
    amv_filenames = amv_ds.flist

    for file in caliop_clavrx_ds.flist:
        match_dict = match_calipso_clavrx_to_amvs(caliop_clavrx_ds, file, amv_ds)
        if match_dict is None:
            continue

        create_file(match_dict, output_path, file, caliop_clavrx_params, amv_params, amv_filenames)
        print('Done processing: ', file)


def analyze(filename):
    h5f = h5py.File(filename, 'r')
    cc_ctt = h5f['closest_calipso_top_temperature'][:]
    cc_ctt = np.where(cc_ctt == -9999.0, np.nan, cc_ctt)
    cc_ctt += 273.0

    cc_ctp = h5f['closest_calipso_top_pressure'][:]
    cc_ctp = np.where(cc_ctp == -9999.0, np.nan, cc_ctp)

    lats = h5f['caliop_latitude'][:]
    lons = h5f['caliop_longitude'][:]

    acha_ctp = h5f['cld_press_acha'][:]
    acha_ctp = np.where(acha_ctp == -999.0, np.nan, acha_ctp)

    amv_press = h5f['pressure'][:, 0:3]
    amv_press = np.where(amv_press == -999.0, np.nan, amv_press)

    amv_press_avg = np.nanmean(amv_press, axis=1)

    return cc_ctp, acha_ctp, amv_press_avg, lats