Skip to content
Snippets Groups Projects
aeolus_amv.py 11.60 KiB
import datetime, os
from datetime import timezone
import glob
import numpy as np
from netCDF4 import Dataset, Dimension, Variable
from aeolus.geos_nav import GEOSNavigation
from util.util import haversine_np


amv_file_duration = 60  # minutes
half_width = 20  # search box centered on AEOLUS profile (FGF coordinates)
num_elems = 5424
num_lines = 5424


first_time = True
ftimes = []
flist = None


class MyGenericException(Exception):
    def __init__(self, message):
        self.message = message


def get_datetime(pathname):
    fname = os.path.split(pathname)[1]
    toks = fname.split('_')
    dstr = toks[4]
    tstr = toks[5]
    dtstr = dstr+tstr
    dto = datetime.datetime.strptime(dtstr, '%Y%j%H%M').replace(tzinfo=timezone.utc)

    return dto


def get_datetime_ops(pathname):
    fname = os.path.split(pathname)[1]
    toks = fname.split('_')
    dtstr = toks[3]
    dtstr = dtstr[:-3]
    dto = datetime.datetime.strptime(dtstr, 's%Y%j%H%M').replace(tzinfo=timezone.utc)

    return dto


def get_datetime_carr(pathname):
    fname = os.path.split(pathname)[1]
    toks = fname.split('_')
    dtstr = toks[3]
    dto = datetime.datetime.strptime(dtstr, '%Y%j.%H%M.ch').replace(tzinfo=timezone.utc)

    return dto


def get_file_containing_time(timestamp, files_path, file_time_span, amv_source='OPS', band='14'):
    global first_time, ftimes, flist

    if first_time is True:

        if amv_source == 'OPS':
            flist = glob.glob(files_path + 'OR_ABI-L2-DMWF*'+'C'+band+'*.nc')
            dto_func = get_datetime_ops
        elif amv_source == 'SSEC':
            flist = glob.glob(files_path + '*WINDS_AMV_EN-'+band+'*.nc')
            dto_func = get_datetime
        elif amv_source == 'CARR':
            flist = glob.glob(files_path + 'tdw_qc_GOES*'+'ch_'+band+'.nc')
            dto_func = get_datetime_carr

        for pname in flist:  # TODO: make better with regular expressions (someday)
            dto = dto_func(pname)
            dto_start = dto
            dto_end = dto + datetime.timedelta(minutes=file_time_span)
            ftimes.append((dto_start.timestamp(), dto_end.timestamp()))

        first_time = False

    k = -1
    for i in range(len(ftimes)):
        if (timestamp >= ftimes[i][0]) and (timestamp < ftimes[i][1]):
            k = i
            break
    if k < 0:
        return None, None, None

    return flist[k], ftimes[k], k


# imports the S4 NOAA output
# filename: full path as a string, '/home/user/filename'
# returns a dict: time -> list of profiles (a profile is a list of levels)
def get_aeolus_time_dict(filename, lon360=False, do_sort=True):
    time_dict = {}

    with open(filename) as file:
        while True:
            prof_hdr = file.readline()
            if not prof_hdr:
                break
            toks = prof_hdr.split()

            yr = int(float(toks[0]))
            mon = int(float(toks[1]))
            dy = int(float(toks[2]))
            hr = int(float(toks[3]))
            mn = int(float(toks[4]))
            ss = int(float(toks[5]))
            lon = float(toks[6])
            lat = float(toks[7])
            nlevs = int(toks[8])

            if lon360:
                if lon < 0:
                    lon += 360.0
            else:
                if lon > 180.0:
                    lon -= 360.0

            dto = datetime.datetime(year=yr, month=mon, day=dy, hour=hr, minute=mn, second=ss)
            dto = dto.replace(tzinfo=timezone.utc)
            timestamp = dto.timestamp()

            prof = []
            if time_dict.get(timestamp, -1) == -1:
                prof_s = []
                prof_s.append(prof)
                time_dict[timestamp] = prof_s
            else:
                prof_s = time_dict.get(timestamp)
                prof_s.append(prof)

            for k in range(nlevs):
                line = file.readline()
                toks = line.split()
                lvlidx = int(toks[0])
                hhh = float(toks[1]) * 1000.0
                hht = float(toks[2]) * 1000.0
                hhb = float(toks[3]) * 1000.0
                err = float(toks[4])
                azm = float(toks[5])
                ws = float(toks[6])
                len = float(toks[7])

                tup = (lat, lon, hhh, hht, hhb, azm, ws)

                prof.append(tup)

        file.close()

    if do_sort:
        keys = np.array(list(time_dict.keys()))
        keys.sort()
        keys = keys.tolist()

        sorted_time_dict = {}

        for key in keys:
            sorted_time_dict[key] = time_dict.get(key)
        time_dict = sorted_time_dict

    return time_dict


# make each profile at a timestamp a numpy array
def time_dict_to_nd(time_dict):
    keys = list(time_dict.keys())
    for key in keys:
        vals = time_dict[key]
        if vals is not None:
            for i in range(len(vals)):
                nda = np.array(vals[i])
                vals[i] = nda

    return time_dict


def concat(t_dct_0, t_dct_1):
    keys_0 = list(t_dct_0.keys())
    nda_0 = np.array(keys_0)

    keys_1 = list(t_dct_1.keys())
    nda_1 = np.array(keys_1)

    comm_keys, comm0, comm1 = np.intersect1d(nda_0, nda_1, return_indices=True)

    comm_keys = comm_keys.tolist()

    for key in comm_keys:
        t_dct_1.pop(key)
    t_dct_0.update(t_dct_1)

    return t_dct_0


def get_aeolus_time_dict_s(filenames, lon360=False, do_sort=True):
    dct_s = []
    for fname in filenames:
        a_dct = get_aeolus_time_dict(fname, lon360=lon360, do_sort=do_sort)
        t_dct = time_dict_to_nd(a_dct)
        dct_s.append(t_dct)

    t_dct = dct_s[0]

    for dct in dct_s[1:]:
        concat(t_dct, dct)

    return t_dct


def run_amv_aeolus_best_fit(match_dict):
    return None


def get_search_box(nav, lon, lat):
    cc, ll = nav.earth_to_lc(lon, lat)
    if cc is None:
        return None, None

    c_rng = [cc - half_width, cc + half_width]
    l_rng = [ll - half_width, ll + half_width]

    if c_rng[0] < 0:
        c_rng[0] = 0

    if l_rng[0] < 0:
        l_rng[0] = 0

    if c_rng[1] >= num_elems:
        c_rng[1] = num_elems - 1

    if l_rng[1] >= num_lines:
        l_rng[1] = num_lines - 1

    return c_rng, l_rng


# Framework
amv_dir_name = 'Wind_Dir'
amv_spd_name = 'Wind_Speed'
amv_lon_name = 'Longitude'
amv_lat_name = 'Latitude'
amv_elem_name = 'Element'
amv_line_name = 'Line'
amv_press_name = 'MedianPress'
# -------------------------------
# Carr stereo
amv_lon_name = 'Lon'
amv_lat_name = 'Lat'
amv_press_name = 'pres'
# -------------------------------
sub_lon = -137.0  # GOES-17
#sub_lon = -75.0  # GOES-16


# aeolus_dict: time -> profiles
# amv_files_path: directory containing AMVs, '/home/user/amvdir/'
# return dict: aeolus time -> tuple (amv_lon, amv_lat, amv_pres, amv_spd, amv_dir)
def match_amvs_to_aeolus(aeolus_dict, amv_files_path, amv_source='OPS', band='14'):
    nav = GEOSNavigation(sub_lon=sub_lon)
    match_dict = {}

    keys = list(aeolus_dict.keys())

    last_f_idx = -1
    for key in keys:
        fname, ftime, f_idx = get_file_containing_time(key, amv_files_path, amv_file_duration, amv_source, band)
        if f_idx is None:
            continue
        profs = aeolus_dict.get(key)
        layers = profs[0]
        if layers is None:
            continue

        lat = layers[0, 0]
        lon = layers[0, 1]

        c_rng, l_rng = get_search_box(nav, lon, lat)
        if c_rng is None:
            continue

        if f_idx != last_f_idx:
            last_f_idx = f_idx
            ds = Dataset(fname)
            amv_lons = ds[amv_lon_name][:]
            amv_lats = ds[amv_lat_name][:]
            amv_spd = ds[amv_spd_name][:]
            amv_dir = ds[amv_dir_name][:]
            amv_pres = ds[amv_press_name][:]
            cc = ds[amv_elem_name][:]
            ll = ds[amv_line_name][:]
            # cc, ll = nav.earth_to_lc_s(amv_lons, amv_lats)
            ds.close()

        in_cc = np.logical_and(cc > c_rng[0], cc < c_rng[1])
        in_ll = np.logical_and(ll > l_rng[0], ll < l_rng[1])
        in_box = np.logical_and(in_cc, in_ll)

        num_amvs = np.sum(in_box)
        if num_amvs == 0:
            continue
        dist = haversine_np(lon, lat, amv_lons[in_box], amv_lats[in_box])
        match_dict[key] = (amv_lons[in_box], amv_lats[in_box], amv_pres[in_box], amv_spd[in_box], amv_dir[in_box], dist)

    return match_dict


# full path as string filename to create, '/home/user/newfilename'
# aeolus_to_amv_dct: output from match_amvs_to_aeolus
# aeolus_dct: output from get_aeolus_time_dict
def create_file(filename, aeolus_to_amv_dct, aeolus_dct):
    keys = list(aeolus_to_amv_dct.keys())

    num_amvs = []
    num_levs = []
    times = []

    namvs = 0
    nlevs = 0
    for key in keys:
        lons, lats, pres, spd, dir, dist = aeolus_to_amv_dct.get(key)
        num_amvs.append(len(lons))
        namvs += len(lons)

        prof_s = aeolus_dct.get(key)
        prof = prof_s[0]
        num_levs.append(prof.shape[0])
        nlevs += prof.shape[0]

        times.append(key)

    amv_per_alus = len(aeolus_to_amv_dct)
    rootgrp = Dataset(filename, 'w', format='NETCDF4')
    dim_amvs = rootgrp.createDimension('amvs', size=namvs)
    dim_alus = rootgrp.createDimension('profs', size=nlevs)
    dim_num_aeolus_prof = rootgrp.createDimension('num_aeolus_profs', size=len(aeolus_to_amv_dct))

    amv_lon = rootgrp.createVariable('amv_longitude', 'f4', ['amvs'])
    amv_lon.units = 'degrees east'
    amv_lat = rootgrp.createVariable('amv_latitude', 'f4', ['amvs'])
    amv_lat.units = 'degrees north'
    amv_spd = rootgrp.createVariable('amv_spd', 'f4', ['amvs'])
    amv_spd.units = 'm s-1'
    amv_dir = rootgrp.createVariable('amv_dir', 'f4', ['amvs'])
    amv_dir.units = 'degree'
    amv_pres = rootgrp.createVariable('amv_pres', 'f4', ['amvs'])
    amv_pres.units = 'hPa'
    amv_dist = rootgrp.createVariable('amv_dist', 'f4', ['amvs'])
    amv_dist.units = 'km'

    num_amvs_per_prof = rootgrp.createVariable('num_amvs_per_prof', 'i4', ['num_aeolus_profs'])
    num_levs_per_prof = rootgrp.createVariable('num_levs_per_prof', 'i4', ['num_aeolus_profs'])
    prof_time = rootgrp.createVariable('time', 'f4', ['num_aeolus_profs'])
    prof_time.units = 'seconds since 1970-01-1 00:00:00'

    prf_azm = rootgrp.createVariable('prof_azm', 'f4', ['profs'])
    prf_azm.units = 'degree'
    prf_spd = rootgrp.createVariable('prof_spd', 'f4', ['profs'])
    prf_spd.units = 'm s-1'
    prf_hht = rootgrp.createVariable('prof_hht', 'f4', ['profs'])
    prf_hht.units = 'meter'
    prf_hhb = rootgrp.createVariable('prof_hhb', 'f4', ['profs'])
    prf_hhb.units = 'meter'

    i_a = 0
    i_c = 0
    for idx, key in enumerate(keys):
        namvs = num_amvs[idx]
        nlevs = num_levs[idx]
        i_b = i_a + namvs
        i_d = i_c + nlevs

        lons, lats, pres, spd, dir, _ = aeolus_to_amv_dct.get(key)
        amv_lon[i_a:i_b] = lons[:]
        amv_lat[i_a:i_b] = lats[:]
        amv_spd[i_a:i_b] = spd[:]
        amv_dir[i_a:i_b] = dir[:]
        amv_pres[i_a:i_b] = pres[:]
        amv_dist[i_a:i_b] = dist[:]
        i_a += namvs

        prof_s = aeolus_dct.get(key)
        prof = prof_s[0]
        prf_hht[i_c:i_d] = prof[:, 2]
        prf_hhb[i_c:i_d] = prof[:, 3]
        prf_azm[i_c:i_d] = prof[:, 5]
        prf_spd[i_c:i_d] = prof[:, 6]
        i_c += nlevs

    num_amvs_per_prof[:] = num_amvs
    num_levs_per_prof[:] = num_levs
    prof_time[:] = times

    rootgrp.close()


# aeolus_file: S4 NOAA txt output
# amv_files_dir: G16/17 AMV product file
# outfile: pathname for the Netcdf match file
def run(aeolus_file, amv_files_dir, outfile=None, amv_source='OPS', band='14'):
    a_d = get_aeolus_time_dict(aeolus_file)
    a_d = time_dict_to_nd(a_d)
    m_d = match_amvs_to_aeolus(a_d, amv_files_dir, amv_source, band)

    if outfile is not None:
        create_file(outfile, m_d, a_d)