Skip to content
Snippets Groups Projects
datasource.py 20.04 KiB
import datetime, os
from datetime import timezone
import glob
import numpy as np
import re
import pickle
from netCDF4 import Dataset
from pathlib import Path

from util.util import GenericException
from util.lon_lat_grid import LonLatGrid
from util.geos_nav import GEOSNavigation
from util.setup import ancillary_path


# def get_parameters_clavrx(filename=homedir+'data/clavrx/clavrx_OR_ABI-L1b-RadF-M6C01_G16_s20192930000343.level2.nc'):
def get_parameters_clavrx(filename=ancillary_path+'clavrx_parameters.pkl'):
    # rg = Dataset(filename, 'r')
    # var_s = rg.variables
    # var_names = list(var_s.keys())
    # var_names_2d = []
    #
    # for str in var_names:
    #     v = var_s[str]
    #     if len(v.shape) == 2:
    #         if not (str.find('latitude') != -1 or str.find('longitude') != -1):
    #             var_names_2d.append(str)
    #
    # rg.close()

    var_names_2d = pickle.load(open(filename, 'rb'))

    return var_names_2d


caliop_clavrx_params = None


def get_parameters_caliop_clavrx(filename='/data/Personal/rink/clavrx_calipso/g16_s20201050200_06kmCLay.matchup.calipso.h5'):
    global caliop_clavrx_params
    if caliop_clavrx_params is not None:
        return caliop_clavrx_params
    rg = Dataset(filename, 'r')
    var_s = rg.variables
    var_names = list(var_s.keys())
    var_names_keep = []

    for str in var_names:
        v = var_s[str]
        if len(v.shape) == 1:
            if not (str.find('latitude') != -1 or str.find('longitude') != -1):
                var_names_keep.append(str)

    rg.close()

    caliop_clavrx_params = var_names_keep
    return var_names_keep


def get_parameters_fmwk_amvs(filename='/ships19/cloud/scratch/4TH_AMV_INTERCOMPARISON/FMWK2_AMV/GOES16_ABI_2KM_FD_2019293_0020_34_WINDS_AMV_EN-14CT.nc'):
    rg = Dataset(filename, 'r')
    var_s = rg.variables
    var_names = list(var_s.keys())
    var_names_to_keep = []

    lon = var_s['Longitude']
    num_amvs = lon.shape[0]

    for str in var_names:
        v = var_s[str]
        if len(v.shape) == 1 and v.shape[0] == num_amvs:
            var_names_to_keep.append(str)

    rg.close()

    return var_names_to_keep


class Files:
    def __init__(self, files_path, file_time_span, pattern):
        if os.path.isdir(files_path):
            self.flist = []
            for path in Path(files_path).rglob(pattern):
                self.flist.append(path)
        else:
            self.flist = glob.glob(files_path + pattern)
        if len(self.flist) == 0:
            raise GenericException('no matching files found in: ' + files_path + pattern)

        self.ftimes = []
        self.span_seconds = datetime.timedelta(minutes=file_time_span).seconds

        for pname in self.flist:
            dto = self.get_datetime(pname)
            dto_start = dto
            dto_end = dto + datetime.timedelta(minutes=file_time_span)
            self.ftimes.append((dto_start.timestamp(), dto_end.timestamp()))

        self.ftimes = np.array(self.ftimes)
        self.flist = np.array(self.flist)
        sidxs = np.argsort(self.ftimes[:, 0])
        self.ftimes = self.ftimes[sidxs, :]
        self.flist = self.flist[sidxs]

    def get_datetime(self, pathname):
        pass

    def get_file_containing_time(self, timestamp):
        k = -1
        for i in range(self.ftimes.shape[0]):
            if (timestamp >= self.ftimes[i, 0]) and (timestamp < self.ftimes[i, 1]):
                k = i
                break
        if k < 0:
            return None, None, None

        return self.flist[k], self.ftimes[k, 0], k

    def get_file(self, timestamp, window=None):
        if window is None:
            window = self.span_seconds
        else:
            window = datetime.timedelta(minutes=window).seconds
        diff = self.ftimes[:, 0] - timestamp
        midx = np.argmin(np.abs(diff))
        if np.abs(self.ftimes[midx, 0] - timestamp) < window:
            return self.flist[midx], self.ftimes[midx, 0], midx
        else:
            return None, None, None

    def get_parameters(self):
        pass


class GOESL1B(Files):
    def __init__(self, files_path, band='14'):
        super().__init__(files_path, 10, 'OR_ABI-L1b-RadC-M*C'+band+'*.nc')

    def get_datetime(self, pathname):
        filename = os.path.split(pathname)[1]
        so = re.search('_s\\d{11}', filename)
        dt_str = so.group()
        dto = datetime.datetime.strptime(dt_str, '_s%Y%j%H%M').replace(tzinfo=timezone.utc)
        return dto


# GOES-16, CONUS. TODO: Generalize to G-16 and FD, MESO
class CLAVRx(Files):
    def __init__(self, files_path):
        super().__init__(files_path, 10, 'clavrx_OR_ABI-L1b*.level2.nc')
        self.params = get_parameters_clavrx()

    def get_datetime(self, pathname):
        filename = os.path.split(pathname)[1]
        so = re.search('_s\\d{11}', filename)
        dt_str = so.group()
        dto = datetime.datetime.strptime(dt_str, '_s%Y%j%H%M').replace(tzinfo=timezone.utc)
        return dto

    def get_parameters(self):
        return self.params

    def get_navigation(self, h5f):
        return GEOSNavigation(sub_lon=-75.0, CFAC=5.6E-05, COFF=-0.101332, LFAC=-5.6E-05, LOFF=0.128212, num_elems=2500, num_lines=1500)


# filename must begin with 'clavrx_H08_YYYYMMDD_HHMM'
class CLAVRx_H08(Files):
    def __init__(self, files_path):
        super().__init__(files_path, 10, 'clavrx_H08*.level2.nc')
        self.params = get_parameters_clavrx()

    def get_datetime(self, pathname):
        filename = os.path.split(pathname)[1]
        dt_str = filename[11:24]
        dto = datetime.datetime.strptime(dt_str, '%Y%m%d_%H%M').replace(tzinfo=timezone.utc)
        return dto

    def get_parameters(self):
        return self.params

    def get_navigation(self, h5f):
        return None


class CLAVRx_CALIPSO(Files):
    def __init__(self, files_path):
        super().__init__(files_path, 10, '*_06kmCLay.matchup.calipso.h5')
        self.params = ['cld_opd_acha', 'cld_reff_acha', 'cld_temp_acha', 'cld_press_acha', 'cld_height_acha']
        # self.params = get_parameters_clavrx()

    def get_datetime(self, pathname):
        filename = os.path.split(pathname)[1]
        dto = datetime.datetime.strptime(filename, 'g16_s%Y%j%H%M_06kmCLay.matchup.calipso.h5').replace(tzinfo=timezone.utc)
        return dto

    def get_parameters(self):
        return self.params

    def get_navigation(self, h5f):
        return None


class CLAVRx_VIIRS(Files):
    def __init__(self, files_path):
        super().__init__(files_path, 5, 'clavrx*viirs*level2.??')
        self.params = get_parameters_clavrx()

    def get_datetime(self, pathname):
        filename = os.path.split(pathname)[1]
        toks = filename.split('.')
        # so = re.search('\\d{11}', toks[4])
        # dt_str = so.group()
        dt_str = toks[1] + toks[2]
        dto = datetime.datetime.strptime(dt_str, 'A%Y%j%H%M').replace(tzinfo=timezone.utc)
        return dto

    def get_parameters(self):
        return self.params

    def get_navigation(self, h5f):
        lons = h5f['longitude'][:, :]
        lats = h5f['latitude'][:, :]
        lons = np.where(lons < 0, lons + 360, lons)
        ll_grid = LonLatGrid(lons, lats, reduce=2)
        return ll_grid


class RAOBfiles(Files):
    def __init__(self, files_path, file_time_span=10):
        super().__init__(files_path, file_time_span, 'raob_soundings*.cdf')

    def get_datetime(self, pathname):
        filename = os.path.split(pathname)[1]
        dt_str = (((filename.split('raob_soundings'))[1]).split('.'))[0]
        dto = datetime.datetime.strptime(dt_str, '%Y%m%d_%H%M').replace(tzinfo=timezone.utc)
        return dto


class GFSfiles(Files):
    def __init__(self, files_path):
        super().__init__(files_path, 10, 'gfs*.h5')

    def get_datetime(self, pathname):
        filename = os.path.split(pathname)[1]
        dto = datetime.datetime.strptime(filename, 'gfs.%y%m%d%H_F012.h5').replace(tzinfo=timezone.utc)
        dto += datetime.timedelta(hours=12)
        return dto


class AMVFiles:

    def __init__(self, files_path, file_time_span, pattern, band='14', elem_name=None, line_name=None, lat_name=None,
                 lon_name=None, press_name=None, params=None, out_params=None, meta_dict=None, qc_params=None):

        self.flist = glob.glob(files_path + pattern)
        if len(self.flist) == 0:
            raise GenericException('no matching files found in: ' + files_path)

        self.band = band
        self.ftimes = []
        self.span_seconds = datetime.timedelta(minutes=file_time_span).seconds

        for pname in self.flist:
            dto = self.get_datetime(pname)
            dto_start = dto
            dto_end = dto + datetime.timedelta(minutes=file_time_span)
            self.ftimes.append((dto_start.timestamp(), dto_end.timestamp()))

        self.ftimes = np.array(self.ftimes)
        self.flist = np.array(self.flist)
        sidxs = np.argsort(self.ftimes[:, 0])
        self.ftimes = self.ftimes[sidxs, :]
        self.flist = self.flist[sidxs]

        self.elem_name = elem_name
        self.line_name = line_name
        self.lon_name = lon_name
        self.lat_name = lat_name
        self.press_name = press_name
        self.params = params
        self.out_params = params
        if out_params is not None:
            self.out_params = out_params
        self.meta_dict = meta_dict
        self.qc_params = qc_params

    def get_datetime(self, pathname):
        pass

    def get_navigation(self):
        pass

    def get_file_containing_time(self, timestamp):
        k = -1
        for i in range(self.ftimes.shape[0]):
            if (timestamp >= self.ftimes[i, 0]) and (timestamp < self.ftimes[i, 1]):
                k = i
                break
        if k < 0:
            return None, None, None

        return self.flist[k], self.ftimes[k, 0], k

    def get_file(self, timestamp, window=None):
        if window is None:
            window = self.span_seconds
        else:
            window = datetime.timedelta(minutes=window).seconds
        diff = self.ftimes[:, 0] - timestamp
        midx = np.argmin(np.abs(diff))
        if np.abs(self.ftimes[midx, 0] - timestamp) < window:
            return self.flist[midx], self.ftimes[midx, 0], midx
        else:
            return None, None, None

    def get_parameters(self):
        return self.params

    def get_out_parameters(self):
        return self.out_params

    def get_meta_dict(self):
        return self.meta_dict

    def get_qc_params(self):
        return self.qc_params

    def filter(self, qc_param):
        pass


class Framework(AMVFiles):
    def __init__(self, files_path, file_time_span, band='14'):
        elem_name = 'Element'
        line_name = 'Line'
        lon_name = 'Longitude'
        lat_name = 'Latitude'

        params = get_parameters_fmwk_amvs()
        params.remove(elem_name)
        params.remove(line_name)
        params.remove(lon_name)
        params.remove(lat_name)
        params.remove('MedianPress')
        params.remove('Wind_Speed')
        params.remove('Wind_Dir')

        params = ['MedianPress', 'Wind_Speed', 'Wind_Dir'] + params

        out_params = ['Lon', 'Lat', 'Element', 'Line', 'pressure', 'wind_speed', 'wind_direction']
        meta_dict = {'Lon': ('degrees east', 'f4'), 'Lat': ('degrees north', 'f4'), 'Element': (None, 'i4'),
                     'Line': (None, 'i4'), 'pressure': ('hPa', 'f4'), 'wind_speed': ('m s-1', 'f4'),
                     'wind_direction': ('degrees', 'f4')}
        #qc_param = 'QI'
        qc_param = 'Flag'

        super().__init__(files_path, file_time_span, '*WINDS_AMV_EN-'+band+'*.nc', band=band, elem_name=elem_name, params=params,
                         line_name=line_name, lat_name=lat_name, lon_name=lon_name, out_params=out_params, meta_dict=meta_dict, qc_params=qc_param)

    def get_navigation(self):
        return GEOSNavigation(sub_lon=-75.0)

    def get_datetime(self, pathname):
        fname = os.path.split(pathname)[1]
        toks = fname.split('_')
        dstr = toks[4]
        tstr = toks[5]
        dtstr = dstr + tstr
        dto = datetime.datetime.strptime(dtstr, '%Y%j%H%M').replace(tzinfo=timezone.utc)

        return dto

    def filter(self, qc_param):
        # good = qc_param > 50
        good = qc_param == 0
        return good


class FrameworkCloudHeight(AMVFiles):
    def __init__(self, files_path, file_time_span):
        elem_name = 'Element'
        line_name = 'Line'
        lon_name = 'Longitude'
        lat_name = 'Latitude'

        out_params = ['CldTopPres', 'CldTopHght', 'CldOptDpth']
        params = ['CldTopPres', 'CldTopHght', 'CldOptDpth']
        meta_dict = {'CldTopPres': ('hPa', 'f4'), 'CldTopHght': ('km', 'f4'), 'CldOptDpth': ('km', 'f4')}

        super().__init__(files_path, file_time_span, '*_CLOUD_HEIGHT_EN'+'*.nc', band=None, elem_name=elem_name, params=params,
                         line_name=line_name, lat_name=lat_name, lon_name=lon_name, out_params=out_params, meta_dict=meta_dict)

    def get_navigation(self):
        return GEOSNavigation(sub_lon=-75.0)

    def get_datetime(self, pathname):
        fname = os.path.split(pathname)[1]
        toks = fname.split('_')
        dstr = toks[4]
        tstr = toks[5]
        dtstr = dstr + tstr
        dto = datetime.datetime.strptime(dtstr, '%Y%j%H%M').replace(tzinfo=timezone.utc)

        return dto


class FrameworkCloudPhase(AMVFiles):
    def __init__(self, files_path, file_time_span):
        elem_name = 'Element'
        line_name = 'Line'
        lon_name = 'Longitude'
        lat_name = 'Latitude'

        out_params = ['CloudPhase', 'CloudType']
        params = ['CloudPhase', 'CloudType']
        meta_dict = {'CloudPhase': (None, 'i1'), 'CloudType': (None, 'i1')}

        super().__init__(files_path, file_time_span, '*_CLOUD_PHASE_EN'+'*.nc', band=None, elem_name=elem_name, params=params,
                         line_name=line_name, lat_name=lat_name, lon_name=lon_name, out_params=out_params, meta_dict=meta_dict)

    def get_navigation(self):
        return GEOSNavigation(sub_lon=-75.0)

    def get_datetime(self, pathname):
        fname = os.path.split(pathname)[1]
        toks = fname.split('_')
        dstr = toks[4]
        tstr = toks[5]
        dtstr = dstr + tstr
        dto = datetime.datetime.strptime(dtstr, '%Y%j%H%M').replace(tzinfo=timezone.utc)

        return dto


class OpsCloudPhase(AMVFiles):
    def __init__(self, files_path, file_time_span):
        elem_name = None
        line_name = None
        lon_name = None
        lat_name = None

        out_params = ['Phase']
        params = ['Phase']
        meta_dict = {'Phase': (None, 'i1')}

        super().__init__(files_path, file_time_span, 'OR_ABI-L2-ACTPF'+'*.nc', band=None, elem_name=elem_name, params=params,
                         line_name=line_name, lat_name=lat_name, lon_name=lon_name, out_params=out_params, meta_dict=meta_dict)

    def get_navigation(self):
        return GEOSNavigation(sub_lon=-75.0)

    def get_datetime(self, pathname):
        fname = os.path.split(pathname)[1]
        toks = fname.split('_')
        dtstr = toks[3]
        dtstr = dtstr[:-3]
        dto = datetime.datetime.strptime(dtstr, 's%Y%j%H%M').replace(tzinfo=timezone.utc)
        return dto


class OPS(AMVFiles):
    def __init__(self, files_path, file_time_span, band='14'):
        elem_name = None
        line_name = None
        lon_name = 'lon'
        lat_name = 'lat'

        params = ['pressure', 'wind_speed', 'wind_direction', 'local_zenith_angle', 'DQF']
        out_params = ['Lon', 'Lat', 'Element', 'Line'] + params
        meta_dict = {'Lon': ('degrees east', 'f4'), 'Lat': ('degrees north', 'f4'), 'Element': (None, 'i4'), 'Line': (None, 'i4'),
                          'pressure': ('hPa', 'f4'), 'wind_speed': ('m s-1', 'f4'), 'wind_direction': ('degrees', 'f4')}

        super().__init__(files_path, file_time_span, 'OR_ABI-L2-DMWF*'+'C'+band+'*.nc', band=band, elem_name=elem_name, params=params,
                         line_name=line_name, lat_name=lat_name, lon_name=lon_name, out_params=out_params, meta_dict=meta_dict)

    def get_navigation(self):
        # return GEOSNavigation(sub_lon=-75.2) ?
        return GEOSNavigation(sub_lon=-75.0)

    def get_datetime(self, pathname):
        fname = os.path.split(pathname)[1]
        toks = fname.split('_')
        dtstr = toks[3]
        dtstr = dtstr[:-3]
        dto = datetime.datetime.strptime(dtstr, 's%Y%j%H%M').replace(tzinfo=timezone.utc)

        return dto


class NOAA_txt(AMVFiles):
    def __init__(self, files_path, file_time_span, band='14'):
        elem_name = None
        line_name = None
        lon_name = 'lon'
        lat_name = 'lat'
        press_name = 'PW'

        # params = ['pressure', 'wind_speed', 'wind_direction']
        params = ['BOX','SRCH','SPD','DIR','PW','LLCM','SPDG','DIRG','TBALB','MAXC','TRKM','PERR','HAMD','QINF','QIWF','QIC']
        out_params = ['Lon', 'Lat', 'Element', 'Line'] + params
        meta_dict = {'Lon': ('degrees east', 'f4'), 'Lat': ('degrees north', 'f4'), 'Element': (None, 'i4'), 'Line': (None, 'i4'),
                          'pressure': ('hPa', 'f4'), 'wind_speed': ('m s-1', 'f4'), 'wind_direction': ('degrees', 'f4')}

        super().__init__(files_path, file_time_span, 'ASCII_AMV*.CT', band=band, elem_name=elem_name, params=params,
                         line_name=line_name, lat_name=lat_name, lon_name=lon_name, press_name=press_name, out_params=out_params, meta_dict=meta_dict)

    def get_navigation(self):
        return GEOSNavigation(sub_lon=-75.0)

    def get_datetime(self, pathname):
        fname = os.path.split(pathname)[1]
        toks = fname.split('.')
        dtstr = toks[2] + '.' + toks[3]
        dto = datetime.datetime.strptime(dtstr, '%Y%j.%H%M').replace(tzinfo=timezone.utc)

        return dto


class CarrStereo(AMVFiles):
    def __init__(self, files_path, file_time_span, band='14'):
        elem_name = 'Element'
        line_name = 'Line'
        lon_name = 'Lon'
        lat_name = 'Lat'

        out_params = ['Lon', 'Lat', 'Element', 'Line', 'V_3D_u', 'V_3D_v', 'H_3D', 'pres', 'Fcst_Spd', 'Fcst_Dir', 'SatZen',
                           'InversionFlag', 'CloudPhase', 'CloudType']

        params = ['V_3D', 'H_3D', 'pres', 'Fcst_Spd', 'Fcst_Dir', 'SatZen',
                       'InversionFlag', 'CloudPhase', 'CloudType']

        meta_dict = {'H_3D': ('m', 'f4'), 'pres': ('hPa', 'f4'), 'Fcst_Spd': ('m s-1', 'f4'),
                          'Fcst_Dir': ('degree', 'f4'),
                          'SatZen': ('degree', 'f4'), 'InversionFlag': (None, 'u1'),
                          'CloudPhase': (None, 'u1'), 'CloudType': (None, 'u1'),
                          'V_3D_u': ('m s-1', 'f4'), 'V_3D_v': ('m s-1', 'f4'), 'Lon': ('degrees east', 'f4'),
                          'Lat': ('degrees north', 'f4'), 'Element': (None, 'i4'), 'Line': (None, 'i4')}

        super().__init__(files_path, file_time_span, 'tdw_qc_GOES*'+'ch_'+band+'.nc', band=band, elem_name=elem_name, params=params,
                         line_name=line_name, lat_name=lat_name, lon_name=lon_name, out_params=out_params, meta_dict=meta_dict)

    def get_navigation(self):
        return GEOSNavigation(sub_lon=-137.0)

    def get_datetime(self, pathname):
        fname = os.path.split(pathname)[1]
        toks = fname.split('_')
        dtstr = toks[3]
        dto = datetime.datetime.strptime(dtstr, '%Y%j.%H%M.ch').replace(tzinfo=timezone.utc)

        return dto


def get_datasource(files_path, source, file_time_span=10, band='14'):
    if source == 'OPS':
        return OPS(files_path, file_time_span, band=band)
    elif source == 'NOAA_TXT':
        return NOAA_txt(files_path, file_time_span, band=band)
    elif source == 'FMWK':
        return Framework(files_path, file_time_span, band=band)
    elif source == 'CARR':
        return CarrStereo(files_path, file_time_span, band=band)
    elif source == 'FMWK_CLD_HGT':
        return FrameworkCloudHeight(files_path, file_time_span)
    elif source == 'FMWK_CLD_PHASE':
        return FrameworkCloudPhase(files_path, file_time_span)
    elif source == 'OPS_CLD_PHASE':
        return OpsCloudPhase(files_path, file_time_span)
    elif source == 'GFS':
        return GFSfiles(files_path)
    elif source == 'RAOB':
        return RAOBfiles(files_path, file_time_span)
    else:
        raise GenericException('Unknown data source type')