Skip to content
Snippets Groups Projects
Select Git revision
  • 052e6d063e9d0d8210e74190d3247faaa56bb9e3
  • master default protected
  • use_flight_altitude
  • distribute
4 results

viirs_l1b_l2.py

Blame
  • user avatar
    tomrink authored
    82070dd5
    History
    viirs_l1b_l2.py 12.51 KiB
    import numpy as np
    import h5py
    from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
    import glob
    import os
    from aeolus.datasource import CLAVRx_VIIRS
    from icing.moon_phase import *
    from pathlib import Path
    
    # --- CLAVRx Radiometric parameters and metadata ------------------------------------------------
    # Order matters: the first 10 entries are the 'temp_*' channels, the last 5 the 'refl_*' channels.
    l1b_ds_list = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
                   'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
                   'refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']

    # Per-dataset storage type, fill value and name of the valid-range attribute.
    l1b_ds_types = {ds: 'f4' for ds in l1b_ds_list}
    l1b_ds_fill = {ds: -32767 for ds in l1b_ds_list[:10]}
    l1b_ds_fill.update({ds: -32768 for ds in l1b_ds_list[10:]})
    l1b_ds_range = {ds: 'actual_range' for ds in l1b_ds_list}

    # --- CLAVRx L2 parameters and metadata
    # Order matters: the first 23 entries are stored as 'f4', the last 3 (cloud_type, cloud_phase,
    # cloud_mask) as 'i1'.
    ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
               'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
               'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
               'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
               'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cloud_type', 'cloud_phase', 'cloud_mask']

    ds_types = {ds: 'f4' for ds in ds_list[:23]}
    ds_types.update({ds: 'i1' for ds in ds_list[23:]})
    ds_fill = {ds: -32768 for ds in ds_list[:23]}
    ds_fill.update({ds: -128 for ds in ds_list[23:]})
    ds_range = {ds: 'actual_range' for ds in ds_list[:23]}
    # The three 'i1' products get no range attribute. These must be the LAST three names, matching
    # ds_types/ds_fill above; indexing from 0 here would wrongly clear the range of the first three
    # float products and leave the integer products without an entry.
    ds_range.update({ds: None for ds in ds_list[23:]})

    # Merge the L1b metadata into the combined lookup tables.
    ds_types.update(l1b_ds_types)
    ds_fill.update(l1b_ds_fill)
    ds_range.update(l1b_ds_range)

    # Extra datasets that are not part of either list above.
    ds_types.update({'temp_3_9um_nom': 'f4'})
    ds_types.update({'cloud_fraction': 'f4'})
    ds_fill.update({'temp_3_9um_nom': -32767})
    ds_fill.update({'cloud_fraction': -32768})
    ds_range.update({'temp_3_9um_nom': 'actual_range'})
    ds_range.update({'cloud_fraction': 'actual_range'})
    
    
    # Channel names grouped under emis_params (all are 'temp_*' datasets).
    emis_params = [
        'temp_10_4um_nom',
        'temp_11_0um_nom',
        'temp_12_0um_nom',
        'temp_13_3um_nom',
        'temp_3_9um_nom',
        'temp_6_7um_nom',
    ]

    # Alternative parameter selections kept for reference:
    # refl_params = ['refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
    # data_params = refl_params + emis_params
    # data_params = emis_params
    # l2_params = ['temp_11_0um_nom', 'temp_12_0um_nom', 'cloud_fraction']
    # l2_params = ['temp_11_0um_nom', 'temp_12_0um_nom', 'refl_0_65um_nom', 'cld_opd_dcomp']
    # l2_params = ['temp_11_0um_nom', 'temp_12_0um_nom', 'refl_0_65um_nom', 'cloud_fraction']

    # Active selection: the datasets stacked, in this order, into every tile.
    l2_params = [
        'temp_11_0um_nom',
        'temp_12_0um_nom',
        'refl_0_65um_nom',
        'cloud_probability',
    ]

    # Labels and inputs currently share the very same list object.
    label_params = data_params = l2_params
    
    
    def keep_tile(param_s, tile):
        """Return True when most valid pixels of a tile have a decisive cloud fraction.

        A pixel is "decisive" when its cloud fraction is NOT strictly between 0.1 and 0.9.
        The tile is kept when more than 75% of its non-NaN pixels are decisive.

        :param param_s: sequence of parameter names; must contain 'cloud_fraction'
                        (ValueError is raised by ``index`` otherwise).
        :param tile: array indexed as tile[param_index, ...]; NaN marks missing pixels.
        :return: bool; False when the tile has no valid (non-NaN) cloud-fraction pixel.
        """
        k = param_s.index('cloud_fraction')
        grd_k = tile[k, ].flatten()
        valid = np.invert(np.isnan(grd_k))
        total = np.sum(valid)
        if total == 0:
            return False

        # Substitute a placeholder for NaNs so the comparisons below never see NaN.
        # (The original computed this substitution but discarded the result.)
        grd_k = np.where(valid, grd_k, 0)

        # Placeholder pixels are masked out again via `valid`, so they never count as decisive.
        ambiguous = np.logical_and(0.1 < grd_k, grd_k < 0.9)
        decisive = np.logical_and(valid, np.invert(ambiguous))
        return bool(np.sum(decisive) / total > 0.75)
    
    
    def process_cld_prob(param_s, tile):
        """Binarize the 'cloud_probability' layer of a tile in place.

        Valid pixels below 0.5 become 0.0; every other pixel (including NaN) becomes 1.0.

        :param param_s: sequence of parameter names; must contain 'cloud_probability'.
        :param tile: array indexed as tile[param_index, ...]; it is modified in place.
        :return: the same ``tile`` object, or None when the layer has no non-NaN pixel.
        """
        layer = param_s.index('cloud_probability')
        prob = tile[layer].copy()

        valid = ~np.isnan(prob)
        if not valid.any():
            return None

        is_clear = valid & (prob < 0.5)
        tile[layer] = np.where(is_clear, 0.0, 1.0)
        return tile
    
    
    def _save_tile_batch(out_directory, cnt, train_tiles, valid_tiles):
        """Stack one batch of tiles, save it with np.save and return (num_train, num_valid).

        Output names are ``out_directory + 'data_train_' + str(cnt)`` and, only when there are
        validation tiles, ``out_directory + 'data_valid_' + str(cnt)``.
        ``train_tiles`` must be non-empty (np.stack rejects an empty list).
        """
        num_valid_samples = 0
        if len(valid_tiles) > 0:
            data_valid = np.stack(valid_tiles)
            np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
            num_valid_samples = data_valid.shape[0]

        data_train = np.stack(train_tiles)
        np.save(out_directory + 'data_train_' + str(cnt), data_train)
        return data_train.shape[0], num_valid_samples


    def run_all(directory, out_directory, day_night='ANY'):
        """Tile every matching CLAVRx file under ``directory`` and save the tiles in batches.

        Files are found recursively with glob. Each one is opened with h5py and handed to
        ``run`` together with ``data_params``; files that cannot be opened or processed are
        reported and skipped. After every 5 files that contributed tiles, the accumulated
        training/validation tiles are stacked and written with np.save. Tiles left over when
        the file list is exhausted are written as one final, smaller batch instead of being
        dropped.

        :param directory: root of the search; it is concatenated directly with the pattern,
                          so it must end with a path separator.
        :param out_directory: prefix for the output names; also concatenated directly.
        :param day_night: passed through to ``run`` ('DAY', 'NIGHT', anything else keeps all).
        :return: None; progress and totals are printed.
        """
        cnt = 10  # suffix of the first output file; incremented per saved batch
        files_per_batch = 5
        total_num_train_samples = 0
        total_num_valid_samples = 0
        num_keep_x_tiles = 8

        # Other patterns that have been used with this routine:
        #   'clavrx*highres*.nc', 'VNP02MOD*.uwssec.nc', 'clavrx_*.nc', 'clavrx_snpp_viirs*.uwssec*.nc'
        pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc'
        path = directory + '**' + '/' + pattern

        data_files = glob.glob(path, recursive=True)

        data_valid_tiles = []
        data_train_tiles = []
        f_cnt = 0

        num_files = len(data_files)

        print('Start, number of files: ', num_files)

        for data_f in data_files:
            try:
                data_h5f = h5py.File(data_f, 'r')
            except Exception:
                # Best effort: an unreadable file must not abort the whole run
                # (but Ctrl-C / SystemExit are no longer swallowed).
                print('cant open file: ', data_f)
                continue

            try:
                run(data_h5f, data_params, data_train_tiles, data_valid_tiles, num_keep_x_tiles=num_keep_x_tiles,
                    tile_width=128, kernel_size=7, day_night=day_night)
            except Exception as e:
                print(e)
                continue
            finally:
                # Runs on success and on failure, so the handle is always released.
                data_h5f.close()
            print(data_f)

            if len(data_train_tiles) == 0:
                continue

            f_cnt += 1
            if (f_cnt % files_per_batch) != 0:
                continue

            num_train_samples, num_valid_samples = _save_tile_batch(out_directory, cnt,
                                                                    data_train_tiles, data_valid_tiles)
            data_valid_tiles = []
            data_train_tiles = []

            print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
            total_num_train_samples += num_train_samples
            total_num_valid_samples += num_valid_samples
            print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)

            cnt += 1

        # Flush what is left when the number of contributing files is not a multiple of the
        # batch size; previously these tiles were silently discarded.
        if len(data_train_tiles) > 0:
            num_train_samples, num_valid_samples = _save_tile_batch(out_directory, cnt,
                                                                    data_train_tiles, data_valid_tiles)
            print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
            total_num_train_samples += num_train_samples
            total_num_valid_samples += num_valid_samples

        print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
    
    
    def _collect_tiles(data, solzen, param_s, tiles, j_start, num_y_tiles, i_start, num_x_tiles, tile_width,
                       day_night):
        """Cut a grid of adjacent square tiles out of ``data`` and append the usable ones to ``tiles``.

        Tiles are ``tile_width`` wide and high, start at row ``j_start`` / column ``i_start`` and
        do not overlap. Each tile is filtered by ``day_night`` and passed through
        ``process_cld_prob``; tiles for which that returns None are skipped.
        """
        for j in range(num_y_tiles):
            j_a = j_start + j * tile_width
            j_b = j_a + tile_width

            for i in range(num_x_tiles):
                i_a = i_start + i * tile_width
                i_b = i_a + tile_width

                nda = data[:, j_a:j_b, i_a:i_b]
                # Slices that run off the grid (or start at a negative index on a small grid)
                # yield truncated tiles, which would later break np.stack; skip them.
                if nda.shape[1:] != (tile_width, tile_width):
                    continue

                if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                    continue
                if day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                    continue

                # NOTE: nda is a view, so process_cld_prob's in-place edit also touches `data`.
                nda = process_cld_prob(param_s, nda)
                if nda is not None:
                    tiles.append(nda)


    def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
        """Read ``param_s`` from an open file and append training/validation tiles to the given lists.

        The parameters are read as full grids and stacked along a new first axis. Tiles of width
        ``tile_width + 2 * border`` (border = (kernel_size - 1) / 2) are taken from
        ``num_keep_x_tiles`` columns centred on the middle pixel. Row-wise, training tiles come
        first, followed by a gap of two tile heights, followed by the validation tiles.

        :param data_h5f: open file object accepted by ``get_grid_values``.
        :param param_s: names of the datasets to read; the first one defines the grid shape.
        :param train_tiles: list that receives training tiles (modified in place).
        :param valid_tiles: list that receives validation tiles (modified in place).
        :param num_keep_x_tiles: number of tiles per tile row.
        :param tile_width: tile size before the border is added.
        :param kernel_size: kernel size used to derive the border.
        :param day_night: 'DAY' keeps only tiles for which ``is_day`` is true, 'NIGHT' only those
                          for which it is false; any other value keeps all tiles.
        :return: None. If a parameter cannot be read, the error is printed and nothing is appended.
        """
        border = int((kernel_size - 1)/2)

        param_name = param_s[0]

        num_lines = data_h5f[param_name].shape[0]
        num_pixels = data_h5f[param_name].shape[1]

        solzen = get_grid_values(data_h5f, 'solar_zenith_angle', 0, 0, None, num_lines, num_pixels)

        grd_s = []
        for param in param_s:
            fill_value, fill_value_name = get_fill_attrs(param)
            try:
                grd = get_grid_values(data_h5f, param, 0, 0, None, num_lines, num_pixels, fill_value_name=fill_value_name, fill_value=fill_value)
            except Exception as e:
                print(e)
                return
            grd_s.append(grd)
        data = np.stack(grd_s)

        tile_width += 2 * border

        # Centre the block of kept tile columns on the middle pixel.
        i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)

        num_keep_y_tiles = int(num_lines / tile_width) - 3

        num_y_valid = int(num_keep_y_tiles * 0.1) + 1
        num_y_train = num_keep_y_tiles - num_y_valid - 1

        _collect_tiles(data, solzen, param_s, train_tiles, 0, num_y_train, i_start, num_keep_x_tiles,
                       tile_width, day_night)

        # Leave two tile heights unused between the training and validation rows.
        j_start = num_y_train * tile_width + 2*tile_width
        _collect_tiles(data, solzen, param_s, valid_tiles, j_start, num_y_valid, i_start, num_keep_x_tiles,
                       tile_width, day_night)
    
    
    def scan(directory):
        """Print every file of a CLAVRx_VIIRS data source that is a night scene with ``moon_phase`` true.

        Files whose 'solar_zenith_angle' cannot be read are skipped silently.

        :param directory: passed to ``CLAVRx_VIIRS``; its ``flist`` and ``ftimes`` are iterated.
        :return: None; matching file names are printed.
        """
        data_src = CLAVRx_VIIRS(directory)
        files = data_src.flist

        for idx, file in enumerate(files):
            ts = data_src.ftimes[idx][0]
            # The context manager closes the file on every path, including when
            # is_night/moon_phase raise.
            with h5py.File(file, 'r') as h5f:
                try:
                    solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
                except Exception:
                    # Best-effort scan: ignore files without a readable solar zenith angle.
                    continue

                # if is_day(solzen) and moon_phase(ts):
                if is_night(solzen) and moon_phase(ts):
                    print(file)
    
    
    def scan_for_location(txt_file, lon_range=(111.0, 130.0), lat_range=(14.0, 32.0)):
        """Print the listed files whose reference pixel lies strictly inside a lon/lat box.

        ``txt_file`` holds one file path per line. For each file, longitude and latitude are read
        with stride 4 and the pixel at [406, 400] of the strided grids is tested against the box.
        Files whose coordinates cannot be read or indexed are skipped silently.

        :param txt_file: path of the text file listing the files to scan.
        :param lon_range: (min, max) longitude, exclusive bounds.
        :param lat_range: (min, max) latitude, exclusive bounds.
        :return: None; matching paths are printed.
        """
        with open(txt_file) as file:
            for fpath in file:
                fpath = fpath.strip()
                if not fpath:
                    # Blank line in the listing: nothing to open.
                    continue

                # The context manager closes the file on the success path too (it used to be
                # closed only when an exception occurred).
                with h5py.File(fpath, 'r') as h5f:
                    try:
                        lon_s = get_grid_values_all(h5f, 'longitude', stride=4)
                        lat_s = get_grid_values_all(h5f, 'latitude', stride=4)
                        # NOTE(review): [406, 400] is presumably the granule centre on the
                        # stride-4 grid -- confirm against the expected granule size.
                        c_lon, c_lat = lon_s[406, 400], lat_s[406, 400]
                        if (lon_range[0] < c_lon < lon_range[1]) and (lat_range[0] < c_lat < lat_range[1]):
                            print(fpath)
                    except Exception:
                        # Best-effort scan: skip files that fail to read or are too small.
                        continue
    
    
    def test_nlcomp(file):
        """Compute LWC/IWC from the NLCOMP retrievals of one file.

        Reads 'cloud_phase', 'cld_reff_nlcomp', 'cld_opd_nlcomp' and 'cld_geo_thick', keeps the
        pixels where all four are non-NaN and the geometric thickness exceeds 5.0, and passes the
        selected values to ``compute_lwc_iwc``.

        :param file: path of the file to open with h5py.
        :return: tuple (lwc_c, iwc_c) as returned by ``compute_lwc_iwc``.
        """
        # Everything is done inside the context manager so the file is always closed
        # (the handle was previously never released) and is still open while the grids are used.
        with h5py.File(file, 'r') as h5f:
            cld_phs = get_grid_values_all(h5f, 'cloud_phase', scale_factor_name=None, range_name=None)
            keep_0 = np.invert(np.isnan(cld_phs))

            reff = get_grid_values_all(h5f, 'cld_reff_nlcomp')
            keep_1 = np.invert(np.isnan(reff))

            opd = get_grid_values_all(h5f, 'cld_opd_nlcomp')
            keep_2 = np.invert(np.isnan(opd))

            cld_dz = get_grid_values_all(h5f, 'cld_geo_thick')
            keep_3 = np.logical_and(np.invert(np.isnan(cld_dz)), cld_dz > 5.0)

            keep = keep_0 & keep_1 & keep_2 & keep_3

            cld_phs = cld_phs[keep]
            reff = reff[keep]
            opd = opd[keep]
            cld_dz = cld_dz[keep]

            lwc_c, iwc_c = compute_lwc_iwc(cld_phs, reff, opd, cld_dz)

        return lwc_c, iwc_c
    
    
    # def run_mean_std(directory):
    #
    #     data_dct = {name: [] for name in mod_res_params}
    #     mean_dct = {name: 0 for name in mod_res_params}
    #     std_dct = {name: 0 for name in mod_res_params}
    #
    #     for p in os.scandir(directory):
    #         if not p.is_dir():
    #             continue
    #         mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
    #
    #         for idx, mfile in enumerate(mod_files):
    #             if idx % 8 == 0:
    #                 h5f = h5py.File(mfile, 'r')
    #                 for param in mod_res_params:
    #                     name = 'observation_data/'+param
    #                     gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
    #                     data_dct[param].append(gvals.flatten())
    #                 print(mfile)
    #                 h5f.close()
    #
    #     for param in mod_res_params:
    #         data = data_dct[param]
    #         data = np.concatenate(data)
    #
    #         mean_dct[param] = np.nanmean(data)
    #         std_dct[param] = np.nanstd(data)