Skip to content
Snippets Groups Projects
Select Git revision
  • f6b06655d748a05b9fb897a29e647a46fdb62495
  • master default protected
  • use_flight_altitude
  • distribute
4 results

abi_surfrad.py

Blame
  • user avatar
    tomrink authored
    b05c3f50
    History
    abi_surfrad.py 4.41 KiB
    import numpy as np
    import h5py
    from util.util import get_grid_values
    import glob
    
    
    # Retrieval being processed. keep_tile() dispatches on this value; the two
    # values it recognises are 'cloud_probability' and 'cld_opd_dcomp'.
    target_param = 'cloud_probability'
    # target_param = 'cld_opd_dcomp'

    # Group prefixes inside each input file. Labels are read from the 'super/'
    # group and inputs from the 'orig/' group (run() reads the label grids at
    # twice the line/pixel count of the data grids).
    group_name_i = 'super/'
    group_name_m = 'orig/'

    solzen_name = group_name_m + 'solar_zenith'

    # Fully qualified dataset names: one label plane, three input planes.
    label_params = [group_name_i + target_param]
    data_params = [group_name_m + name for name in ('temp_ch31', 'refl_ch01', target_param)]
    
    
    def keep_tile(param, param_s, tile):
        """Filter one plane of `tile` with the processor chosen by `target_param`.

        The plane at index ``param_s.index(param)`` along the first axis is
        copied and handed to ``process_cld_prob_`` or ``process_cld_opd_``,
        depending on the module-level ``target_param``. Any other value of
        ``target_param`` leaves the plane unchanged.

        :param param: name of the plane to filter; must be present in `param_s`.
        :param param_s: sequence of names, ordered like the first axis of `tile`.
        :param tile: array whose first axis indexes the planes; modified in place.
        :return: `tile` itself with the filtered plane written back, or None
            when the processor rejected the plane.
        :raises ValueError: if `param` is not in `param_s`.
        """
        processors = {
            'cloud_probability': process_cld_prob_,
            'cld_opd_dcomp': process_cld_opd_,
        }

        plane_idx = param_s.index(param)
        plane = tile[plane_idx, ].copy()

        processor = processors.get(target_param)
        if processor is not None:
            plane = processor(plane)

        # A processor signals rejection with None; propagate it to the caller.
        if plane is None:
            return None

        tile[plane_idx, ] = plane
        return tile
    
    
    def process_cld_prob_(grd_k):
        """Screen a cloud-probability grid and zero out its uninformative pixels.

        A pixel is "kept" when it is not NaN and lies strictly between 0.10 and
        0.90. The grid is rejected when fewer than 25% of its non-NaN pixels are
        kept, or when it has no non-NaN pixels at all.

        :param grd_k: array of cloud-probability values, possibly containing NaN.
        :return: a new array equal to `grd_k` with every non-kept pixel (NaN
            included) replaced by 0, or None if the grid is rejected.
        """
        valid = np.invert(np.isnan(grd_k))
        num_valid = np.sum(valid)

        # Without this guard the ratio below is 0/0 = NaN, and `NaN < 0.25` is
        # False, so an all-NaN (or empty) grid would be accepted as all zeros.
        if num_valid == 0:
            return None

        # NOTE: a stricter coverage requirement is currently disabled:
        # if num_valid / grd_k.size < 0.98:
        #     return None

        keep = np.where(valid, np.logical_and(0.10 < grd_k, grd_k < 0.90), False)
        if np.sum(keep) / num_valid < 0.25:
            return None

        return np.where(np.invert(keep), 0, grd_k)
    
    
    def process_cld_opd_(grd_k):
        """Screen a cloud-optical-depth grid and replace its NaN pixels with 0.

        A pixel counts as "in range" when it is not NaN and lies strictly
        between 0.1 and 158.0. The grid is rejected when fewer than 50% of its
        non-NaN pixels are in range, or when it has no non-NaN pixels at all.
        Out-of-range values are left untouched in the returned grid; only NaN
        is replaced.

        :param grd_k: array of optical-depth values, possibly containing NaN.
        :return: a new array equal to `grd_k` with NaN replaced by 0, or None
            if the grid is rejected.
        """
        valid = np.invert(np.isnan(grd_k))
        num_valid = np.sum(valid)

        # Without this guard the ratio below is 0/0 = NaN, and `NaN < 0.50` is
        # False, so an all-NaN (or empty) grid would be accepted as all zeros.
        if num_valid == 0:
            return None

        # NOTE: a stricter coverage requirement is currently disabled:
        # if num_valid / grd_k.size < 0.98:
        #     return None

        grd_k = np.where(valid, grd_k, 0)
        in_range = np.where(valid, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
        if np.sum(in_range) / num_valid < 0.50:
            return None

        return grd_k
    
    
    def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10):
        """Collect data/label tiles from every matching file under `directory`.

        Files are found with a recursive glob, opened with h5py, and passed to
        ``run()``, which appends accepted tiles to the running batch. The batch
        is stacked, counted and reset after every 100 successfully processed
        files, and once more after the last file so trailing tiles are counted.
        Progress and sample counts are printed; nothing is returned.

        :param directory: root directory to search. It is concatenated as-is
            with '**/' + pattern, so it must end with a path separator.
        :param out_directory: prefix for the per-batch output files. Only used
            by the np.save calls, which are currently disabled.
        :param pattern: glob pattern for the file names.
        :param start: initial value of the batch counter used in output names.
        """
        cnt = start
        total_num_samples = 0

        path = directory + '**' + '/' + pattern
        files = glob.glob(path, recursive=True)

        label_tiles = []
        data_tiles = []
        f_cnt = 0

        num_files = len(files)
        print('Start, number of files: ', num_files)

        def flush_batch():
            """Stack the accumulated tiles, report progress, and reset the batch."""
            nonlocal cnt, total_num_samples

            label = np.stack(label_tiles)
            data = np.stack(data_tiles)
            # NOTE: writing batches to disk is currently disabled.
            # np.save(out_directory + 'label_' + str(cnt), label)
            # np.save(out_directory + 'data_' + str(cnt), data)
            num_samples = data.shape[0]

            # Clear in place: run() keeps appending to these same list objects.
            del label_tiles[:]
            del data_tiles[:]

            print('  num_samples, progress % : ', num_samples, int((f_cnt/num_files)*100))
            total_num_samples += num_samples
            print('total_num_samples: ', total_num_samples)
            print('------------------------------------------------------------')

            cnt += 1

        for f in files:
            try:
                h5f = h5py.File(f, 'r')
            except Exception:
                # Unreadable file: report it and move on. `Exception` rather than
                # a bare except, so Ctrl-C still interrupts the scan.
                print('cant open file: ', f)
                continue

            try:
                run(h5f, data_params, data_tiles, label_params, label_tiles, kernel_size=5)
            except Exception as e:
                # Best effort: a file that fails to process is skipped, not fatal.
                print(e)
                continue
            finally:
                h5f.close()

            print(f)
            f_cnt += 1

            if data_tiles and (f_cnt % 100) == 0:
                flush_batch()

        # Tiles gathered since the last multiple of 100 files would otherwise be
        # dropped and left out of the total.
        if data_tiles:
            flush_batch()

        print('** total_num_samples: ', total_num_samples)
    
    
    def _load_grid_stack(h5f, names, num_lines, num_pixels):
        """Read each named grid with get_grid_values and stack them along a new first axis.

        Returns None (after printing the error) as soon as any grid fails to load.
        """
        grids = []
        for name in names:
            try:
                grids.append(get_grid_values(h5f, name, 0, 0, None, num_lines, num_pixels))
            except Exception as e:
                print(e)
                return None
        return np.stack(grids)


    def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, kernel_size=3):
        """Load one file's data and label grids and append them if both pass keep_tile.

        The grid size is taken from the shape of ``h5f[param_s[0]]``. Data grids
        are requested at that size and label grids at twice the line and pixel
        count. The target plane of the data stack is screened first; the label
        stack is only screened if the data stack was accepted. Nothing is
        appended unless both are accepted. If any grid fails to load, the error
        is printed and the function returns without appending.

        :param h5f: open file object, indexable by dataset name.
        :param param_s: data dataset names; must include
            ``group_name_m + target_param``.
        :param tiles: list that receives the accepted data stack.
        :param lbl_param_s: label dataset names; must include
            ``group_name_i + target_param``.
        :param lbl_tiles: list that receives the accepted label stack.
        :param kernel_size: currently unused; kept for interface compatibility.
            The original notes said it must be odd, and it previously fed a
            border width of ``(kernel_size - 1) / 2 + 1`` intended to avoid
            interpolation edge effects. The notes also said "tile_width must be
            even", although no such parameter exists here.
        :return: None
        """
        param_name = param_s[0]

        num_lines = h5f[param_name].shape[0]
        num_pixels = h5f[param_name].shape[1]  # Must be even

        data = _load_grid_stack(h5f, param_s, num_lines, num_pixels)
        if data is None:
            return

        label = _load_grid_stack(h5f, lbl_param_s, num_lines * 2, num_pixels * 2)
        if label is None:
            return

        data = keep_tile(group_name_m + target_param, param_s, data)
        if data is None:  # data rejected, so no need to check the label
            return

        label = keep_tile(group_name_i + target_param, lbl_param_s, label)
        if label is None:
            return

        tiles.append(data)
        lbl_tiles.append(label)