# abi_surfrad.py
import numpy as np
import h5py
from util.util import get_grid_values
import glob


# Name of the parameter the tiles are screened on. Swap in the commented
# alternative to screen on cloud optical depth instead.
target_param = 'cloud_probability'
# target_param = 'cld_opd_dcomp'

# Group prefixes inside each input file. Labels are read from 'super/' and
# inputs from 'orig/'; run() requests the 'super/' grids at twice the line
# and pixel count of the 'orig/' ones.
group_name_i = 'super/'
group_name_m = 'orig/'

# Full dataset path of the solar zenith angle in the 'orig/' group.
solzen_name = '{}solar_zenith'.format(group_name_m)

# Dataset paths read for the label stack and for the input stack.
label_params = ['{}{}'.format(group_name_i, target_param)]
data_params = [group_name_m + name
               for name in ('temp_ch31', 'refl_ch01', target_param)]


def keep_tile(param, param_s, tile):
    """Screen one plane of a stacked tile and decide whether to keep the tile.

    The plane belonging to ``param`` is located by its position in
    ``param_s``, copied, and passed to the screening function selected by the
    module-level ``target_param``. If ``target_param`` matches neither known
    name the copied plane is used unchanged.

    Args:
        param: Name of the plane to screen; must be an element of ``param_s``
            (``list.index`` raises ``ValueError`` otherwise).
        param_s: Parameter names, in the same order as the first axis of
            ``tile``.
        tile: Stacked array indexed by parameter on its first axis. It is
            modified in place when the tile is kept.

    Returns:
        ``tile`` with the screened plane written back, or ``None`` if the
        screening function rejected the plane.
    """
    plane_idx = param_s.index(param)
    # Work on a copy so a rejected tile leaves the caller's array untouched.
    plane = tile[plane_idx, ].copy()

    if target_param == 'cloud_probability':
        plane = process_cld_prob_(plane)
    elif target_param == 'cld_opd_dcomp':
        plane = process_cld_opd_(plane)

    if plane is None:
        return None

    tile[plane_idx, ] = plane
    return tile


def process_cld_prob_(grd_k):
    """Screen a cloud-probability grid and zero everything outside (0.10, 0.90).

    A pixel is "valid" when it is not NaN and "in range" when it is valid and
    strictly between 0.10 and 0.90. The grid is rejected when fewer than 25%
    of its valid pixels are in range, or when it has no valid pixels at all.

    Args:
        grd_k: Array of cloud-probability values; NaN marks missing pixels.

    Returns:
        A new array in which every pixel that is not in range (including NaN)
        is set to 0, or ``None`` if the grid is rejected.
    """
    valid = ~np.isnan(grd_k)
    num_valid = np.sum(valid)
    # Without this guard an all-NaN (or empty) grid yields 0/0 = NaN below;
    # NaN < 0.25 is False, so the grid would slip through as an all-zero tile.
    if num_valid == 0:
        return None
    # NOTE: a minimum valid-coverage check (num_valid / grd_k.size < 0.98)
    # existed here but was disabled by the original author.

    in_range = valid & (0.10 < grd_k) & (grd_k < 0.90)
    if np.sum(in_range) / num_valid < 0.25:
        return None

    return np.where(in_range, grd_k, 0)


def process_cld_opd_(grd_k):
    """Screen a cloud-optical-depth grid and replace its NaN pixels with 0.

    A pixel is "valid" when it is not NaN and "in range" when it is valid and
    strictly between 0.1 and 158.0. The grid is rejected when fewer than 50%
    of its valid pixels are in range, or when it has no valid pixels at all.

    Unlike the cloud-probability screen, out-of-range values are left as they
    are in the returned grid; only NaN pixels are zeroed.

    Args:
        grd_k: Array of optical-depth values; NaN marks missing pixels.

    Returns:
        A new array with NaN replaced by 0, or ``None`` if the grid is
        rejected.
    """
    valid = ~np.isnan(grd_k)
    num_valid = np.sum(valid)
    # Without this guard an all-NaN (or empty) grid yields 0/0 = NaN below;
    # NaN < 0.50 is False, so the grid would slip through as an all-zero tile.
    if num_valid == 0:
        return None
    # NOTE: a minimum valid-coverage check (num_valid / grd_k.size < 0.98)
    # existed here but was disabled by the original author.

    filled = np.where(valid, grd_k, 0)
    in_range = valid & (0.1 < filled) & (filled < 158.0)
    if np.sum(in_range) / num_valid < 0.50:
        return None

    return filled


def _flush_batch(label_tiles, data_tiles, out_directory, cnt):
    """Stack the accumulated tiles into one batch and return its sample count."""
    # np.stack raises ValueError if the tiles do not all share one shape.
    label = np.stack(label_tiles)
    data = np.stack(data_tiles)
    # Writing batches to disk is currently disabled; re-enable these lines to
    # save batch number `cnt` into out_directory.
    # np.save(out_directory + 'label_' + str(cnt), label)
    # np.save(out_directory + 'data_' + str(cnt), data)
    return data.shape[0]


def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10):
    """Extract tiles from every matching file and report how many were kept.

    Files matching ``pattern`` are searched for recursively below
    ``directory``. Each one is opened with h5py and handed to ``run``, which
    appends accepted data/label tiles to running lists. After every 100th
    successfully processed file the accumulated tiles are stacked into a
    batch, counted and discarded. Tiles left over after the last file form a
    final batch, so they are included in the total as well.

    Files that cannot be opened, or for which ``run`` raises, are reported on
    stdout and skipped.

    Args:
        directory: Search root. It is concatenated directly with ``'**/'``,
            so it must already end with a path separator.
        out_directory: Destination prefix for saved batches. Unused while
            saving is disabled (see ``_flush_batch``).
        pattern: Glob pattern for the file names.
        start: Number assigned to the first batch.

    Returns:
        The total number of samples over all batches.
    """
    cnt = start
    total_num_samples = 0

    path = directory + '**' + '/' + pattern
    # To process only a subset of the files, slice this list (e.g. files[::4]).
    files = glob.glob(path, recursive=True)

    label_tiles = []
    data_tiles = []
    f_cnt = 0

    num_files = len(files)
    print('Start, number of files: ', num_files)

    def report(num_samples):
        # Progress/total printout shared by the periodic and the final batch.
        print('  num_samples, progress % : ', num_samples, int((f_cnt/num_files)*100))
        print('total_num_samples: ', total_num_samples)
        print('------------------------------------------------------------')

    for f in files:
        # Best-effort batch job: skip unreadable files. `except Exception`
        # (not a bare except) so Ctrl-C and SystemExit still stop the run.
        try:
            h5f = h5py.File(f, 'r')
        except Exception:
            print('cant open file: ', f)
            continue

        try:
            # The context manager closes the file on success and on error.
            with h5f:
                run(h5f, data_params, data_tiles, label_params, label_tiles, kernel_size=5)
        except Exception as e:
            print(e)
            continue

        print(f)
        f_cnt += 1

        if data_tiles and f_cnt % 100 == 0:
            num_samples = _flush_batch(label_tiles, data_tiles, out_directory, cnt)
            label_tiles = []
            data_tiles = []
            total_num_samples += num_samples
            report(num_samples)
            cnt += 1

    # Tiles collected since the last multiple-of-100 file would otherwise be
    # dropped silently and missing from the total.
    if data_tiles:
        num_samples = _flush_batch(label_tiles, data_tiles, out_directory, cnt)
        total_num_samples += num_samples
        report(num_samples)

    print('** total_num_samples: ', total_num_samples)
    return total_num_samples


def _load_grids(h5f, names, num_lines, num_pixels):
    """Read every named grid from h5f and stack them along a new first axis.

    Returns ``None``, after printing the error, as soon as one grid fails to
    load.
    """
    grids = []
    for name in names:
        try:
            grids.append(get_grid_values(h5f, name, 0, 0, None, num_lines, num_pixels))
        except Exception as e:
            print(e)
            return None
    return np.stack(grids)


def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, kernel_size=3):
    """Load one file's data and label grids and keep them if both pass screening.

    The grid size is taken from the first entry of ``param_s``. Data grids are
    requested at that size and label grids at twice the number of lines and
    pixels. Both stacks are then screened with ``keep_tile`` on the
    ``target_param`` plane; the data stack is checked first and the label
    stack only if the data stack was accepted.

    Args:
        h5f: Open file object, indexable by dataset path.
        param_s: Dataset paths of the input grids.
        tiles: List that receives the accepted data stack.
        lbl_param_s: Dataset paths of the label grids.
        lbl_tiles: List that receives the accepted label stack.
        kernel_size: Currently unused; kept so existing callers keep working.
            The original comments required it to be odd.

    Returns:
        ``None``. ``tiles`` and ``lbl_tiles`` are each extended by one entry
        when both stacks are accepted. Nothing is appended if a grid fails to
        load or either stack is rejected.
    """
    reference = h5f[param_s[0]]
    num_lines = reference.shape[0]
    num_pixels = reference.shape[1]  # Must be even (per the original author)

    data = _load_grids(h5f, param_s, num_lines, num_pixels)
    if data is None:
        return

    label = _load_grids(h5f, lbl_param_s, num_lines * 2, num_pixels * 2)
    if label is None:
        return

    nda = keep_tile(group_name_m + target_param, param_s, data[:, :, :])
    if nda is None:  # data rejected, no need to check the label
        return

    nda_lbl = keep_tile(group_name_i + target_param, lbl_param_s, label[:, :, :])
    if nda_lbl is None:
        return

    tiles.append(nda)
    lbl_tiles.append(nda_lbl)