# abi_surfrad.py
import glob

import h5py
import numpy as np

from util.util import get_grid_values, is_day

# Parameter that tile selection (keep_tile) and the output histograms are keyed on.
# Other settings that have been used:
# target_param = 'cld_opd_dcomp'
# target_param = 'cld_opd_dcomp_1'
# target_param = 'cld_opd_dcomp_2'
# target_param = 'cld_opd_dcomp_3'
target_param = 'cloud_probability'

# Dataset-name prefixes inside each input file. The 'i' parameters are read on a
# grid `factor` times finer than the 'm' parameters (see run()).
group_name_i = 'super/'
group_name_m = 'orig/'

solzen_name = group_name_m + 'solar_zenith'
snow_class_name = group_name_m + 'snow_class'

# Channels stacked, in this order, into every fine-grid ('i') tile.
params_i = [
    group_name_i + 'temp_ch38',
    group_name_i + 'refl_ch01',
    group_name_i + target_param,
]

# Channels stacked, in this order, into every coarse-grid ('m') tile.
params_m = [
    group_name_m + 'temp_ch38',
    group_name_m + 'refl_ch01',
    group_name_m + 'refl_submin_ch01',
    group_name_m + 'refl_submax_ch01',
    group_name_m + 'refl_substddev_ch01',
    group_name_m + target_param,
    group_name_m + 'cloud_fraction',
]

# Position of the target parameter along the channel axis of each stack.
param_idx_m = params_m.index(group_name_m + target_param)
param_idx_i = params_i.index(group_name_i + target_param)

# Value range of the 16-bin histogram accumulated over the target parameter.
hist_range = [0.0, 1.0]
# hist_range = [0.0, 160.0]

# Tiling geometry handed to run().
tile_width = 32
kernel_size = 5
factor = 4
# tile_width = 64
# kernel_size = 7
# factor = 4

# Only every num_skip-th file of a file list is processed.
num_skip = 2

# When False, tiles are still collected and counted but no .npy files are written.
DO_WRITE_OUTFILE = True


def snow_covered(tile):
    """Tell whether any element of ``tile`` is greater than 1.

    NaN elements never compare greater than 1, so they do not count.
    """
    above_one = tile > 1
    return np.any(above_one)


def is_missing(p_idx, tile):
    """Report whether channel ``p_idx`` of ``tile`` has too many NaN values.

    Args:
        p_idx: index of the channel to inspect along the first axis of ``tile``.
        tile: array whose first axis is the channel axis.

    Returns:
        bool: True when fewer than 98% of the channel's values are non-NaN, or
        when the channel holds no values at all; False otherwise.
    """
    valid = ~np.isnan(tile[p_idx])
    if valid.size == 0:
        # Nothing to judge: treat as missing rather than dividing by zero.
        return True
    return bool(np.count_nonzero(valid) / valid.size < 0.98)


def keep_tile(p_idx, tile):
    """Filter a tile on its target-parameter channel and clean that channel.

    The channel ``tile[p_idx]`` is copied and handed to the filter that matches
    the module-level ``target_param``. When the filter accepts it, the cleaned
    channel (NaN replaced by 0) is written back into ``tile``.

    Args:
        p_idx: index of the target parameter along the first axis of ``tile``.
        tile: array whose first axis is the channel axis. It is modified in
            place when the tile is kept.

    Returns:
        ``tile`` itself when it passes the filter, otherwise None.
    """
    channel = tile[p_idx].copy()

    if target_param == 'cloud_probability':
        # process_cld_prob(channel) is the alternative filter for this parameter.
        channel = process_cloud_frac(channel)
    elif 'cld_opd_dcomp' in target_param:
        channel = process_cld_opd(channel)

    if channel is None:
        return None

    tile[p_idx] = channel
    return tile


def process_cld_prob(grd_k):
    """Accept a cloud-probability grid only if it is partly, not fully, cloudy.

    A value counts as cloudy when it is greater than 0.70. The grid is accepted
    when the cloudy share of its non-NaN values lies in [0.30, 0.90].

    Args:
        grd_k: array of probabilities; NaN marks missing values.

    Returns:
        A new array equal to ``grd_k`` with NaN replaced by 0 when the grid is
        accepted; None when it is rejected or contains no non-NaN value.
    """
    valid = ~np.isnan(grd_k)
    num_valid = np.count_nonzero(valid)
    if num_valid == 0:
        # No usable value: reject instead of computing 0/0.
        return None

    cloudy = np.logical_and(valid, grd_k > 0.70)
    frac_cld = np.count_nonzero(cloudy) / num_valid
    if not (0.30 <= frac_cld <= 0.90):
        return None

    return np.where(valid, grd_k, 0)  # Convert NaN to 0


def process_cloud_frac(grd_k):
    """Accept a cloud-probability grid only if enough of it is mixed sky.

    The grid is reduced to 4x4-block cloud-fraction categories with
    ``get_cloud_frac_5cat``. Categories 1, 2 and 3 count as mixed. The grid is
    accepted when at least 13% of the blocks are mixed.

    Args:
        grd_k: 2-D array of probabilities; NaN marks missing values.

    Returns:
        A new array equal to ``grd_k`` with NaN replaced by 0 when the grid is
        accepted; None when it is rejected or produces no blocks.
    """
    categories = get_cloud_frac_5cat(grd_k)
    if categories.size == 0:
        # No blocks at all: reject instead of computing 0/0.
        return None

    mixed = np.logical_and(categories > 0, categories < 4)
    frac_mix = np.count_nonzero(mixed) / mixed.size
    if frac_mix < 0.13:  # at least 13% mixed to pass
        return None

    return np.where(np.isnan(grd_k), 0, grd_k)  # Convert NaN to 0


def get_cloud_frac_5cat(grd_k):
    """Reduce a probability grid to one cloud-fraction category per 4x4 block.

    Each value is first made binary: NaN and values below 0.5 count as 0,
    everything else as 1. The binary values are summed over non-overlapping
    4x4 blocks (0..16) and the sum is mapped to a category:

        0      -> 0
        1..5   -> 1
        6..10  -> 2
        11..15 -> 3
        16     -> 4

    Args:
        grd_k: array whose first two dimensions are multiples of 4. It is not
            modified.

    Returns:
        Integer array of categories with the first two dimensions divided by 4.

    Raises:
        ValueError: if either of the first two dimensions is not a multiple of 4.
    """
    block = 4

    filled = np.where(np.isnan(grd_k), 0, grd_k)
    binary = np.where(filled < 0.5, 0, 1)

    if any(extent % block for extent in binary.shape[:2]):
        raise ValueError(
            f'grid shape {binary.shape} is not divisible into {block}x{block} blocks')

    # Strided slices pick the same in-block position from every block, so adding
    # all 16 positions gives the per-block count of cloudy values.
    counts = sum(binary[row::block, col::block]
                 for row in range(block) for col in range(block))

    # Lower bounds of categories 1..4; counts below the first bound are category 0.
    categories = np.digitize(counts, [1, 6, 11, 16])
    return categories.astype(counts.dtype, copy=False)


def process_cld_opd(grd_k):
    """Accept an optical-depth grid only if it is partly, not fully, cloudy.

    A value counts as cloudy when it lies strictly between 2.0 and 158.0. The
    grid is accepted when the cloudy share of its non-NaN values lies strictly
    between 0.20 and 0.90.

    Args:
        grd_k: array of values; NaN marks missing values.

    Returns:
        A new array equal to ``grd_k`` with NaN replaced by 0 when the grid is
        accepted; None when it is rejected or contains no non-NaN value.
    """
    valid = ~np.isnan(grd_k)
    num_valid = np.count_nonzero(valid)
    if num_valid == 0:
        # No usable value: reject instead of computing 0/0.
        return None

    in_range = np.logical_and(2.0 < grd_k, grd_k < 158.0)
    cloudy = np.logical_and(valid, in_range)
    frac_cld = np.count_nonzero(cloudy) / num_valid
    if not (0.20 < frac_cld < 0.90):
        return None

    return np.where(valid, grd_k, 0)  # Convert NaN to 0


def _flush_tile_batch(split, out_directory, cnt, data_tiles_m, data_tiles_i):
    """Stack one batch of collected tiles, optionally write it, and histogram it.

    Returns ``(num_samples, hist)``, where ``hist`` is the 16-bin histogram of the
    target parameter in the coarse-grid batch. A batch whose two stacks disagree
    on the number of samples is reported, not written, and counted as empty.
    """
    batch_i = np.stack(data_tiles_i)
    batch_m = np.stack(data_tiles_m)

    if batch_m.shape[0] != batch_i.shape[0]:
        print('problem: number of samples dont match', batch_m.shape, batch_i.shape)
        return 0, np.zeros([16], dtype=np.int64)

    if DO_WRITE_OUTFILE:
        # np.save appends the '.npy' suffix itself.
        np.save(out_directory + split + '_mres_' + f'{cnt:04d}', batch_m)
        np.save(out_directory + split + '_ires_' + f'{cnt:04d}', batch_i)

    hist = np.histogram(batch_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
    return batch_m.shape[0], hist


def _collect_split(split, files, out_directory, start, day_night, is_snow_covered):
    """Tile every ``num_skip``-th file of ``files`` and write the tiles in batches.

    A batch is flushed each time the count of successfully processed files
    reaches a multiple of 100; whatever remains is flushed at the end.

    Returns ``(total_num_samples, total_num_not_missing, hist)``.
    """
    data_tiles_i = []
    data_tiles_m = []
    hist = np.zeros([16], dtype=np.int64)
    cnt = start
    f_cnt = 0
    total_num_samples = 0
    total_num_not_missing = 0
    num_files = len(files)

    for data_f in files[::num_skip]:  # if we want to skip some files
        try:
            h5f = h5py.File(data_f, 'r')
        except Exception:
            print('cant open file: ', data_f)
            continue

        try:
            with h5f:  # closes the file on success and on failure
                result = run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
                             tile_width=tile_width, kernel_size=kernel_size, factor=factor,
                             day_night=day_night, is_snow_covered=is_snow_covered)
        except Exception as e:
            print(e)
            continue

        if result is None:
            # run() could not read one of the parameters and already printed why.
            continue
        num_not_missing, _ = result

        print(data_f)
        f_cnt += 1
        total_num_not_missing += num_not_missing

        if not data_tiles_m or f_cnt % 100 != 0:
            continue

        num_samples, batch_hist = _flush_tile_batch(split, out_directory, cnt, data_tiles_m, data_tiles_i)
        hist += batch_hist
        # The stacked copies are written; start the next batch empty.
        data_tiles_i.clear()
        data_tiles_m.clear()

        print(f'  num_{split}_samples, progress % : ', num_samples, int((f_cnt/(num_files/num_skip))*100))
        total_num_samples += num_samples
        print(f'total_num_{split}_samples, total_num_not_missing: ', total_num_samples, total_num_not_missing)
        print('--------------------------------------------------')

        cnt += 1

    # Write out leftover, if any.
    if data_tiles_m:
        num_samples, batch_hist = _flush_tile_batch(split, out_directory, cnt, data_tiles_m, data_tiles_i)
        hist += batch_hist
        total_num_samples += num_samples

    return total_num_samples, total_num_not_missing, hist


def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10, is_snow_covered=None):
    """Build validation and training tile files from a directory tree of input files.

    Files are found under ``directory + '*_v3/202?/'``. Files matching
    ``*/01/*/*.nc`` below that are held out as test files and not processed,
    files matching ``*/0[2-6]/*/*.nc`` form the validation set, and every other
    file matching ``pattern`` forms the training set. Each set is tiled with
    ``run`` and written as ``valid_*`` / ``train_*`` ``.npy`` batches numbered
    from ``start``, provided ``DO_WRITE_OUTFILE`` is set. Progress and totals are
    printed.

    Args:
        directory: input root; concatenated directly with the glob patterns, so
            it should end with a path separator.
        out_directory: output prefix; concatenated directly with the file names.
        day_night: passed to ``run`` ('ANY', 'DAY' or 'NIGHT').
        pattern: file-name glob for the full file list.
        start: number of the first output batch of each set.
        is_snow_covered: passed to ``run`` (None, True or False).
    """
    # path = directory + '**' + '/' + pattern
    path = directory + '*_v3/202?/' + '**' + '/' + pattern

    all_files = glob.glob(path, recursive=True)
    test_files = glob.glob(directory + '*_v3/202?/*/01/*/*.nc', recursive=True)
    valid_files = glob.glob(directory + '*_v3/202?/*/0[2-6]/*/*.nc', recursive=True)
    # Set lookup keeps the exclusion linear in the number of files.
    held_out = set(valid_files).union(test_files)
    train_files = [f for f in all_files if f not in held_out]

    print('Start, number of valid files: ', len(valid_files))
    total_num_valid_samples, total_num_not_missing, param_valid_hist = \
        _collect_split('valid', valid_files, out_directory, start, day_night, is_snow_covered)
    print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing)
    print(param_valid_hist)
    print('--------------------------------------------------')
    print('----------------------------------------------------------------')

    print('Start, number of train files: ', len(train_files))
    total_num_train_samples, total_num_not_missing, param_train_hist = \
        _collect_split('train', train_files, out_directory, start, day_night, is_snow_covered)
    print('total_num_train_samples,  total_num_not_missing: ', total_num_train_samples, total_num_not_missing)
    print(param_train_hist)
    print('--------------------------------------------------')

    print('*** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)


def run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, tile_width=64, kernel_size=3, factor=2,
        day_night='ANY', is_snow_covered=None):
    """Cut one open file into tiles and append the accepted ones to the tile lists.

    All ``params_m`` grids are read at the size of the first one and stacked;
    all ``params_i`` grids are read at ``factor`` times that size and stacked.
    The coarse grid is walked in non-overlapping square tiles of
    ``tile_width + 2 * border`` pixels, and the matching fine-grid tile is the
    same window scaled by ``factor``. A tile is appended when it passes the
    optional snow and day/night filters, ``is_missing`` and ``keep_tile``.

    Args:
        h5f: open file object indexable by dataset name.
        params_m: dataset names for the coarse-grid stack.
        data_tiles_m: list that accepted coarse-grid tiles are appended to.
        params_i: dataset names for the fine-grid stack.
        data_tiles_i: list that accepted fine-grid tiles are appended to.
        tile_width: tile width before the border is added. Must be even!
        kernel_size: used only to size the border. Must be odd!
        factor: ratio between the fine and the coarse grid.
        day_night: 'ANY' applies no filter; 'DAY' keeps tiles for which
            ``is_day`` is true, 'NIGHT' keeps the others.
        is_snow_covered: None applies no filter; True keeps only tiles for which
            ``snow_covered`` is true, False keeps only the others.

    Returns:
        ``(num_not_missing, num_snow_covered)``: the number of tiles that passed
        ``is_missing``, and the number of tiles found snow covered by the snow
        filter. None when one of the parameters could not be read (the error is
        printed).
    """
    border = (kernel_size - 1) // 2 + 1  # Need to add for interpolation with no edge effects

    ref_shape = h5f[params_m[0]].shape
    num_lines = ref_shape[0]
    num_pixels = ref_shape[1]  # Must be even

    solzen = None
    if day_night != 'ANY':
        solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)

    snow = None
    if is_snow_covered is not None:
        snow = get_grid_values(h5f, snow_class_name, 0, 0, None, num_lines, num_pixels)

    try:
        grids_m = [get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
                   for param in params_m]
        grids_i = [get_grid_values(h5f, param, 0, 0, None, num_lines*factor, num_pixels*factor)
                   for param in params_i]
    except Exception as e:
        print(e)
        return None
    data_m = np.stack(grids_m)
    data_i = np.stack(grids_i)

    tile_width += 2 * border

    i_start = border - 1  # zero-based
    j_start = border - 1  # zero-based

    # Count only tiles that fit completely after the start offset; a truncated
    # last tile would have a different shape and could not be stacked later.
    num_y_tiles = max((num_lines - j_start) // tile_width, 0)
    num_x_tiles = max((num_pixels - i_start) // tile_width, 0)

    num_not_missing = 0
    num_snow_covered = 0

    for j in range(num_y_tiles):
        j_a = j_start + j * tile_width
        j_b = j_a + tile_width

        for i in range(num_x_tiles):
            i_a = i_start + i * tile_width
            i_b = i_a + tile_width

            if is_snow_covered is not None:
                has_snow = snow_covered(snow[j_a:j_b, i_a:i_b])
                if is_snow_covered:
                    if not has_snow:
                        continue
                    num_snow_covered += 1
                elif has_snow:
                    num_snow_covered += 1
                    continue

            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                continue

            nda_m = data_m[:, j_a:j_b, i_a:i_b]
            nda_i = data_i[:, j_a*factor:j_b*factor, i_a*factor:i_b*factor]
            if is_missing(param_idx_i, nda_i):
                continue
            num_not_missing += 1

            nda_i = keep_tile(param_idx_i, nda_i)
            if nda_i is not None:
                # Copy so the lists hold only the tiles, not views that keep the
                # whole per-file arrays alive.
                data_tiles_m.append(nda_m.copy())
                data_tiles_i.append(nda_i.copy())

    return num_not_missing, num_snow_covered