diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py
index 1f781ae4c16c7b20c18cdeffbda7f55d37eb6d54..c46bd232c563ad587e22a6f3eb4eb7a7f7e8bdb0 100644
--- a/modules/util/viirs_surfrad.py
+++ b/modules/util/viirs_surfrad.py
@@ -18,47 +18,51 @@ solzen_name = group_name_m + 'solar_zenith'
 
 label_params = [group_name_i+target_param]
 data_params = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
+param_idx_m = data_params.index(group_name_m + target_param)
+param_idx_i = label_params.index(group_name_i + target_param)  # i-band target lives in label_params, not data_params
 
 
-def keep_tile(param, param_s, tile):
-    k = param_s.index(param)
-    grd_k = tile[k, ].copy()
+def is_missing(p_idx, tile):
+    # True when fewer than 98% of the pixels of parameter p_idx in this tile are non-NaN
+    keep = np.invert(np.isnan(tile[p_idx, ]))
+    return bool(np.sum(keep) / keep.size < 0.98)
+
+
+def keep_tile(p_idx, tile):
+    grd_k = tile[p_idx, ].copy()
 
     if target_param == 'cloud_probability':
-        grd_k, bflag = process_cld_prob_(grd_k)
+        grd_k = process_cld_prob(grd_k)
     elif target_param == 'cld_opd_dcomp':
-        grd_k, bflag = process_cld_opd_(grd_k)
+        grd_k = process_cld_opd(grd_k)
 
     if grd_k is not None:
-        tile[k, ] = grd_k
-        return tile, bflag
+        tile[p_idx, ] = grd_k
+        return tile
     else:
-        return None, bflag
+        return None
 
 
-def process_cld_prob_(grd_k):
+def process_cld_prob(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
-    if num_keep / grd_k.size < 0.98:
-        return None, True
     keep_clr = np.where(keep, grd_k < 0.20, False)
     frac_keep = np.sum(keep_clr)/num_keep
-    if not (0.38 < frac_keep < 0.62):
-        return None, False
-    grd_k = np.where(np.invert(keep), 0, grd_k)  # Convert NaNs to 0
-    return grd_k, False
+    if not (0.35 < frac_keep < 0.65):
+        return None
+    grd_k = np.where(np.invert(keep), 0, grd_k)  # Convert NaN to 0
+    return grd_k
 
 
-def process_cld_opd_(grd_k):
+def process_cld_opd(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
-    if num_keep / grd_k.size < 0.98:
-        return None, True
     grd_k = np.where(np.invert(keep), 0, grd_k)
     keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
-    if np.sum(keep)/num_keep < 0.50:
-        return None, False
-    return grd_k, False
+    frac_keep = np.sum(keep)/num_keep
+    if frac_keep < 0.50:
+        return None
+    return grd_k
 
 
 def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
@@ -103,6 +107,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
         f_cnt += 1
         h5f.close()
 
+        total_num_not_missing += num_not_missing
+
         if len(data_train_tiles) == 0 and len(data_valid_tiles) == 0:
             continue
 
@@ -131,7 +137,6 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
         print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
         total_num_train_samples += num_train_samples
         total_num_valid_samples += num_valid_samples
-        total_num_not_missing += num_not_missing
         print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples, total_num_valid_samples, total_num_not_missing)
         print('--------------------------------------------------')
 
@@ -144,7 +149,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
 # tile_width: Must be even!
 # kernel_size: Must be odd!
 def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lbl_valid_tiles,
-        num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='DAY'):
+        num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='ANY'):
 
     border = int((kernel_size - 1)/2) + 1  # Need to add for interpolation with no edge effects
 
@@ -153,7 +158,7 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
     num_lines = h5f[param_name].shape[0]
     num_pixels = h5f[param_name].shape[1]  # Must be even
 
-    if day_night != 'BOTH':
+    if day_night != 'ANY':
         solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
 
     grd_s = []
@@ -204,10 +209,11 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
             nda = data[:, j_a:j_b, i_a:i_b]
             nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
 
-            nda_lbl, missing_flag = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
-            if not missing_flag:
-                num_not_missing += 1
+            if is_missing(param_idx_i, nda_lbl):
+                continue
+            num_not_missing += 1
 
+            nda_lbl = keep_tile(param_idx_i, nda_lbl)  # keep_tile now takes (p_idx, tile)
             if nda_lbl is not None:
                 data_tiles.append(nda)
                 lbl_tiles.append(nda_lbl)