def process_cld_prob(grd_k, *, clr_max_prob=0.20, cld_min_prob=0.80, min_frac=0.23):
    """Screen a tile for a usable mix of confidently clear and cloudy pixels.

    NaN entries are treated as missing. Among the non-missing entries, the
    fraction strictly below ``clr_max_prob`` ("clear") and the fraction
    strictly above ``cld_min_prob`` ("cloudy") must each be at least
    ``min_frac``; otherwise the tile is rejected.

    Args:
        grd_k: NumPy array of values, NaN where missing.
            (Presumably cloud probabilities in [0, 1] -- confirm with caller.)
        clr_max_prob: Values strictly below this count as clear.
        cld_min_prob: Values strictly above this count as cloudy.
        min_frac: Minimum fraction of non-missing values required in each of
            the clear and cloudy classes.

    Returns:
        A copy of ``grd_k`` with NaN replaced by 0 if the tile passes the
        screen, otherwise ``None``. A tile with no non-missing values is
        always rejected.
    """
    keep = np.invert(np.isnan(grd_k))
    num_keep = np.sum(keep)

    # An all-NaN tile has no usable samples. Reject it explicitly instead of
    # dividing 0 by 0 below, which yields NaN plus a RuntimeWarning (or an
    # exception when NumPy floating-point errors are set to 'raise').
    if num_keep == 0:
        return None

    # Mask with `keep` so missing entries never count toward either class.
    keep_clr = np.where(keep, grd_k < clr_max_prob, False)
    keep_cld = np.where(keep, grd_k > cld_min_prob, False)
    frac_clr = np.sum(keep_clr) / num_keep
    frac_cld = np.sum(keep_cld) / num_keep

    if not (frac_clr >= min_frac and frac_cld >= min_frac):
        return None

    # Convert NaN to 0 so downstream consumers receive a fully finite tile.
    return np.where(keep, grd_k, 0)
Maybe make this better someday + num_valid_samples = 0 + if len(valid_tiles_m) > 0: + valid_i = np.stack(valid_tiles_i) + valid_m = np.stack(valid_tiles_m) + np.save(out_directory + 'valid_mres_' + str(cnt), valid_m) + np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) + num_valid_samples = valid_m.shape[0] + + num_train_samples = 0 + if len(train_tiles_m) > 0: + train_i = np.stack(train_tiles_i) + train_m = np.stack(train_tiles_m) + np.save(out_directory + 'train_ires_' + str(cnt), train_i) + np.save(out_directory + 'train_mres' + str(cnt), train_m) + num_train_samples = train_m.shape[0] + + print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, + int((f_cnt / num_files) * 100)) + total_num_train_samples += num_train_samples + total_num_valid_samples += num_valid_samples + print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples, + total_num_valid_samples, total_num_not_missing) + print('--------------------------------------------------') + print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)