From af8cacc4f28acf56f44ebed861dc3352a36d05b6 Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Tue, 14 Feb 2023 13:40:21 -0600 Subject: [PATCH] snapshot... --- modules/util/viirs_l1b_l2.py | 60 +++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/modules/util/viirs_l1b_l2.py b/modules/util/viirs_l1b_l2.py index 9e261564..a5d8696a 100644 --- a/modules/util/viirs_l1b_l2.py +++ b/modules/util/viirs_l1b_l2.py @@ -6,8 +6,8 @@ from aeolus.datasource import CLAVRx_VIIRS from icing.moon_phase import * -# target_param = 'cloud_probability' -target_param = 'cld_opd_dcomp' +target_param = 'cloud_probability' +# target_param = 'cld_opd_dcomp' # group_name = '' group_name = 'super/' @@ -21,7 +21,7 @@ label_params = l2_params data_params = l2_params -def keep_tile(param_s, tile): +def keep_tile(param_s, tile, hist_10): k = param_s.index(group_name + target_param) grd_k = tile[k, ].copy() @@ -48,11 +48,12 @@ def process_cld_prob(param_s, tile): return None -def process_cld_prob_(grd_k): +def process_cld_prob_(grd_k, hist_10): keep = np.invert(np.isnan(grd_k)) num_keep = np.sum(keep) if num_keep / grd_k.size < 0.98: return None + hist_10 += np.histogram(grd_k.flatten(), range=[0.0, 1.0], bins=10)[0] keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False) if np.sum(keep)/num_keep < 0.50: return None @@ -102,6 +103,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10): num_files = len(data_files) print('Start, number of files: ', num_files) + hist_10 = np.zeros((10), dtype=np.int64) for idx, data_f in enumerate(data_files): # if idx % 4 == 0: # if we want to skip some files @@ -113,12 +115,12 @@ def run_all(directory, out_directory, day_night='ANY', start=10): continue try: - total, kept = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night) + total, kept = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, hist_10, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night) except Exception as e: print(e) data_h5f.close() continue - print(data_f, int(100 * (kept/total))) + print(data_f, int(100 * (kept/total)), hist_10, (hist_10 / np.sum(hist_10))) f_cnt += 1 data_h5f.close() @@ -126,33 +128,33 @@ def run_all(directory, out_directory, day_night='ANY', start=10): if len(data_train_tiles) == 0: continue - if (f_cnt % 5) == 0: - num_valid_samples = 0 - if len(data_valid_tiles) > 0: - data_valid = np.stack(data_valid_tiles) - np.save(out_directory + 'data_valid_' + str(cnt), data_valid) - num_valid_samples = data_valid.shape[0] - - data_train = np.stack(data_train_tiles) - np.save(out_directory+'data_train_' + str(cnt), data_train) - num_train_samples = data_train.shape[0] - - data_valid_tiles = [] - data_train_tiles = [] - - print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) - total_num_train_samples += num_train_samples - total_num_valid_samples += num_valid_samples - print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) - - cnt += 1 + # if (f_cnt % 5) == 0: + # num_valid_samples = 0 + # if len(data_valid_tiles) > 0: + # data_valid = np.stack(data_valid_tiles) + # np.save(out_directory + 'data_valid_' + str(cnt), data_valid) + # num_valid_samples = data_valid.shape[0] + # + # data_train = np.stack(data_train_tiles) + # np.save(out_directory+'data_train_' + str(cnt), data_train) + # num_train_samples = data_train.shape[0] + # + # data_valid_tiles = [] + # data_train_tiles = [] + # + # print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) + # total_num_train_samples += num_train_samples + # total_num_valid_samples += num_valid_samples + # print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) + # + # cnt += 1 print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) # tile_width: Must be even! # kernel_size: Must be odd! -def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'): +def run(data_h5f, param_s, train_tiles, valid_tiles, hist_10, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'): border = int((kernel_size - 1)/2) @@ -205,7 +207,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_wi continue nda = data[:, j_a:j_b, i_a:i_b] - nda = keep_tile(param_s, nda) + nda = keep_tile(param_s, nda, hist_10) if nda is not None: train_tiles.append(nda) cnt_kept += 1 @@ -227,7 +229,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_wi continue nda = data[:, j_a:j_b, i_a:i_b] - nda = keep_tile(param_s, nda) + nda = keep_tile(param_s, nda, hist_10) if nda is not None: valid_tiles.append(nda) cnt_kept += 1 -- GitLab