From 9888cd950db16f136bf04f5f6bb515bee45d0e3c Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Tue, 14 Mar 2023 14:10:36 -0500 Subject: [PATCH] snapshot... --- modules/util/viirs_surfrad.py | 98 ++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py index b90b3da1..d4b75f02 100644 --- a/modules/util/viirs_surfrad.py +++ b/modules/util/viirs_surfrad.py @@ -37,10 +37,11 @@ group_name_m = 'orig/' solzen_name = group_name_m + 'solar_zenith' -label_params = [group_name_i+target_param] -data_params = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param] -param_idx_m = data_params.index(group_name_m + target_param) -param_idx_i = label_params.index(group_name_i + target_param) +params_i = [group_name_i+target_param] +params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param] + +param_idx_m = params_m.index(group_name_m + target_param) +param_idx_i = params_i.index(group_name_i + target_param) def is_missing(p_idx, tile): @@ -97,10 +98,10 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st all_files = glob.glob(path, recursive=True) data_files = [f for f in all_files if f not in keep_out] - label_valid_tiles = [] - label_train_tiles = [] - data_valid_tiles = [] - data_train_tiles = [] + valid_tiles_i = [] + train_tiles_i = [] + valid_tiles_m = [] + train_tiles_m = [] f_cnt = 0 num_files = len(data_files) @@ -118,8 +119,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st continue try: - num_not_missing = run(h5f, data_params, data_train_tiles, data_valid_tiles, - label_params, label_train_tiles, label_valid_tiles, + num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m, + params_i, train_tiles_i, valid_tiles_i, num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night) except Exception as e: print(e) @@ -131,30 +132,30 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st total_num_not_missing += num_not_missing - if len(data_train_tiles) == 0 and len(data_valid_tiles) == 0: + if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0: continue if (f_cnt % 5) == 0: num_valid_samples = 0 - if len(data_valid_tiles) > 0: - label_valid = np.stack(label_valid_tiles) - data_valid = np.stack(data_valid_tiles) - np.save(out_directory + 'data_valid_' + str(cnt), data_valid) - np.save(out_directory + 'label_valid_' + str(cnt), label_valid) - num_valid_samples = data_valid.shape[0] + if len(valid_tiles_m) > 0: + valid_i = np.stack(valid_tiles_i) + valid_m = np.stack(valid_tiles_m) + np.save(out_directory + 'valid_mres_' + str(cnt), valid_m) + np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) + num_valid_samples = valid_m.shape[0] num_train_samples = 0 - if len(data_train_tiles) > 0: - label_train = np.stack(label_train_tiles) - data_train = np.stack(data_train_tiles) - np.save(out_directory + 'label_train_' + str(cnt), label_train) - np.save(out_directory + 'data_train_' + str(cnt), data_train) - num_train_samples = data_train.shape[0] - - label_valid_tiles = [] - label_train_tiles = [] - data_valid_tiles = [] - data_train_tiles = [] + if len(train_tiles_m) > 0: + train_i = np.stack(train_tiles_i) + train_m = np.stack(train_tiles_m) + np.save(out_directory + 'train_ires_' + str(cnt), train_i) + np.save(out_directory + 'train_mres' + str(cnt), train_m) + num_train_samples = train_m.shape[0] + + valid_tiles_i = [] + train_tiles_i = [] + valid_tiles_m = [] + train_tiles_m = [] print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) total_num_train_samples += num_train_samples @@ -170,12 +171,12 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st # tile_width: Must be even! # kernel_size: Must be odd! -def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lbl_valid_tiles, +def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i, num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='ANY'): border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects - param_name = param_s[0] + param_name = params_m[0] num_lines = h5f[param_name].shape[0] num_pixels = h5f[param_name].shape[1] # Must be even @@ -184,24 +185,24 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels) grd_s = [] - for param in param_s: + for param in params_m: try: grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels) grd_s.append(grd) except Exception as e: print(e) return - data = np.stack(grd_s) + data_m = np.stack(grd_s) grd_s = [] - for param in lbl_param_s: + for param in params_i: try: grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2) grd_s.append(grd) except Exception as e: print(e) return - label = np.stack(grd_s) + data_i = np.stack(grd_s) tile_width += 2 * border @@ -212,8 +213,8 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb num_y_tiles = int(num_lines / tile_width) - 1 - data_tiles = [] - lbl_tiles = [] + data_tiles_m = [] + data_tiles_i = [] num_not_missing = 0 for j in range(num_y_tiles): @@ -229,26 +230,27 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]): continue - nda = data[:, j_a:j_b, i_a:i_b] - nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2] - if is_missing(param_idx_i, nda_lbl): + nda_m = data_m[:, j_a:j_b, i_a:i_b] + nda_i = data_i[:, j_a*2:j_b*2, i_a*2:i_b*2] + if is_missing(param_idx_i, nda_i): continue num_not_missing += 1 - nda_lbl = keep_tile(param_idx_i, nda_lbl) - if nda_lbl is not None: - data_tiles.append(nda) - lbl_tiles.append(nda_lbl) + nda_i = keep_tile(param_idx_i, nda_i) + if nda_i is not None: + data_tiles_m.append(nda_m) + data_tiles_i.append(nda_i) - num_tiles = len(lbl_tiles) + num_tiles = len(data_tiles_i) num_valid = int(num_tiles * 0.10) num_train = num_tiles - num_valid for k in range(num_train): - train_tiles.append(data_tiles[k]) - lbl_train_tiles.append(lbl_tiles[k]) + train_tiles_m.append(data_tiles_m[k]) + train_tiles_i.append(data_tiles_i[k]) + for k in range(num_valid): - valid_tiles.append(data_tiles[num_train + k]) - lbl_valid_tiles.append(lbl_tiles[num_train + k]) + valid_tiles_m.append(data_tiles_m[num_train + k]) + valid_tiles_i.append(data_tiles_i[num_train + k]) return num_not_missing -- GitLab