diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py index 97a24f4753dffa1f6041978dcf5efa0f80479a86..d158de282b725f03d26afc74570b45033220153a 100644 --- a/modules/util/viirs_surfrad.py +++ b/modules/util/viirs_surfrad.py @@ -25,40 +25,40 @@ def keep_tile(param, param_s, tile): grd_k = tile[k, ].copy() if target_param == 'cloud_probability': - grd_k = process_cld_prob_(grd_k) + grd_k, bflag = process_cld_prob_(grd_k) elif target_param == 'cld_opd_dcomp': - grd_k = process_cld_opd_(grd_k) + grd_k, bflag = process_cld_opd_(grd_k) if grd_k is not None: tile[k, ] = grd_k - return tile + return tile, bflag else: - return None + return None, bflag def process_cld_prob_(grd_k): keep = np.invert(np.isnan(grd_k)) num_keep = np.sum(keep) if num_keep / grd_k.size < 0.98: - return None + return None, True keep_clr = np.where(keep, grd_k < 0.20, False) frac_keep = np.sum(keep_clr)/num_keep - if not (0.38 < frac_keep < 0.62): - return None + if not (0.40 < frac_keep < 0.60): + return None, False grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaNs to 0 - return grd_k + return grd_k, False def process_cld_opd_(grd_k): keep = np.invert(np.isnan(grd_k)) num_keep = np.sum(keep) if num_keep / grd_k.size < 0.98: - return None + return None, True grd_k = np.where(np.invert(keep), 0, grd_k) keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False) if np.sum(keep)/num_keep < 0.50: - return None - return grd_k + return None, False + return grd_k, False def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10): @@ -80,6 +80,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st num_files = len(data_files) print('Start, number of files: ', num_files) + total_num_not_missing = 0 + for idx, data_f in enumerate(data_files): # if idx % 4 == 0: # if we want to skip some files if True: @@ -90,9 +92,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st continue try: - run(h5f, data_params, data_train_tiles, data_valid_tiles, - label_params, label_train_tiles, label_valid_tiles, - num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night) + num_not_missing = run(h5f, data_params, data_train_tiles, data_valid_tiles, + label_params, label_train_tiles, label_valid_tiles, + num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night) except Exception as e: print(e) h5f.close() @@ -129,7 +131,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) total_num_train_samples += num_train_samples total_num_valid_samples += num_valid_samples - print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) + total_num_not_missing += num_not_missing + print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples, + total_num_valid_samples, total_num_not_missing) print('--------------------------------------------------') cnt += 1 @@ -179,12 +183,13 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2) j_start = 0 - num_keep_y_tiles = int(num_lines / tile_width) - 3 + num_y_tiles = int(num_lines / tile_width) - 1 - num_y_valid = int(num_keep_y_tiles * 0.15) + 1 - num_y_train = num_keep_y_tiles - num_y_valid - 1 + data_tiles = [] + lbl_tiles = [] + num_not_missing = 0 - for j in range(num_y_train): + for j in range(num_y_tiles): j_a = j_start + j * j_skip j_b = j_a + tile_width @@ -199,31 +204,23 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb nda = data[:, j_a:j_b, i_a:i_b] nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2] - nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl) + nda_lbl, missing_flag = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl) + if not missing_flag: + num_not_missing += 1 if nda_lbl is not None: - train_tiles.append(nda) - lbl_train_tiles.append(nda_lbl) - - j_start = num_y_train * tile_width + 2*tile_width - for j in range(num_y_valid): - j_a = j_start + j * j_skip - j_b = j_a + tile_width + data_tiles.append(nda) + lbl_tiles.append(nda_lbl) - for i in range(num_keep_x_tiles): - i_a = i_start + i * i_skip - i_b = i_a + tile_width + num_tiles = len(lbl_tiles) + num_valid = int(num_tiles * 0.10) + num_train = num_tiles - num_valid - if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]): - continue - elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]): - continue - - nda = data[:, j_a:j_b, i_a:i_b] - nda_lbl = label[:, j_a * 2:j_b * 2, i_a * 2:i_b * 2] - nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl) - - if nda_lbl is not None: - valid_tiles.append(nda) - lbl_valid_tiles.append(nda_lbl) + for k in range(num_train): + train_tiles.append(data_tiles[k]) + lbl_train_tiles.append(lbl_tiles[k]) + for k in range(num_valid): + valid_tiles.append(data_tiles[num_train + k]) + lbl_valid_tiles.append(lbl_tiles[num_train + k]) + return num_not_missing