diff --git a/modules/util/abi_surfrad.py b/modules/util/abi_surfrad.py index 20339e08bed36bd10c123bbc1f03e19af1bea730..94dae3438aecd54b900a0a4df788ebeec07ae983 100644 --- a/modules/util/abi_surfrad.py +++ b/modules/util/abi_surfrad.py @@ -3,9 +3,9 @@ import h5py from util.util import get_grid_values, is_day import glob -target_param = 'cloud_probability' +# target_param = 'cloud_probability' # target_param = 'cld_opd_dcomp' -# target_param = 'cld_opd_dcomp_1' +target_param = 'cld_opd_dcomp_1' # target_param = 'cld_opd_dcomp_2' # target_param = 'cld_opd_dcomp_3' @@ -98,6 +98,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st total_num_not_missing = 0 num_skip = 2 + param_train_hist = np.zeros([20], dtype=np.int64) + param_valid_hist = np.zeros([20], dtype=np.int64) + for idx, data_f in enumerate(valid_files): if idx % num_skip == 0: # if we want to skip some files try: @@ -133,6 +136,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) num_valid_samples = valid_m.shape[0] + param_valid_hist += np.histogram(valid_m, bins=20, range=[0.0, 160.0])[0] + data_tiles_i = [] data_tiles_m = [] @@ -151,8 +156,10 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st np.save(out_directory + 'valid_mres_' + str(cnt), valid_m) np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) num_valid_samples = valid_m.shape[0] + param_valid_hist += np.histogram(valid_m, bins=20, range=[0.0, 160.0])[0] total_num_valid_samples += num_valid_samples print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing) + print(param_valid_hist) print('--------------------------------------------------') print('----------------------------------------------------------------') @@ -198,6 +205,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st np.save(out_directory + 'train_mres_' + str(cnt), train_m) num_train_samples = train_m.shape[0] + param_train_hist += np.histogram(train_m, bins=20, range=[0.0, 160.0])[0] + data_tiles_i = [] data_tiles_m = [] @@ -216,8 +225,10 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st np.save(out_directory + 'train_ires_' + str(cnt), train_i) np.save(out_directory + 'train_mres_' + str(cnt), train_m) num_train_samples = train_m.shape[0] + param_train_hist += np.histogram(train_m, bins=20, range=[0.0, 160.0])[0] total_num_train_samples += num_train_samples print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing) + print(param_train_hist) print('--------------------------------------------------') print('*** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)