diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py index f80d3910f8136df4ccefceda01590a9e17e16e24..54902c7274a0683ed3ec102e8650b14bbd380083 100644 --- a/modules/util/viirs_surfrad.py +++ b/modules/util/viirs_surfrad.py @@ -78,7 +78,6 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st num_files = len(data_files) print('Start, number of files: ', num_files) - kept_cnt = 0 for idx, data_f in enumerate(data_files): # if idx % 4 == 0: # if we want to skip some files @@ -90,49 +89,51 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st continue try: - total, kept = run(h5f, data_params, data_train_tiles, data_valid_tiles, - label_params, label_train_tiles, label_valid_tiles, - num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=5, day_night=day_night) + run(h5f, data_params, data_train_tiles, data_valid_tiles, + label_params, label_train_tiles, label_valid_tiles, + num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=5, day_night=day_night) except Exception as e: print(e) h5f.close() continue - kept_cnt += kept - print(data_f, kept_cnt, int(100 * (kept/total))) + print(data_f) f_cnt += 1 h5f.close() - if len(data_train_tiles) == 0: + if len(data_train_tiles) == 0 and len(data_valid_tiles) == 0: continue - # if (f_cnt % 5) == 0: - # num_valid_samples = 0 - # if len(data_valid_tiles) > 0: - # label_valid = np.stack(label_valid_tiles) - # data_valid = np.stack(data_valid_tiles) - # np.save(out_directory + 'data_valid_' + str(cnt), data_valid) - # np.save(out_directory + 'label_valid_' + str(cnt), label_valid) - # num_valid_samples = data_valid.shape[0] - # - # label_train = np.stack(label_train_tiles) - # data_train = np.stack(data_train_tiles) - # np.save(out_directory + 'label_train_' + str(cnt), label_train) - # np.save(out_directory + 'data_train_' + str(cnt), data_train) - # num_train_samples = data_train.shape[0] - # - # label_valid_tiles = [] - # label_train_tiles = [] - # data_valid_tiles = [] - # data_train_tiles = [] - # - # print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) - # total_num_train_samples += num_train_samples - # total_num_valid_samples += num_valid_samples - # print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) - # - # cnt += 1 - # - # print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) + if (f_cnt % 5) == 0: + num_valid_samples = 0 + if len(data_valid_tiles) > 0: + label_valid = np.stack(label_valid_tiles) + data_valid = np.stack(data_valid_tiles) + #np.save(out_directory + 'data_valid_' + str(cnt), data_valid) + #np.save(out_directory + 'label_valid_' + str(cnt), label_valid) + num_valid_samples = data_valid.shape[0] + + num_train_samples = 0 + if len(data_train_tiles) > 0: + label_train = np.stack(label_train_tiles) + data_train = np.stack(data_train_tiles) + #np.save(out_directory + 'label_train_' + str(cnt), label_train) + #np.save(out_directory + 'data_train_' + str(cnt), data_train) + num_train_samples = data_train.shape[0] + + label_valid_tiles = [] + label_train_tiles = [] + data_valid_tiles = [] + data_train_tiles = [] + + print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) + total_num_train_samples += num_train_samples + total_num_valid_samples += num_valid_samples + print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) + print('--------------------------------------------------') + + cnt += 1 + + print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) # tile_width: Must be even! @@ -182,8 +183,6 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb num_y_valid = int(num_keep_y_tiles * 0.1) + 1 num_y_train = num_keep_y_tiles - num_y_valid - 1 - cnt_total = 0 - cnt_kept = 0 for j in range(num_y_train): j_a = j_start + j * j_skip j_b = j_a + tile_width @@ -192,8 +191,6 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb i_a = i_start + i * i_skip i_b = i_a + tile_width - cnt_total += 1 - if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]): continue elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]): @@ -210,7 +207,6 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb if nda_lbl is not None: train_tiles.append(nda) lbl_train_tiles.append(nda_lbl) - cnt_kept += 1 j_start = num_y_train * tile_width + 2*tile_width for j in range(num_y_valid): @@ -221,8 +217,6 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb i_a = i_start + i * i_skip i_b = i_a + tile_width - cnt_total += 1 - if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]): continue elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]): @@ -239,9 +233,6 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb if nda_lbl is not None: valid_tiles.append(nda) lbl_valid_tiles.append(nda_lbl) - cnt_kept += 1 - - return cnt_total, cnt_kept # def run_mean_std(directory):