From 505190054ed5dcc0bcb17461933adf8d24144655 Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Thu, 6 Oct 2022 12:13:06 -0500 Subject: [PATCH] snapshot... --- modules/util/viirs_l1b_l2.py | 184 +++++++++++++++++------------------ 1 file changed, 91 insertions(+), 93 deletions(-) diff --git a/modules/util/viirs_l1b_l2.py b/modules/util/viirs_l1b_l2.py index 8804f92e..a9b08dd3 100644 --- a/modules/util/viirs_l1b_l2.py +++ b/modules/util/viirs_l1b_l2.py @@ -80,103 +80,101 @@ def run_all(directory, out_directory, day_night='ANY'): total_num_train_samples = 0 total_num_valid_samples = 0 - for p in os.scandir(directory): - if not p.is_dir(): - continue - print(p.name) - - # data_files = glob.glob(directory + p.name+'/'+'clavrx*highres*.nc') - data_files = glob.glob(directory + p.name+'/'+'clavrx_snpp_viirs*.uwssec*.nc') - # data_files = glob.glob(directory + p.name + '/' + 'VNP02MOD*.uwssec.nc') - - label_valid_tiles = [] - label_train_tiles = [] - data_valid_tiles = [] - data_train_tiles = [] - f_cnt = 0 - - for idx, data_f in enumerate(data_files): - # if idx % 4 == 0: # if we want to skip some files - if True: - # w_o_ext, ext = os.path.splitext(data_f) - # pname, fname = os.path.split(data_f) - # toks = fname.split('.') - # label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc' - # if not os.path.exists(label_f): - # continue - - try: - data_h5f = h5py.File(data_f, 'r') - except: - print('cant open file: ', data_f) - continue - - # try: - # label_h5f = h5py.File(label_f, 'r') - # except: - # print('cant open file: ', label_f) - # data_h5f.close() - # continue - - data_tiles = [] - label_tiles = [] - - try: - run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night) - # run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night) - except Exception as e: - print(e) - data_h5f.close() - #label_h5f.close() - continue + # pattern = 'clavrx*highres*.nc' + # pattern = 'VNP02MOD*.uwssec.nc' + pattern = 'clavrx_snpp_viirs*.uwssec*.nc' + path = directory + '**' + '/' + pattern + + data_files = glob.glob(path, recursive=True) + + label_valid_tiles = [] + label_train_tiles = [] + data_valid_tiles = [] + data_train_tiles = [] + f_cnt = 0 + + for idx, data_f in enumerate(data_files): + # if idx % 4 == 0: # if we want to skip some files + if True: + # w_o_ext, ext = os.path.splitext(data_f) + # pname, fname = os.path.split(data_f) + # toks = fname.split('.') + # label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc' + # if not os.path.exists(label_f): + # continue + + try: + data_h5f = h5py.File(data_f, 'r') + except: + print('cant open file: ', data_f) + continue + # try: + # label_h5f = h5py.File(label_f, 'r') + # except: + # print('cant open file: ', label_f) + # data_h5f.close() + # continue + + data_tiles = [] + label_tiles = [] + + try: + run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night) + # run(data_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night) + except Exception as e: + print(e) data_h5f.close() #label_h5f.close() + continue + + data_h5f.close() + #label_h5f.close() + + # if len(data_tiles) == 0 or len(label_tiles) == 0: + # continue + # if len(data_tiles) != len(label_tiles): + # print('weirdness: ', data_f) + # continue + + if len(data_tiles) == 0: + continue - # if len(data_tiles) == 0 or len(label_tiles) == 0: - # continue - # if len(data_tiles) != len(label_tiles): - # print('weirdness: ', data_f) - # continue - - if len(data_tiles) == 0: - continue - - num = len(data_tiles) - n_vld = int(num * 0.1) - - # [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)] - # [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)] - [data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)] - [data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)] - - f_cnt += 1 - if f_cnt == 5: - f_cnt = 0 - - # label_valid = np.stack(label_valid_tiles) - # label_train = np.stack(label_train_tiles) - data_valid = np.stack(data_valid_tiles) - data_train = np.stack(data_train_tiles) - - np.save(out_directory+'data_train_' + str(cnt), data_train) - np.save(out_directory+'data_valid_' + str(cnt), data_valid) - # np.save(out_directory+'label_train_' + str(cnt), label_train) - # np.save(out_directory+'label_valid_' + str(cnt), label_valid) - - label_valid_tiles = [] - label_train_tiles = [] - data_valid_tiles = [] - data_train_tiles = [] - - num_train_samples = data_train.shape[0] - num_valid_samples = data_valid.shape[0] - print(' file # done: ', cnt) - print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples) - total_num_train_samples += num_train_samples - total_num_valid_samples += num_valid_samples - - cnt += 1 + num = len(data_tiles) + n_vld = int(num * 0.1) + + # [label_valid_tiles.append(label_tiles[k]) for k in range(n_vld)] + # [label_train_tiles.append(label_tiles[k]) for k in range(n_vld, num)] + [data_valid_tiles.append(data_tiles[k]) for k in range(n_vld)] + [data_train_tiles.append(data_tiles[k]) for k in range(n_vld, num)] + + f_cnt += 1 + if f_cnt == 5: + f_cnt = 0 + + # label_valid = np.stack(label_valid_tiles) + # label_train = np.stack(label_train_tiles) + data_valid = np.stack(data_valid_tiles) + data_train = np.stack(data_train_tiles) + + np.save(out_directory+'data_train_' + str(cnt), data_train) + np.save(out_directory+'data_valid_' + str(cnt), data_valid) + # np.save(out_directory+'label_train_' + str(cnt), label_train) + # np.save(out_directory+'label_valid_' + str(cnt), label_valid) + + label_valid_tiles = [] + label_train_tiles = [] + data_valid_tiles = [] + data_train_tiles = [] + + num_train_samples = data_train.shape[0] + num_valid_samples = data_valid.shape[0] + print(' file # done: ', cnt) + print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples) + total_num_train_samples += num_train_samples + total_num_valid_samples += num_valid_samples + + cnt += 1 print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) -- GitLab