"""Cut training/validation tiles out of HDF5 granule files and save them as .npy.

``run_all`` walks the sub-directories of an input directory, extracts tiles
from every matching ``clavrx_snpp_viirs*.uwssec*.h5`` file with ``run`` and
writes stacked arrays with ``numpy.save``.  ``scan`` is a small diagnostic that
counts the files ``is_night`` accepts.
"""
import glob
import os
from pathlib import Path

import h5py
import numpy as np

from util.util import get_grid_values, get_grid_values_all, is_night

emis_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
               'temp_6_7um_nom', 'temp_6_2um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom']
# refl_params = ['refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
# data_params = refl_params + emis_params
data_params = emis_params

l2_params = ['cloud_fraction', 'cld_temp_acha', 'cld_press_acha', 'cld_opd_acha', 'cld_reff_acha']

label_params = l2_params
# Alternative configurations kept for reference:
# data_params = ['cloud_fraction']
# label_params = ['cloud_fraction']
# data_params = ['observation_data/M15']
# label_params = ['observation_data/M15_highres']

# The first tiles of every file go to the validation set, the rest to training.
_NUM_VALID_TILES_PER_FILE = 4
# A batch is written once the per-directory file counter has reached this value,
# i.e. after (_FLUSH_FILE_COUNT + 1) successfully processed files.
_FLUSH_FILE_COUNT = 40
# Glob pattern of the input granules inside each sub-directory.
_DATA_FILE_PATTERN = 'clavrx_snpp_viirs*.uwssec*.h5'


def _open_h5(path):
    """Open *path* read-only; print a message and return None if that fails."""
    try:
        return h5py.File(path, 'r')
    except Exception:
        # Best effort: a single unreadable file must not abort the whole run.
        print('cant open file: ', path)
        return None


def _tiles_from_file(data_f, label_f):
    """Return ``(data_tiles, label_tiles)`` for one file pair, or None on failure.

    Both files are always closed again, whatever happens inside ``run``.
    """
    data_h5f = _open_h5(data_f)
    if data_h5f is None:
        return None
    with data_h5f:
        label_h5f = _open_h5(label_f)
        if label_h5f is None:
            return None
        with label_h5f:
            data_tiles = []
            label_tiles = []
            try:
                run(data_h5f, label_h5f, data_tiles, label_tiles, mod_tile_width=16, border=7)
            except Exception as e:
                print(e)
                return None
    return data_tiles, label_tiles


def _save_batch(out_directory, cnt, data_train_tiles, data_valid_tiles, label_train_tiles, label_valid_tiles):
    """Stack the four tile lists, save them under suffix *cnt*, return sample counts.

    Returns:
        ``(num_train, num_valid)``: the number of tiles in the saved data arrays.
    """
    data_train = np.stack(data_train_tiles)
    data_valid = np.stack(data_valid_tiles)
    label_train = np.stack(label_train_tiles)
    label_valid = np.stack(label_valid_tiles)

    # out_directory is used as a plain string prefix, exactly as given.
    np.save(out_directory + 'data_train_' + str(cnt), data_train)
    np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
    np.save(out_directory + 'label_train_' + str(cnt), label_train)
    np.save(out_directory + 'label_valid_' + str(cnt), label_valid)

    return data_train.shape[0], data_valid.shape[0]


def run_all(directory, out_directory):
    """Extract tiles from every granule below *directory* and save them in batches.

    For each sub-directory of *directory*, every file matching
    ``clavrx_snpp_viirs*.uwssec*.h5`` is tiled with ``run``.  The first
    ``_NUM_VALID_TILES_PER_FILE`` tiles of a file are added to the validation
    set and the remaining ones to the training set.  Batches are written as
    ``data_train_<n>``, ``data_valid_<n>``, ``label_train_<n>`` and
    ``label_valid_<n>`` (``numpy.save`` appends ``.npy``), with ``<n>``
    starting at 11.

    Args:
        directory: Directory whose sub-directories hold the input files.
        out_directory: String prefix for the output files; include a trailing
            path separator to write into a directory.
    """
    num_train_samples, num_valid_samples = 0, 0
    cnt = 10  # incremented before every save, so the first batch is number 11

    for p in os.scandir(directory):
        if not p.is_dir():
            continue
        data_files = glob.glob(os.path.join(p.path, _DATA_FILE_PATTERN))
        # data_files = glob.glob(os.path.join(p.path, 'VNP02MOD*.uwssec.nc'))

        label_valid_tiles = []
        label_train_tiles = []
        data_valid_tiles = []
        data_train_tiles = []
        f_cnt = 0

        for data_f in data_files:
            # Labels currently come from the same file as the input data.
            label_f = data_f
            # w_o_ext, ext = os.path.splitext(data_f)
            # label_f = w_o_ext+'.highres'+ext
            # label_f = label_f.replace('snpp_viirs', 'VNP02MOD')
            if not os.path.exists(label_f):
                continue

            tiles = _tiles_from_file(data_f, label_f)
            if tiles is None:
                continue
            data_tiles, label_tiles = tiles

            if not data_tiles or not label_tiles:
                continue
            if len(data_tiles) != len(label_tiles):
                print('weirdness: ', data_f)
                continue

            # Slicing (rather than indexing 0..3) copes with files that yield
            # fewer tiles than the validation share.
            label_valid_tiles.extend(label_tiles[:_NUM_VALID_TILES_PER_FILE])
            label_train_tiles.extend(label_tiles[_NUM_VALID_TILES_PER_FILE:])
            data_valid_tiles.extend(data_tiles[:_NUM_VALID_TILES_PER_FILE])
            data_train_tiles.extend(data_tiles[_NUM_VALID_TILES_PER_FILE:])

            # Defer the flush while there is nothing to train on; np.stack
            # cannot handle an empty list.
            if f_cnt >= _FLUSH_FILE_COUNT and data_train_tiles:
                cnt += 1
                n_train, n_valid = _save_batch(out_directory, cnt, data_train_tiles, data_valid_tiles,
                                               label_train_tiles, label_valid_tiles)
                num_train_samples += n_train
                num_valid_samples += n_valid

                label_valid_tiles = []
                label_train_tiles = []
                data_valid_tiles = []
                data_train_tiles = []
                f_cnt = 0
            else:
                f_cnt += 1

        # Write whatever is left over for this sub-directory.
        if not label_train_tiles or not data_train_tiles:
            continue
        if len(label_train_tiles) != len(data_train_tiles):
            print('weirdness')
            continue

        cnt += 1
        n_train, n_valid = _save_batch(out_directory, cnt, data_train_tiles, data_valid_tiles,
                                       label_train_tiles, label_valid_tiles)
        num_train_samples += n_train
        num_valid_samples += n_valid

    print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)


def run(data_h5f, label_h5f, data_tiles, label_tiles, mod_tile_width=64, border=9):
    """Append matching data/label tiles cut from one pair of open files.

    All ``data_params`` are read from *data_h5f* and all ``label_params`` from
    *label_h5f* and stacked along a new leading axis.  Tiles are taken from a
    single column of tiles centred in the pixel dimension, stepping three tile
    widths down the line dimension.  A data tile is ``mod_tile_width + 2 *
    border`` wide; the label tile covers only the un-bordered core, scaled by
    the integer ratio of label pixels to data pixels.

    If any parameter cannot be read the error is printed and nothing is
    appended.

    Args:
        data_h5f: Open file object holding ``data_params``.
        label_h5f: Open file object holding ``label_params``; ``None`` means
            use *data_h5f*.
        data_tiles: List that receives the data tiles (modified in place).
        label_tiles: List that receives the label tiles (modified in place).
        mod_tile_width: Width of the tile core in data-grid pixels.
        border: Extra data-grid pixels kept on every side of the core.
    """
    if label_h5f is None:
        label_h5f = data_h5f

    l1b_param_name = data_params[0]
    l2_param_name = label_params[0]

    mod_num_lines, mod_num_pixels = data_h5f[l1b_param_name].shape[:2]
    img_num_lines, img_num_pixels = label_h5f[l2_param_name].shape[:2]

    # Resolution ratio between the label grid and the data grid.
    factor = img_num_pixels // mod_num_pixels
    img_tile_width = mod_tile_width * factor

    try:
        l1b_grd_s = [get_grid_values(data_h5f, param, 0, 0, None, mod_num_lines, mod_num_pixels, range_name=None)
                     for param in data_params]
        l2_grd_s = [get_grid_values(label_h5f, param, 0, 0, None, img_num_lines, img_num_pixels, range_name=None)
                    for param in label_params]
    except Exception as e:
        print(e)
        return

    mod_data = np.stack(l1b_grd_s)
    img_data = np.stack(l2_grd_s)

    # num_keep_x_tiles = 3
    num_keep_x_tiles = 1
    i_skip = 3 * mod_tile_width
    # i_start = int(mod_num_pixels / 2) - int((num_keep_x_tiles * 3 * mod_tile_width) / 2)
    i_start = mod_num_pixels // 2 - mod_tile_width // 2

    # num_keep_y_tiles = 16
    num_keep_y_tiles = 48
    j_skip = 3 * mod_tile_width

    padded_width = mod_tile_width + 2 * border

    for j in range(num_keep_y_tiles):
        j_lo = j * j_skip
        j_stop = j_lo + padded_width
        if j_stop > mod_num_lines - 1:
            # Rows only move further down, so no later tile can fit either.
            break
        j_i = (j_lo + border) * factor

        for i in range(num_keep_x_tiles):
            i_lo = i * i_skip + i_start
            i_stop = i_lo + padded_width
            if i_stop > mod_num_pixels - 1:
                continue
            i_i = (i_lo + border) * factor

            # Copy: a plain slice is a view that would keep the complete
            # stacked granule alive for as long as the tile is referenced.
            data_tiles.append(mod_data[:, j_lo:j_stop, i_lo:i_stop].copy())
            label_tiles.append(img_data[:, j_i:j_i + img_tile_width, i_i:i_i + img_tile_width].copy())


def scan(directory):
    """Print the files below *directory* that ``is_night`` accepts.

    For every sub-directory, each ``clavrx_snpp_viirs*.uwssec*.h5`` file has
    its ``solar_zenith_angle`` read; files for which ``is_night`` is true are
    printed.  Files whose angle cannot be read are skipped silently.  After
    each sub-directory the total and accepted file counts are printed.
    """
    for p in os.scandir(directory):
        if not p.is_dir():
            continue
        files = glob.glob(os.path.join(p.path, _DATA_FILE_PATTERN))
        num_files = len(files)
        num_keep = 0

        for file in files:
            # The context manager closes the file on every path out of the block.
            with h5py.File(file, 'r') as h5f:
                try:
                    solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
                except Exception:
                    # print(e)
                    continue
                if is_night(solzen):
                    num_keep += 1
                    print(file)

        print(num_files, num_keep)


# def run_mean_std(directory):
#
#     data_dct = {name: [] for name in mod_res_params}
#     mean_dct = {name: 0 for name in mod_res_params}
#     std_dct = {name: 0 for name in mod_res_params}
#
#     for p in os.scandir(directory):
#         if not p.is_dir():
#             continue
#         mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
#
#         for idx, mfile in enumerate(mod_files):
#             if idx % 8 == 0:
#                 h5f = h5py.File(mfile, 'r')
#                 for param in mod_res_params:
#                     name = 'observation_data/'+param
#                     gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
#                     data_dct[param].append(gvals.flatten())
#                 print(mfile)
#                 h5f.close()
#
#     for param in mod_res_params:
#         data = data_dct[param]
#         data = np.concatenate(data)
#
#         mean_dct[param] = np.nanmean(data)
#         std_dct[param] = np.nanstd(data)