import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
import glob
import os
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
from pathlib import Path

# --- CLAVRx Radiometric parameters and metadata ------------------------------------------------
# The first 10 entries are brightness temperatures, the last 5 are reflectances.
l1b_ds_list = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
               'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
               'refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
l1b_ds_types = {ds: 'f4' for ds in l1b_ds_list}
l1b_ds_fill = {ds: -32767 for ds in l1b_ds_list[:10]}
l1b_ds_fill.update({ds: -32768 for ds in l1b_ds_list[10:]})
l1b_ds_range = {ds: 'actual_range' for ds in l1b_ds_list}

# --- CLAVRx L2 parameters and metadata
# The first 23 entries are stored as 'f4'; the last 3 (cloud_type, cloud_phase, cloud_mask) as 'i1'.
ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
           'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
           'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
           'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
           'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cloud_type', 'cloud_phase', 'cloud_mask']
ds_types = {ds: 'f4' for ds in ds_list[:23]}
ds_types.update({ds: 'i1' for ds in ds_list[23:]})
ds_fill = {ds: -32768 for ds in ds_list[:23]}
ds_fill.update({ds: -128 for ds in ds_list[23:]})
ds_range = {ds: 'actual_range' for ds in ds_list[:23]}
# The three 'i1' datasets get no range attribute.  (This used to index ds_list[0:3], which
# overwrote the range of three float datasets and left the 'i1' datasets without an entry.)
ds_range.update({ds: None for ds in ds_list[23:]})

ds_types.update(l1b_ds_types)
ds_fill.update(l1b_ds_fill)
ds_range.update(l1b_ds_range)

ds_types.update({'temp_3_9um_nom': 'f4'})
ds_types.update({'cloud_fraction': 'f4'})
ds_fill.update({'temp_3_9um_nom': -32767})
ds_fill.update({'cloud_fraction': -32768})
ds_range.update({'temp_3_9um_nom': 'actual_range'})
ds_range.update({'cloud_fraction': 'actual_range'})

emis_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_9um_nom',
               'temp_6_7um_nom']
# refl_params = ['refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
# data_params = refl_params + emis_params
# data_params = emis_params

l2_params = ['temp_11_0um_nom', 'temp_12_0um_nom', 'cloud_fraction']
# l2_params = ['temp_11_0um_nom', 'temp_12_0um_nom', 'refl_0_65um_nom', 'cloud_fraction']

# Data and label tiles currently use the same parameter list.
label_params = l2_params
data_params = l2_params


def keep_tile(param_s, tile):
    """Decide whether a tile has enough clear/overcast pixels to be kept.

    Args:
        param_s: list of parameter names in the order of the tile's first axis;
            must contain 'cloud_fraction'.
        tile: array indexed as tile[param, ...].

    Returns:
        True when more than 70% of the non-NaN 'cloud_fraction' pixels lie outside
        the open interval (0.05, 0.95); False otherwise, including when every
        pixel is NaN.
    """
    k = param_s.index('cloud_fraction')
    grd_k = tile[k, ].flatten()

    valid = np.invert(np.isnan(grd_k))
    total = np.sum(valid)
    if total == 0:
        return False

    # Replace NaNs before comparing.  The original computed this and discarded the result;
    # the outcome is unchanged because NaN pixels are masked out by `valid` below.
    grd_k = np.where(valid, grd_k, 0)
    partly_cloudy = np.logical_and(0.05 < grd_k, grd_k < 0.95)
    decided = np.logical_and(valid, np.invert(partly_cloudy))

    return bool(np.sum(decided) / total > 0.7)


def _save_tile_batch(out_directory, cnt, train_tiles, valid_tiles):
    """Stack and save one batch of tiles; return (num_train_samples, num_valid_samples)."""
    num_train_samples = 0
    num_valid_samples = 0

    # np.stack raises ValueError on an empty list, so only non-empty splits are written.
    if train_tiles:
        data_train = np.stack(train_tiles)
        np.save(out_directory+'data_train_' + str(cnt), data_train)
        num_train_samples = data_train.shape[0]
    if valid_tiles:
        data_valid = np.stack(valid_tiles)
        np.save(out_directory+'data_valid_' + str(cnt), data_valid)
        num_valid_samples = data_valid.shape[0]
    # np.save(out_directory+'label_train_' + str(cnt), label_train)
    # np.save(out_directory+'label_valid_' + str(cnt), label_valid)

    print('  file # done: ', cnt)
    print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)
    return num_train_samples, num_valid_samples


def run_all(directory, out_directory, day_night='ANY'):
    """Extract tiles from every matching file under `directory` and save them in batches.

    Files are found recursively with the pattern 'clavrx_snpp_viirs*.uwssec*.nc'.
    For each file the first 10% of the extracted tiles go to the validation split and
    the rest to the training split.  After every 5 files that yielded tiles, the
    accumulated splits are written as 'data_train_<n>.npy' / 'data_valid_<n>.npy'.
    Tiles left over from a final group of fewer than 5 files are written as a last batch.

    Args:
        directory: input root.  It is used as a plain string prefix, so it should end
            with a path separator.
        out_directory: output location, also used as a plain string prefix.
        day_night: 'DAY', 'NIGHT' or 'ANY'; passed through to run().
    """
    files_per_batch = 5
    # Output numbering starts at 10, as in the original.
    # NOTE(review): presumably this continues an earlier run - confirm.
    cnt = 10
    total_num_train_samples = 0
    total_num_valid_samples = 0

    # pattern = 'clavrx*highres*.nc'
    # pattern = 'VNP02MOD*.uwssec.nc'
    pattern = 'clavrx_snpp_viirs*.uwssec*.nc'
    path = directory + '**' + '/' + pattern

    data_files = glob.glob(path, recursive=True)

    data_valid_tiles = []
    data_train_tiles = []
    f_cnt = 0

    for data_f in data_files:
        # Disabled: a separate high-resolution label file used to be opened next to each data file.
        # pname, fname = os.path.split(data_f)
        # toks = fname.split('.')
        # label_f = pname + '/' + 'clavrx_VNP02MOD.' + toks[1]+'.'+toks[2]+'.'+toks[3]+'.'+toks[4]+'.'+'uwssec.highres.nc.level2.nc'
        # if not os.path.exists(label_f):
        #     continue

        try:
            data_h5f = h5py.File(data_f, 'r')
        except Exception:
            # Best effort: skip unreadable files, but let KeyboardInterrupt/SystemExit through.
            print('cant open file: ', data_f)
            continue

        data_tiles = []
        with data_h5f:
            try:
                run(data_h5f, data_params, data_tiles, tile_width=128, kernel_size=7, day_night=day_night)
                # run(label_h5f, label_params, label_tiles, tile_width=128, kernel_size=7, day_night=day_night)
            except Exception as e:
                print(e)
                continue

        if not data_tiles:
            continue

        num = len(data_tiles)
        n_vld = int(num * 0.1)
        data_valid_tiles.extend(data_tiles[:n_vld])
        data_train_tiles.extend(data_tiles[n_vld:])

        f_cnt += 1

        if f_cnt == files_per_batch:
            f_cnt = 0
            num_train_samples, num_valid_samples = _save_tile_batch(
                out_directory, cnt, data_train_tiles, data_valid_tiles)
            data_valid_tiles = []
            data_train_tiles = []

            total_num_train_samples += num_train_samples
            total_num_valid_samples += num_valid_samples
            cnt += 1

    # Flush the tiles of a trailing, incomplete group of files; previously they were dropped.
    if f_cnt > 0:
        num_train_samples, num_valid_samples = _save_tile_batch(
            out_directory, cnt, data_train_tiles, data_valid_tiles)
        total_num_train_samples += num_train_samples
        total_num_valid_samples += num_valid_samples

    print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)


def run(data_h5f, param_s, tiles, tile_width=64, kernel_size=9, day_night='ANY'):
    """Cut one granule into square tiles and append the accepted ones to `tiles`.

    Each parameter in `param_s` is read as a full (num_lines, num_pixels) grid and the
    grids are stacked along a new first axis.  Tiles are (tile_width + kernel_size - 1)
    on a side, do not overlap, and are taken from 8 columns centred on the middle pixel.
    A tile is appended when it passes the optional day/night filter and keep_tile().

    Args:
        data_h5f: open HDF5 file; the grid size is taken from the dataset param_s[0].
        param_s: list of dataset names to read; must contain 'cloud_fraction'.
        tiles: list that accepted tiles are appended to (modified in place).
        tile_width: tile size before the kernel border is added.
        kernel_size: kernel width; (kernel_size - 1) / 2 pixels are added on each side.
        day_night: 'DAY' keeps only tiles for which is_day() is true, 'NIGHT' only
            tiles for which it is false; any other value disables the filter.

    Returns:
        None.  If a parameter cannot be read the error is printed and nothing is appended.
    """
    border = int((kernel_size - 1)/2)

    param_name = param_s[0]
    num_lines = data_h5f[param_name].shape[0]
    num_pixels = data_h5f[param_name].shape[1]

    solzen = get_grid_values(data_h5f, 'solar_zenith_angle', 0, 0, None, num_lines, num_pixels)

    grd_s = []
    for param in param_s:
        fill_value, fill_value_name = get_fill_attrs(param)
        try:
            grd = get_grid_values(data_h5f, param, 0, 0, None, num_lines, num_pixels,
                                  fill_value_name=fill_value_name, fill_value=fill_value)
        except Exception as e:
            print(e)
            return
        grd_s.append(grd)

    data = np.stack(grd_s)

    tile_width += 2 * border

    num_keep_x_tiles = 8
    i_skip = tile_width
    j_skip = tile_width
    i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)

    num_keep_y_tiles = int(num_lines / tile_width) - 1

    for j in range(num_keep_y_tiles):
        j_a = j * j_skip
        j_b = j_a + tile_width
        for i in range(num_keep_x_tiles):
            i_a = i_start + i * i_skip
            i_b = i_a + tile_width

            # On granules narrower than 8 tiles the start index goes negative; a negative
            # slice bound would wrap around or give an empty tile, so skip those columns.
            if i_a < 0 or i_b > num_pixels:
                continue

            # NOTE(review): 'NIGHT' is implemented as "not is_day", not via is_night - confirm
            # that this is the intended treatment of tiles that are neither.
            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            if day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                continue

            nda = data[:, j_a:j_b, i_a:i_b]
            if not keep_tile(param_s, nda):
                continue
            tiles.append(nda)


def scan(directory):
    """Print every file of the CLAVRx_VIIRS data source that is night-time with moon_phase() true.

    Files whose 'solar_zenith_angle' cannot be read are skipped silently.

    Args:
        directory: passed to CLAVRx_VIIRS to build the file list and file times.
    """
    data_src = CLAVRx_VIIRS(directory)

    for idx, file in enumerate(data_src.flist):
        # The context manager closes the file on every path, including unexpected errors.
        with h5py.File(file, 'r') as h5f:
            ts = data_src.ftimes[idx][0]
            try:
                solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
            except Exception:
                # print(e)
                continue
            # if is_day(solzen) and moon_phase(ts):
            if is_night(solzen) and moon_phase(ts):
                print(file)


def scan_for_location(txt_file, lon_range=(111.0, 130.0), lat_range=(14.0, 32.0)):
    """Print the listed files whose reference pixel falls inside a lon/lat box.

    Longitude and latitude are read with stride 4 and the pixel at index [406, 400]
    of the strided grids is tested against the (exclusive) bounds.
    NOTE(review): [406, 400] is presumably near the granule centre - confirm.

    Args:
        txt_file: text file with one HDF5 file path per line; blank lines are ignored.
        lon_range: (min, max) longitude bounds.
        lat_range: (min, max) latitude bounds.
    """
    with open(txt_file) as file:
        for fpath in file:
            fpath = fpath.strip()
            if not fpath:
                continue
            # Previously the handle was only closed when an exception occurred.
            with h5py.File(fpath, 'r') as h5f:
                try:
                    lon_s = get_grid_values_all(h5f, 'longitude', stride=4)
                    lat_s = get_grid_values_all(h5f, 'latitude', stride=4)
                    c_lon, c_lat = lon_s[406, 400], lat_s[406, 400]
                    if (lon_range[0] < c_lon < lon_range[1]) and (lat_range[0] < c_lat < lat_range[1]):
                        print(fpath)
                except Exception:
                    # print(e)
                    continue


def test_nlcomp(file):
    """Compute LWC/IWC from the NLCOMP retrievals of one file.

    Only pixels where cloud phase, 'cld_reff_nlcomp', 'cld_opd_nlcomp' and
    'cld_geo_thick' are all non-NaN, and 'cld_geo_thick' exceeds 5.0, are used.

    Args:
        file: path of the HDF5 file to read.

    Returns:
        The (lwc, iwc) pair returned by compute_lwc_iwc for the selected pixels.
    """
    # The file is now closed on exit; the original left the handle open.
    with h5py.File(file, 'r') as h5f:
        cld_phs = get_grid_values_all(h5f, 'cloud_phase', scale_factor_name=None, range_name=None)
        keep_0 = np.invert(np.isnan(cld_phs))

        reff = get_grid_values_all(h5f, 'cld_reff_nlcomp')
        keep_1 = np.invert(np.isnan(reff))

        opd = get_grid_values_all(h5f, 'cld_opd_nlcomp')
        keep_2 = np.invert(np.isnan(opd))

        cld_dz = get_grid_values_all(h5f, 'cld_geo_thick')
        keep_3 = np.logical_and(np.invert(np.isnan(cld_dz)), cld_dz > 5.0)

        keep = keep_0 & keep_1 & keep_2 & keep_3

        cld_phs = cld_phs[keep]
        reff = reff[keep]
        opd = opd[keep]
        cld_dz = cld_dz[keep]

        lwc_c, iwc_c = compute_lwc_iwc(cld_phs, reff, opd, cld_dz)

    return lwc_c, iwc_c


# def run_mean_std(directory):
#
#     data_dct = {name: [] for name in mod_res_params}
#     mean_dct = {name: 0 for name in mod_res_params}
#     std_dct = {name: 0 for name in mod_res_params}
#
#     for p in os.scandir(directory):
#         if not p.is_dir():
#             continue
#         mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
#
#         for idx, mfile in enumerate(mod_files):
#             if idx % 8 == 0:
#                 h5f = h5py.File(mfile, 'r')
#                 for param in mod_res_params:
#                     name = 'observation_data/'+param
#                     gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
#                     data_dct[param].append(gvals.flatten())
#                 print(mfile)
#                 h5f.close()
#
#     for param in mod_res_params:
#         data = data_dct[param]
#         data = np.concatenate(data)
#
#         mean_dct[param] = np.nanmean(data)
#         std_dct[param] = np.nanstd(data)