From 6f04c860ca4c7a053cf3dcc776479cf7eaa7be51 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Tue, 31 Jan 2023 13:09:03 -0600
Subject: [PATCH] initial commit...

---
 modules/util/viirs_surfrad.py | 365 ++++++++++++++++++++++++++++++++++
 1 file changed, 365 insertions(+)
 create mode 100644 modules/util/viirs_surfrad.py

diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py
new file mode 100644
index 00000000..bdc2ff8b
--- /dev/null
+++ b/modules/util/viirs_surfrad.py
@@ -0,0 +1,365 @@
import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
import glob
import os
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
from pathlib import Path

# --- CLAVRx Radiometric parameters and metadata ------------------------------------------------
l1b_ds_list = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
               'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
               'refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']

l1b_ds_types = {ds: 'f4' for ds in l1b_ds_list}
l1b_ds_fill = {l1b_ds_list[i]: -32767 for i in range(10)}  # brightness temperatures
l1b_ds_fill.update({l1b_ds_list[i+10]: -32768 for i in range(5)})  # reflectances
l1b_ds_range = {ds: 'actual_range' for ds in l1b_ds_list}

# --- CLAVRx L2 parameters and metadata ----------------------------------------------------------
ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
           'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
           'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
           'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
           'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cloud_type', 'cloud_phase', 'cloud_mask']

# The first 23 datasets are continuous float fields; the last 3 (cloud_type, cloud_phase, cloud_mask)
# are categorical byte fields with no actual_range attribute.
ds_types = {ds_list[i]: 'f4' for i in range(23)}
ds_types.update({ds_list[i+23]: 'i1' for i in range(3)})
ds_fill = {ds_list[i]: -32768 for i in range(23)}
ds_fill.update({ds_list[i+23]: -128 for i in range(3)})
ds_range = {ds_list[i]: 'actual_range' for i in range(23)}
ds_range.update({ds_list[i+23]: None for i in range(3)})

ds_types.update(l1b_ds_types)
ds_fill.update(l1b_ds_fill)
ds_range.update(l1b_ds_range)

ds_types.update({'temp_3_9um_nom': 'f4'})
ds_types.update({'cloud_fraction': 'f4'})
ds_fill.update({'temp_3_9um_nom': -32767})
ds_fill.update({'cloud_fraction': -32768})
ds_range.update({'temp_3_9um_nom': 'actual_range'})
ds_range.update({'cloud_fraction': 'actual_range'})


emis_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_9um_nom',
               'temp_6_7um_nom']
# refl_params = ['refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
# data_params = refl_params + emis_params
# data_params = emis_params

# target_param = 'cloud_probability'
target_param = 'cld_opd_dcomp'

group_name_i = 'super/'
group_name_m = 'orig/'

solzen_name = group_name_m + 'solar_zenith'

# run() iterates the label parameter names, so keep this a list even for a single target.
label_params = [group_name_i + target_param]
data_params = [group_name_m+'temp_11_0um', group_name_m+'temp_12_0um', group_name_m+'refl_0_65um', group_name_m+target_param]


def keep_tile(param_s, tile):
    k = param_s.index(group_name_m + target_param)
    grd_k = tile[k, ].copy()

    if target_param == 'cloud_probability':
        grd_k = process_cld_prob_(grd_k)
    elif target_param == 'cld_opd_dcomp':
        grd_k = process_cld_opd_(grd_k)

    if grd_k is not None:
        tile[k, ] = grd_k
        return tile
    else:
        return None
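

# --- Illustrative sketch (not part of the original processing): exercises keep_tile on a
# --- synthetic (channels, y, x) tile stack. The 68x68 size mirrors tile_width=64 plus the
# --- 2-pixel border run() adds on each side for kernel_size=3; the random values are
# --- assumptions for demonstration only.
def _example_keep_tile():
    rng = np.random.default_rng(0)
    tile = rng.uniform(0.5, 50.0, size=(len(data_params), 68, 68)).astype(np.float32)
    k = data_params.index(group_name_m + target_param)
    tile[k, 0, 0:10] = np.nan  # a few missing pixels; still above the 98% valid threshold
    kept = keep_tile(data_params, tile)
    if kept is None:
        print('tile rejected by keep_tile')
    else:
        print('tile kept; target channel min/max:', kept[k].min(), kept[k].max())
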

def process_cld_prob_(grd_k):
    # Reject tiles with < 98% valid (non-NaN) pixels, or with < 50% of the valid pixels
    # falling in (0.05, 0.95); everything outside that range is set to 0.
    keep = np.invert(np.isnan(grd_k))
    num_keep = np.sum(keep)
    if num_keep / grd_k.size < 0.98:
        return None
    keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
    if np.sum(keep)/num_keep < 0.50:
        return None
    grd_k = np.where(np.invert(keep), 0, grd_k)
    return grd_k


def process_cld_opd_(grd_k):
    # Reject tiles with < 98% valid (non-NaN) pixels, or with < 50% of the valid pixels
    # falling in (0.1, 158.0); NaNs are replaced with 0, in-range values are kept as-is.
    keep = np.invert(np.isnan(grd_k))
    num_keep = np.sum(keep)
    if num_keep / grd_k.size < 0.98:
        return None
    grd_k = np.where(np.invert(keep), 0, grd_k)
    keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
    if np.sum(keep)/num_keep < 0.50:
        return None
    return grd_k


def run_all(directory, out_directory, day_night='ANY', start=10):
    cnt = start
    total_num_train_samples = 0
    total_num_valid_samples = 0
    num_keep_x_tiles = 8

    # pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc'
    pattern = 'clavrx_*.nc'
    path = directory + '**' + '/' + pattern

    data_files = glob.glob(path, recursive=True)

    label_valid_tiles = []
    label_train_tiles = []
    data_valid_tiles = []
    data_train_tiles = []
    f_cnt = 0

    num_files = len(data_files)

    print('Start, number of files: ', num_files)

    for idx, data_f in enumerate(data_files):
        # if idx % 4 == 0:  # if we want to skip some files
        if True:
            try:
                h5f = h5py.File(data_f, 'r')
            except Exception:
                print("can't open file: ", data_f)
                continue

            try:
                run(h5f, data_params, data_train_tiles, data_valid_tiles, label_params, label_train_tiles, label_valid_tiles,
                    num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=3, day_night=day_night)
            except Exception as e:
                print(e)
                h5f.close()
                # label_h5f.close()
                continue
            print(data_f)
            f_cnt += 1

            h5f.close()
            # label_h5f.close()

            if len(data_train_tiles) == 0:
                continue

            # Write out accumulated tiles every 5 successfully processed files.
            if (f_cnt % 5) == 0:
                num_valid_samples = 0
                if len(data_valid_tiles) > 0:
                    # label_valid = np.stack(label_valid_tiles)
                    data_valid = np.stack(data_valid_tiles)
                    np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
                    # np.save(out_directory+'label_valid_' + str(cnt), label_valid)
                    num_valid_samples = data_valid.shape[0]

                # label_train = np.stack(label_train_tiles)
                # np.save(out_directory+'label_train_' + str(cnt), label_train)
                data_train = np.stack(data_train_tiles)
                np.save(out_directory+'data_train_' + str(cnt), data_train)
                num_train_samples = data_train.shape[0]

                label_valid_tiles = []
                label_train_tiles = []
                data_valid_tiles = []
                data_train_tiles = []

                print('  num_train_samples, num_valid_samples, progress %: ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
                total_num_train_samples += num_train_samples
                total_num_valid_samples += num_valid_samples
                print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)

                cnt += 1

    print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
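

# --- Illustrative sketch (not part of the original pipeline): shows how run_all might be
# --- invoked and how the numbered data_train_*.npy batches it writes could be reloaded.
# --- The directory paths below are placeholders, not values used by this module.
def _example_run_all_and_reload(clavrx_dir='/data/clavrx/', out_dir='/data/tiles/'):
    run_all(clavrx_dir, out_dir, day_night='ANY', start=10)
    # Each saved file holds a stack of tiles shaped (num_tiles, num_channels, y, x).
    train_files = sorted(glob.glob(out_dir + 'data_train_*.npy'))
    if len(train_files) > 0:
        data_train = np.concatenate([np.load(f) for f in train_files], axis=0)
        print('total training tiles:', data_train.shape[0])
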

# tile_width: Must be even!
# kernel_size: Must be odd!
def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lbl_valid_tiles,
        num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='DAY'):

    # Halo of extra pixels kept on each side of a tile for the given kernel_size.
    border = int((kernel_size - 1)/2) + 1

    param_name = param_s[0]

    num_lines = h5f[param_name].shape[0]
    num_pixels = h5f[param_name].shape[1]  # Must be even

    if day_night != 'BOTH':  # solar zenith is only needed when filtering tiles by DAY/NIGHT
        solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)

    grd_s = []
    for param in param_s:
        fill_value, fill_value_name = get_fill_attrs(param)
        try:
            grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels, fill_value_name=fill_value_name, fill_value=fill_value)
            grd_s.append(grd)
        except Exception as e:
            print(e)
            return
    data = np.stack(grd_s)

    # The label (high-resolution) grid is read at twice the data grid dimensions.
    grd_s = []
    for param in lbl_param_s:
        fill_value, fill_value_name = get_fill_attrs(param)
        try:
            grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2, fill_value_name=fill_value_name, fill_value=fill_value)
            grd_s.append(grd)
        except Exception as e:
            print(e)
            return
    label = np.stack(grd_s)

    tile_width += 2 * border

    i_skip = tile_width
    j_skip = tile_width
    i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
    j_start = 0

    num_keep_y_tiles = int(num_lines / tile_width) - 3

    num_y_valid = int(num_keep_y_tiles * 0.1) + 1
    num_y_train = num_keep_y_tiles - num_y_valid - 1

    for j in range(num_y_train):
        j_a = j_start + j * j_skip
        j_b = j_a + tile_width

        for i in range(num_keep_x_tiles):
            i_a = i_start + i * i_skip
            i_b = i_a + tile_width

            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                continue

            nda = data[:, j_a:j_b, i_a:i_b]
            nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
            nda = keep_tile(param_s, nda)
            if nda is not None:
                train_tiles.append(nda)
                lbl_train_tiles.append(nda_lbl)

    # Leave a two-tile gap between the training and validation rows.
    j_start = num_y_train * tile_width + 2*tile_width
    for j in range(num_y_valid):
        j_a = j_start + j * j_skip
        j_b = j_a + tile_width

        for i in range(num_keep_x_tiles):
            i_a = i_start + i * i_skip
            i_b = i_a + tile_width

            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                continue

            nda = data[:, j_a:j_b, i_a:i_b]
            nda_lbl = label[:, j_a * 2:j_b * 2, i_a * 2:i_b * 2]
            nda = keep_tile(param_s, nda)
            if nda is not None:
                valid_tiles.append(nda)
                lbl_valid_tiles.append(nda_lbl)


def scan(directory):

    data_src = CLAVRx_VIIRS(directory)
    files = data_src.flist

    for idx, file in enumerate(files):
        h5f = h5py.File(file, 'r')
        ts = data_src.ftimes[idx][0]
        try:
            solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
        except Exception as e:
            # print(e)
            h5f.close()
            continue

        # if is_day(solzen) and moon_phase(ts):
        if is_night(solzen) and moon_phase(ts):
            print(file)
        h5f.close()


def scan_for_location(txt_file, lon_range=[111.0, 130.0], lat_range=[14.0, 32.0]):
    with open(txt_file) as file:
        for idx, fpath in enumerate(file):
            fpath = fpath.strip()
            h5f = h5py.File(fpath, 'r')
            try:
                lon_s = get_grid_values_all(h5f, 'longitude', stride=4)
                lat_s = get_grid_values_all(h5f, 'latitude', stride=4)
                # Grid point near the granule center on the stride-4 grid.
                c_lon, c_lat = lon_s[406, 400], lat_s[406, 400]
                if (lon_range[0] < c_lon < lon_range[1]) and (lat_range[0] < c_lat < lat_range[1]):
                    print(fpath)

            except Exception as e:
                # print(e)
                h5f.close()
                continue
            h5f.close()
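

# --- Illustrative sketch (not part of the original module): demonstrates the index arithmetic
# --- run() uses to pair a low-resolution data tile with its 2x-resolution label window.
# --- The tile origin chosen below is an assumption for demonstration only.
def _example_tile_label_alignment(tile_width=64, kernel_size=3):
    border = int((kernel_size - 1) / 2) + 1
    tile_width += 2 * border  # 64 -> 68 for kernel_size=3
    j_a, i_a = 0, 100  # arbitrary tile origin on the low-resolution (data) grid
    j_b, i_b = j_a + tile_width, i_a + tile_width
    # The label grid is read at num_lines*2 by num_pixels*2, so the matching label window
    # has doubled start indices and doubled extent.
    print('data window : rows', (j_a, j_b), 'cols', (i_a, i_b), 'size', (j_b - j_a, i_b - i_a))
    print('label window: rows', (j_a * 2, j_b * 2), 'cols', (i_a * 2, i_b * 2), 'size', ((j_b - j_a) * 2, (i_b - i_a) * 2))
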

def test_nlcomp(file):
    h5f = h5py.File(file, 'r')

    cld_phs = get_grid_values_all(h5f, 'cloud_phase', scale_factor_name=None, range_name=None)
    keep_0 = np.invert(np.isnan(cld_phs))

    reff = get_grid_values_all(h5f, 'cld_reff_nlcomp')
    keep_1 = np.invert(np.isnan(reff))

    opd = get_grid_values_all(h5f, 'cld_opd_nlcomp')
    keep_2 = np.invert(np.isnan(opd))

    cld_dz = get_grid_values_all(h5f, 'cld_geo_thick')
    keep_3 = np.logical_and(np.invert(np.isnan(cld_dz)), cld_dz > 5.0)

    keep = keep_0 & keep_1 & keep_2 & keep_3

    cld_phs = cld_phs[keep]
    reff = reff[keep]
    opd = opd[keep]
    cld_dz = cld_dz[keep]

    lwc_c, iwc_c = compute_lwc_iwc(cld_phs, reff, opd, cld_dz)

    h5f.close()

    return lwc_c, iwc_c


# def run_mean_std(directory):
#
#     data_dct = {name: [] for name in mod_res_params}
#     mean_dct = {name: 0 for name in mod_res_params}
#     std_dct = {name: 0 for name in mod_res_params}
#
#     for p in os.scandir(directory):
#         if not p.is_dir():
#             continue
#         mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
#
#         for idx, mfile in enumerate(mod_files):
#             if idx % 8 == 0:
#                 h5f = h5py.File(mfile, 'r')
#                 for param in mod_res_params:
#                     name = 'observation_data/'+param
#                     gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
#                     data_dct[param].append(gvals.flatten())
#                 print(mfile)
#                 h5f.close()
#
#     for param in mod_res_params:
#         data = data_dct[param]
#         data = np.concatenate(data)
#
#         mean_dct[param] = np.nanmean(data)
#         std_dct[param] = np.nanstd(data)
--
GitLab