From 910f1676e657e52fcccdb1858d40b7b8b4751113 Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Wed, 1 Feb 2023 13:51:23 -0600 Subject: [PATCH] initial commit --- modules/util/abi_surfrad.py | 167 ++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 modules/util/abi_surfrad.py diff --git a/modules/util/abi_surfrad.py b/modules/util/abi_surfrad.py new file mode 100644 index 00000000..98f36e86 --- /dev/null +++ b/modules/util/abi_surfrad.py @@ -0,0 +1,167 @@ +import numpy as np +import h5py +from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs +import glob +import os +from aeolus.datasource import CLAVRx_VIIRS +from icing.moon_phase import * +from pathlib import Path + + +# target_param = 'cloud_probability' +target_param = 'cld_opd_dcomp' + +group_name_i = 'super/' +group_name_m = 'orig/' + +solzen_name = group_name_m + 'solar_zenith' + +label_params = [group_name_i+target_param] +data_params = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param] + + +def keep_tile(param, param_s, tile): + k = param_s.index(param) + grd_k = tile[k, ].copy() + + if target_param == 'cloud_probability': + grd_k = process_cld_prob_(grd_k) + elif target_param == 'cld_opd_dcomp': + grd_k = process_cld_opd_(grd_k) + + if grd_k is not None: + tile[k, ] = grd_k + return tile + else: + return None + + +def process_cld_prob_(grd_k): + keep = np.invert(np.isnan(grd_k)) + num_keep = np.sum(keep) + if num_keep / grd_k.size < 0.98: + return None + keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False) + if np.sum(keep)/num_keep < 0.50: + return None + grd_k = np.where(np.invert(keep), 0, grd_k) + return grd_k + + +def process_cld_opd_(grd_k): + keep = np.invert(np.isnan(grd_k)) + num_keep = np.sum(keep) + if num_keep / grd_k.size < 0.98: + return None + grd_k = np.where(np.invert(keep), 0, grd_k) + keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False) + if np.sum(keep)/num_keep < 0.50: + return None + return grd_k + + +def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10): + cnt = start + total_num_train_samples = 0 + total_num_valid_samples = 0 + + path = directory + '**' + '/' + pattern + + files = glob.glob(path, recursive=True) + + label_tiles = [] + data_tiles = [] + f_cnt = 0 + + num_files = len(files) + print('Start, number of files: ', num_files) + + for idx, f in enumerate(files): + # if idx % 4 == 0: # if we want to skip some files + if True: + try: + h5f = h5py.File(f, 'r') + except: + print('cant open file: ', f) + continue + + try: + run(h5f, data_params, data_tiles, label_params, label_tiles, tile_width=64, kernel_size=5) + except Exception as e: + print(e) + h5f.close() + continue + + print(f) + f_cnt += 1 + h5f.close() + + if len(data_tiles) == 0: + continue + + if (f_cnt % 100) == 0: + num_valid_samples = 0 + + label = np.stack(label_tiles) + data = np.stack(data_tiles) + np.save(out_directory + 'label_train_' + str(cnt), label) + np.save(out_directory + 'data_train_' + str(cnt), data) + num_samples = data.shape[0] + + label_tiles = [] + data_tiles = [] + + # print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) + # total_num_train_samples += num_samples + # total_num_valid_samples += num_valid_samples + # print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) + + cnt += 1 + + print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) + + +# tile_width: Must be even! +# kernel_size: Must be odd! +def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, tile_width=64, kernel_size=3): + + border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects + + param_name = param_s[0] + + num_lines = h5f[param_name].shape[0] + num_pixels = h5f[param_name].shape[1] # Must be even + + grd_s = [] + for param in param_s: + try: + grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels) + grd_s.append(grd) + except Exception as e: + print(e) + return + data = np.stack(grd_s) + + grd_s = [] + for param in lbl_param_s: + try: + grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2) + grd_s.append(grd) + except Exception as e: + print(e) + return + label = np.stack(grd_s) + + nda = data[:, :, :] + nda = keep_tile(group_name_m + target_param, param_s, nda) + if nda is None: # if none, no need to check the next one + return + + nda_lbl = label[:, :, :] + nda_lbl = keep_tile(group_name_i + target_param, lbl_param_s, nda_lbl) + + if nda_lbl is not None: + tiles.append(nda) + lbl_tiles.append(nda_lbl) + + -- GitLab