From 6f04c860ca4c7a053cf3dcc776479cf7eaa7be51 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Tue, 31 Jan 2023 13:09:03 -0600
Subject: [PATCH] initial commit...

---
 modules/util/viirs_surfrad.py | 365 ++++++++++++++++++++++++++++++++++
 1 file changed, 365 insertions(+)
 create mode 100644 modules/util/viirs_surfrad.py

diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py
new file mode 100644
index 00000000..bdc2ff8b
--- /dev/null
+++ b/modules/util/viirs_surfrad.py
@@ -0,0 +1,365 @@
+import numpy as np
+import h5py
+from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
+import glob
+import os
+from aeolus.datasource import CLAVRx_VIIRS
+from icing.moon_phase import *
+from pathlib import Path
+
+# --- CLAVRx Radiometric parameters and metadata ------------------------------------------------
+l1b_ds_list = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
+               'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
+               'refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
+
+l1b_ds_types = {ds: 'f4' for ds in l1b_ds_list}
+l1b_ds_fill = {l1b_ds_list[i]: -32767 for i in range(10)}
+l1b_ds_fill.update({l1b_ds_list[i+10]: -32768 for i in range(5)})
+l1b_ds_range = {ds: 'actual_range' for ds in l1b_ds_list}
+
+# --- CLAVRx L2 parameters and metadata
+ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
+           'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
+           'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
+           'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
+           'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cloud_type', 'cloud_phase', 'cloud_mask']
+
+ds_types = {ds_list[i]: 'f4' for i in range(23)}
+ds_types.update({ds_list[i+23]: 'i1' for i in range(3)})
+ds_fill = {ds_list[i]: -32768 for i in range(23)}
+ds_fill.update({ds_list[i+23]: -128 for i in range(3)})
+ds_range = {ds_list[i]: 'actual_range' for i in range(23)}
+ds_range.update({ds_list[i]: None for i in range(3)})
+
+ds_types.update(l1b_ds_types)
+ds_fill.update(l1b_ds_fill)
+ds_range.update(l1b_ds_range)
+
+ds_types.update({'temp_3_9um_nom': 'f4'})
+ds_types.update({'cloud_fraction': 'f4'})
+ds_fill.update({'temp_3_9um_nom': -32767})
+ds_fill.update({'cloud_fraction': -32768})
+ds_range.update({'temp_3_9um_nom': 'actual_range'})
+ds_range.update({'cloud_fraction': 'actual_range'})
+
+
+emis_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_9um_nom',
+               'temp_6_7um_nom']
+# refl_params = ['refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
+# data_params = refl_params + emis_params
+# data_params = emis_params
+
+# target_param = 'cloud_probability'
+target_param = 'cld_opd_dcomp'
+
+group_name_i = 'super/'
+group_name_m = 'orig/'
+
+solzen_name = group_name_m + 'solar_zenith'
+
+label_params = group_name_i+target_param
+data_params = [group_name_m+'temp_11_0um', group_name_m+'temp_12_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
+
+
+def keep_tile(param_s, tile):
+    k = param_s.index(group_name_m + target_param)
+    grd_k = tile[k, ].copy()
+
+    if target_param == 'cloud_probability':
+        grd_k = process_cld_prob_(grd_k)
+    elif target_param == 'cld_opd_dcomp':
+        grd_k = process_cld_opd_(grd_k)
+
+    if grd_k is not None:
+        tile[k, ] = grd_k
+        return tile
+    else:
+        return None
+
+
+def process_cld_prob_(grd_k):
+    keep = np.invert(np.isnan(grd_k))
+    num_keep = np.sum(keep)
+    if num_keep / grd_k.size < 0.98:
+        return None
+    keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
+    if np.sum(keep)/num_keep < 0.50:
+        return None
+    grd_k = np.where(np.invert(keep), 0, grd_k)
+    return grd_k
+
+
+def process_cld_opd_(grd_k):
+    keep = np.invert(np.isnan(grd_k))
+    num_keep = np.sum(keep)
+    if num_keep / grd_k.size < 0.98:
+        return None
+    grd_k = np.where(np.invert(keep), 0, grd_k)
+    keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
+    if np.sum(keep)/num_keep < 0.50:
+        return None
+    return grd_k
+
+
+def run_all(directory, out_directory, day_night='ANY', start=10):
+    cnt = start
+    total_num_train_samples = 0
+    total_num_valid_samples = 0
+    num_keep_x_tiles = 8
+
+    pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc'
+    pattern = 'clavrx_*.nc'
+    path = directory + '**' + '/' + pattern
+
+    data_files = glob.glob(path, recursive=True)
+
+    label_valid_tiles = []
+    label_train_tiles = []
+    data_valid_tiles = []
+    data_train_tiles = []
+    f_cnt = 0
+
+    num_files = len(data_files)
+
+    print('Start, number of files: ', num_files)
+
+    for idx, data_f in enumerate(data_files):
+        # if idx % 4 == 0:  # if we want to skip some files
+        if True:
+            try:
+                h5f = h5py.File(data_f, 'r')
+            except:
+                print('cant open file: ', data_f)
+                continue
+
+            try:
+                run(h5f, data_params, data_train_tiles, data_valid_tiles, label_params, label_train_tiles, label_valid_tiles,
+                    num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=3, day_night=day_night)
+            except Exception as e:
+                print(e)
+                h5f.close()
+                # label_h5f.close()
+                continue
+            print(data_f)
+            f_cnt += 1
+
+            h5f.close()
+            # label_h5f.close()
+
+            if len(data_train_tiles) == 0:
+                continue
+
+            if (f_cnt % 5) == 0:
+                num_valid_samples = 0
+                if len(data_valid_tiles) > 0:
+                    # label_valid = np.stack(label_valid_tiles)
+                    data_valid = np.stack(data_valid_tiles)
+                    np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
+                    # np.save(out_directory+'label_valid_' + str(cnt), label_valid)
+                    num_valid_samples = data_valid.shape[0]
+
+                # label_train = np.stack(label_train_tiles)
+                # np.save(out_directory+'label_train_' + str(cnt), label_train)
+                data_train = np.stack(data_train_tiles)
+                np.save(out_directory+'data_train_' + str(cnt), data_train)
+                num_train_samples = data_train.shape[0]
+
+                label_valid_tiles = []
+                label_train_tiles = []
+                data_valid_tiles = []
+                data_train_tiles = []
+
+                print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
+                total_num_train_samples += num_train_samples
+                total_num_valid_samples += num_valid_samples
+                print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+
+                cnt += 1
+
+    print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+
+
+#  tile_width: Must be even!
+#  kernel_size: Must be odd!
+def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lbl_valid_tiles,
+        num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='DAY'):
+
+    border = int((kernel_size - 1)/2) + 1
+
+    param_name = param_s[0]
+
+    num_lines = h5f[param_name].shape[0]
+    num_pixels = h5f[param_name].shape[1]  # Must be even
+
+    if day_night != 'BOTH':
+        solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
+
+    grd_s = []
+    for param in param_s:
+        fill_value, fill_value_name = get_fill_attrs(param)
+        try:
+            grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels, fill_value_name=fill_value_name, fill_value=fill_value)
+            grd_s.append(grd)
+        except Exception as e:
+            print(e)
+            return
+    data = np.stack(grd_s)
+
+    grd_s = []
+    for param in lbl_param_s:
+        fill_value, fill_value_name = get_fill_attrs(param)
+        try:
+            grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2, fill_value_name=fill_value_name, fill_value=fill_value)
+            grd_s.append(grd)
+        except Exception as e:
+            print(e)
+            return
+    label = np.stack(grd_s)
+
+    tile_width += 2 * border
+
+    i_skip = tile_width
+    j_skip = tile_width
+    i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
+    j_start = 0
+
+    num_keep_y_tiles = int(num_lines / tile_width) - 3
+
+    num_y_valid = int(num_keep_y_tiles * 0.1) + 1
+    num_y_train = num_keep_y_tiles - num_y_valid - 1
+
+    for j in range(num_y_train):
+        j_a = j_start + j * j_skip
+        j_b = j_a + tile_width
+
+        for i in range(num_keep_x_tiles):
+            i_a = i_start + i * i_skip
+            i_b = i_a + tile_width
+
+            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
+                continue
+            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
+                continue
+
+            nda = data[:, j_a:j_b, i_a:i_b]
+            nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
+            nda = keep_tile(param_s, nda)
+            if nda is not None:
+                train_tiles.append(nda)
+                lbl_train_tiles.append(nda_lbl)
+
+    j_start = num_y_train * tile_width + 2*tile_width
+    for j in range(num_y_valid):
+        j_a = j_start + j * j_skip
+        j_b = j_a + tile_width
+
+        for i in range(num_keep_x_tiles):
+            i_a = i_start + i * i_skip
+            i_b = i_a + tile_width
+
+            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
+                continue
+            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
+                continue
+
+            nda = data[:, j_a:j_b, i_a:i_b]
+            nda_lbl = label[:, j_a * 2:j_b * 2, i_a * 2:i_b * 2]
+            nda = keep_tile(param_s, nda)
+            if nda is not None:
+                valid_tiles.append(nda)
+                lbl_valid_tiles.append(nda_lbl)
+
+
+def scan(directory):
+
+    data_src = CLAVRx_VIIRS(directory)
+    files = data_src.flist
+
+    for idx, file in enumerate(files):
+        h5f = h5py.File(file, 'r')
+        ts = data_src.ftimes[idx][0]
+        try:
+            solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
+        except Exception as e:
+            # print(e)
+            h5f.close()
+            continue
+
+        # if is_day(solzen) and moon_phase(ts):
+        if is_night(solzen) and moon_phase(ts):
+            print(file)
+        h5f.close()
+
+
+def scan_for_location(txt_file, lon_range=[111.0, 130.0], lat_range=[14.0, 32.0]):
+    with open(txt_file) as file:
+        for idx, fpath in enumerate(file):
+            fpath = fpath.strip()
+            h5f = h5py.File(fpath, 'r')
+            try:
+                lon_s = get_grid_values_all(h5f, 'longitude', stride=4)
+                lat_s = get_grid_values_all(h5f, 'latitude', stride=4)
+                c_lon, c_lat = lon_s[406, 400], lat_s[406, 400]
+                if (lon_range[0] < c_lon < lon_range[1]) and (lat_range[0] < c_lat < lat_range[1]):
+                    print(fpath)
+
+            except Exception as e:
+                # print(e)
+                h5f.close()
+                continue
+
+
+def test_nlcomp(file):
+    h5f = h5py.File(file, 'r')
+
+    cld_phs = get_grid_values_all(h5f, 'cloud_phase', scale_factor_name=None, range_name=None)
+    keep_0 = np.invert(np.isnan(cld_phs))
+
+    reff = get_grid_values_all(h5f, 'cld_reff_nlcomp')
+    keep_1 = np.invert(np.isnan(reff))
+
+    opd = get_grid_values_all(h5f, 'cld_opd_nlcomp')
+    keep_2 = np.invert(np.isnan(opd))
+
+    cld_dz = get_grid_values_all(h5f, 'cld_geo_thick')
+    keep_3 = np.logical_and(np.invert(np.isnan(cld_dz)), cld_dz > 5.0)
+
+    keep = keep_0 & keep_1 & keep_2 & keep_3
+
+    cld_phs = cld_phs[keep]
+    reff = reff[keep]
+    opd = opd[keep]
+    cld_dz = cld_dz[keep]
+
+    lwc_c, iwc_c = compute_lwc_iwc(cld_phs, reff, opd, cld_dz)
+
+    return lwc_c, iwc_c
+
+
+# def run_mean_std(directory):
+#
+#     data_dct = {name: [] for name in mod_res_params}
+#     mean_dct = {name: 0 for name in mod_res_params}
+#     std_dct = {name: 0 for name in mod_res_params}
+#
+#     for p in os.scandir(directory):
+#         if not p.is_dir():
+#             continue
+#         mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
+#
+#         for idx, mfile in enumerate(mod_files):
+#             if idx % 8 == 0:
+#                 h5f = h5py.File(mfile, 'r')
+#                 for param in mod_res_params:
+#                     name = 'observation_data/'+param
+#                     gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
+#                     data_dct[param].append(gvals.flatten())
+#                 print(mfile)
+#                 h5f.close()
+#
+#     for param in mod_res_params:
+#         data = data_dct[param]
+#         data = np.concatenate(data)
+#
+#         mean_dct[param] = np.nanmean(data)
+#         std_dct[param] = np.nanstd(data)
-- 
GitLab