From 910f1676e657e52fcccdb1858d40b7b8b4751113 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 1 Feb 2023 13:51:23 -0600
Subject: [PATCH] initial commit

---
 modules/util/abi_surfrad.py | 167 ++++++++++++++++++++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100644 modules/util/abi_surfrad.py

diff --git a/modules/util/abi_surfrad.py b/modules/util/abi_surfrad.py
new file mode 100644
index 00000000..98f36e86
--- /dev/null
+++ b/modules/util/abi_surfrad.py
@@ -0,0 +1,167 @@
+import numpy as np
+import h5py
+from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
+import glob
+import os
+from aeolus.datasource import CLAVRx_VIIRS
+from icing.moon_phase import *
+from pathlib import Path
+
+
+# target_param = 'cloud_probability'
+target_param = 'cld_opd_dcomp'
+
+group_name_i = 'super/'
+group_name_m = 'orig/'
+
+solzen_name = group_name_m + 'solar_zenith'
+
+label_params = [group_name_i+target_param]
+data_params = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
+
+
+def keep_tile(param, param_s, tile):
+    k = param_s.index(param)
+    grd_k = tile[k, ].copy()
+
+    if target_param == 'cloud_probability':
+        grd_k = process_cld_prob_(grd_k)
+    elif target_param == 'cld_opd_dcomp':
+        grd_k = process_cld_opd_(grd_k)
+
+    if grd_k is not None:
+        tile[k, ] = grd_k
+        return tile
+    else:
+        return None
+
+
+def process_cld_prob_(grd_k):
+    keep = np.invert(np.isnan(grd_k))
+    num_keep = np.sum(keep)
+    if num_keep / grd_k.size < 0.98:
+        return None
+    keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
+    if np.sum(keep)/num_keep < 0.50:
+        return None
+    grd_k = np.where(np.invert(keep), 0, grd_k)
+    return grd_k
+
+
+def process_cld_opd_(grd_k):
+    keep = np.invert(np.isnan(grd_k))
+    num_keep = np.sum(keep)
+    if num_keep / grd_k.size < 0.98:
+        return None
+    grd_k = np.where(np.invert(keep), 0, grd_k)
+    keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
+    if np.sum(keep)/num_keep < 0.50:
+        return None
+    return grd_k
+
+
+def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10):
+    cnt = start
+    total_num_train_samples = 0
+    total_num_valid_samples = 0
+
+    path = directory + '**' + '/' + pattern
+
+    files = glob.glob(path, recursive=True)
+
+    label_tiles = []
+    data_tiles = []
+    f_cnt = 0
+
+    num_files = len(files)
+    print('Start, number of files: ', num_files)
+
+    for idx, f in enumerate(files):
+        # if idx % 4 == 0:  # if we want to skip some files
+        if True:
+            try:
+                h5f = h5py.File(f, 'r')
+            except:
+                print('cant open file: ', f)
+                continue
+
+            try:
+                run(h5f, data_params, data_tiles, label_params, label_tiles, tile_width=64, kernel_size=5)
+            except Exception as e:
+                print(e)
+                h5f.close()
+                continue
+
+            print(f)
+            f_cnt += 1
+            h5f.close()
+
+            if len(data_tiles) == 0:
+                continue
+
+            if (f_cnt % 100) == 0:
+                num_valid_samples = 0
+
+                label = np.stack(label_tiles)
+                data = np.stack(data_tiles)
+                np.save(out_directory + 'label_train_' + str(cnt), label)
+                np.save(out_directory + 'data_train_' + str(cnt), data)
+                num_samples = data.shape[0]
+
+                label_tiles = []
+                data_tiles = []
+
+                # print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
+                # total_num_train_samples += num_samples
+                # total_num_valid_samples += num_valid_samples
+                # print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+
+                cnt += 1
+
+    print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+
+
+#  tile_width: Must be even!
+#  kernel_size: Must be odd!
+def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, tile_width=64, kernel_size=3):
+
+    border = int((kernel_size - 1)/2) + 1  # Need to add for interpolation with no edge effects
+
+    param_name = param_s[0]
+
+    num_lines = h5f[param_name].shape[0]
+    num_pixels = h5f[param_name].shape[1]  # Must be even
+
+    grd_s = []
+    for param in param_s:
+        try:
+            grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
+            grd_s.append(grd)
+        except Exception as e:
+            print(e)
+            return
+    data = np.stack(grd_s)
+
+    grd_s = []
+    for param in lbl_param_s:
+        try:
+            grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2)
+            grd_s.append(grd)
+        except Exception as e:
+            print(e)
+            return
+    label = np.stack(grd_s)
+
+    nda = data[:, :, :]
+    nda = keep_tile(group_name_m + target_param, param_s, nda)
+    if nda is None:  # if none, no need to check the next one
+        return
+
+    nda_lbl = label[:, :, :]
+    nda_lbl = keep_tile(group_name_i + target_param, lbl_param_s, nda_lbl)
+
+    if nda_lbl is not None:
+        tiles.append(nda)
+        lbl_tiles.append(nda_lbl)
+
+
-- 
GitLab