From b05c3f50a4b093eba8a5de0d69f32ee6ea351d59 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 15 Feb 2023 14:35:06 -0600
Subject: [PATCH] snapshot...

---
 modules/util/abi_surfrad.py | 53 +++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 29 deletions(-)

diff --git a/modules/util/abi_surfrad.py b/modules/util/abi_surfrad.py
index e952c995..43c34256 100644
--- a/modules/util/abi_surfrad.py
+++ b/modules/util/abi_surfrad.py
@@ -1,15 +1,11 @@
 import numpy as np
 import h5py
-from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
+from util.util import get_grid_values
 import glob
-import os
-from aeolus.datasource import CLAVRx_VIIRS
-from icing.moon_phase import *
-from pathlib import Path
 
 
-# target_param = 'cloud_probability'
-target_param = 'cld_opd_dcomp'
+target_param = 'cloud_probability'
+# target_param = 'cld_opd_dcomp'
 
 group_name_i = 'super/'
 group_name_m = 'orig/'
@@ -39,10 +35,10 @@ def keep_tile(param, param_s, tile):
 def process_cld_prob_(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
-    if num_keep / grd_k.size < 0.98:
-        return None
-    keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
-    if np.sum(keep)/num_keep < 0.50:
+    # if num_keep / grd_k.size < 0.98:
+    #     return None
+    keep = np.where(keep, np.logical_and(0.10 < grd_k, grd_k < 0.90), False)
+    if num_keep == 0 or np.sum(keep)/num_keep < 0.25:  # all-NaN tile: reject instead of computing 0/0
         return None
     grd_k = np.where(np.invert(keep), 0, grd_k)
     return grd_k
@@ -51,8 +47,8 @@ def process_cld_prob_(grd_k):
 def process_cld_opd_(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
-    if num_keep / grd_k.size < 0.98:
-        return None
+    # if num_keep / grd_k.size < 0.98:
+    #     return None
     grd_k = np.where(np.invert(keep), 0, grd_k)
     keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
     if np.sum(keep)/num_keep < 0.50:
@@ -62,8 +58,7 @@ def process_cld_opd_(grd_k):
 
 def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10):
     cnt = start
-    total_num_train_samples = 0
-    total_num_valid_samples = 0
+    total_num_samples = 0
 
     path = directory + '**' + '/' + pattern
 
@@ -86,7 +81,7 @@ def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10):
                 continue
 
             try:
-                run(h5f, data_params, data_tiles, label_params, label_tiles, tile_width=64, kernel_size=5)
+                run(h5f, data_params, data_tiles, label_params, label_tiles, kernel_size=5)
             except Exception as e:
                 print(e)
                 h5f.close()
@@ -100,30 +95,30 @@ def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10):
                 continue
 
             if (f_cnt % 100) == 0:
-                num_valid_samples = 0
-
-                label = np.stack(label_tiles)
-                data = np.stack(data_tiles)
-                np.save(out_directory + 'label_' + str(cnt), label)
-                np.save(out_directory + 'data_' + str(cnt), data)
-                num_samples = data.shape[0]
+                num_samples = 0
+                if len(data_tiles) > 0:
+                    label = np.stack(label_tiles)
+                    data = np.stack(data_tiles)
+                    #np.save(out_directory + 'label_' + str(cnt), label)
+                    #np.save(out_directory + 'data_' + str(cnt), data)
+                    num_samples = data.shape[0]
 
                 label_tiles = []
                 data_tiles = []
 
-                # print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
-                # total_num_train_samples += num_samples
-                # total_num_valid_samples += num_valid_samples
-                # print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+                print('  num_samples, progress % : ', num_samples, int((f_cnt/num_files)*100))
+                total_num_samples += num_samples
+                print('total_num_samples: ', total_num_samples)
+                print('------------------------------------------------------------')
 
                 cnt += 1
 
-    print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+    print('** total_num_samples: ', total_num_samples)
 
 
 #  tile_width: Must be even!
 #  kernel_size: Must be odd!
-def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, tile_width=64, kernel_size=3):
+def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, kernel_size=3):
 
     border = int((kernel_size - 1)/2) + 1  # Need to add for interpolation with no edge effects
 
-- 
GitLab