From af8cacc4f28acf56f44ebed861dc3352a36d05b6 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Tue, 14 Feb 2023 13:40:21 -0600
Subject: [PATCH] viirs_l1b_l2: accumulate 10-bin cloud_probability histogram, comment out tile saving (snapshot)

---
 modules/util/viirs_l1b_l2.py | 60 +++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/modules/util/viirs_l1b_l2.py b/modules/util/viirs_l1b_l2.py
index 9e261564..a5d8696a 100644
--- a/modules/util/viirs_l1b_l2.py
+++ b/modules/util/viirs_l1b_l2.py
@@ -6,8 +6,8 @@ from aeolus.datasource import CLAVRx_VIIRS
 from icing.moon_phase import *
 
 
-# target_param = 'cloud_probability'
-target_param = 'cld_opd_dcomp'
+target_param = 'cloud_probability'
+# target_param = 'cld_opd_dcomp'
 
 # group_name = ''
 group_name = 'super/'
@@ -21,7 +21,7 @@ label_params = l2_params
 data_params = l2_params
 
 
-def keep_tile(param_s, tile):
+def keep_tile(param_s, tile, hist_10):
     k = param_s.index(group_name + target_param)
     grd_k = tile[k, ].copy()
 
@@ -48,11 +48,12 @@ def process_cld_prob(param_s, tile):
         return None
 
 
-def process_cld_prob_(grd_k):
+def process_cld_prob_(grd_k, hist_10):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
     if num_keep / grd_k.size < 0.98:
         return None
+    hist_10 += np.histogram(grd_k.flatten(), range=[0.0, 1.0], bins=10)[0]
     keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
     if np.sum(keep)/num_keep < 0.50:
         return None
@@ -102,6 +103,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
     num_files = len(data_files)
 
     print('Start, number of files: ', num_files)
+    hist_10 = np.zeros((10), dtype=np.int64)
 
     for idx, data_f in enumerate(data_files):
         # if idx % 4 == 0:  # if we want to skip some files
@@ -113,12 +115,12 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
                 continue
 
             try:
-                total, kept = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night)
+                total, kept = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, hist_10, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night)
             except Exception as e:
                 print(e)
                 data_h5f.close()
                 continue
-            print(data_f, int(100 * (kept/total)))
+            print(data_f, int(100 * (kept/total)), hist_10, (hist_10 / np.sum(hist_10)))
             f_cnt += 1
 
             data_h5f.close()
@@ -126,33 +128,33 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
             if len(data_train_tiles) == 0:
                 continue
 
-            if (f_cnt % 5) == 0:
-                num_valid_samples = 0
-                if len(data_valid_tiles) > 0:
-                    data_valid = np.stack(data_valid_tiles)
-                    np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
-                    num_valid_samples = data_valid.shape[0]
-
-                data_train = np.stack(data_train_tiles)
-                np.save(out_directory+'data_train_' + str(cnt), data_train)
-                num_train_samples = data_train.shape[0]
-
-                data_valid_tiles = []
-                data_train_tiles = []
-
-                print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
-                total_num_train_samples += num_train_samples
-                total_num_valid_samples += num_valid_samples
-                print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
-
-                cnt += 1
+            # if (f_cnt % 5) == 0:
+            #     num_valid_samples = 0
+            #     if len(data_valid_tiles) > 0:
+            #         data_valid = np.stack(data_valid_tiles)
+            #         np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
+            #         num_valid_samples = data_valid.shape[0]
+            #
+            #     data_train = np.stack(data_train_tiles)
+            #     np.save(out_directory+'data_train_' + str(cnt), data_train)
+            #     num_train_samples = data_train.shape[0]
+            #
+            #     data_valid_tiles = []
+            #     data_train_tiles = []
+            #
+            #     print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
+            #     total_num_train_samples += num_train_samples
+            #     total_num_valid_samples += num_valid_samples
+            #     print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+            #
+            #     cnt += 1
 
     print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
 
 
 #  tile_width: Must be even!
 #  kernel_size: Must be odd!
-def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
+def run(data_h5f, param_s, train_tiles, valid_tiles, hist_10, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
 
     border = int((kernel_size - 1)/2)
 
@@ -205,7 +207,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_wi
                 continue
 
             nda = data[:, j_a:j_b, i_a:i_b]
-            nda = keep_tile(param_s, nda)
+            nda = keep_tile(param_s, nda, hist_10)
             if nda is not None:
                 train_tiles.append(nda)
                 cnt_kept += 1
@@ -227,7 +229,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_wi
                 continue
 
             nda = data[:, j_a:j_b, i_a:i_b]
-            nda = keep_tile(param_s, nda)
+            nda = keep_tile(param_s, nda, hist_10)
             if nda is not None:
                 valid_tiles.append(nda)
                 cnt_kept += 1
-- 
GitLab