From f4dc032f201f391510294103741010ed1e7ed8f1 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Thu, 23 Feb 2023 12:01:23 -0600
Subject: [PATCH] viirs_l1b_l2: drop unused dum arg; count tiles not rejected for missing data

---
 modules/util/viirs_l1b_l2.py | 43 +++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/modules/util/viirs_l1b_l2.py b/modules/util/viirs_l1b_l2.py
index 5fa881c0..6b35dd12 100644
--- a/modules/util/viirs_l1b_l2.py
+++ b/modules/util/viirs_l1b_l2.py
@@ -31,20 +31,20 @@ cld_opd_norm_hist = [7.31926378e-01, 9.52482193e-02, 4.62747706e-02, 3.15450036e
                      6.50404531e-04, 1.73557144e-02]
 
 
-def keep_tile(param_s, tile, dum):
+def keep_tile(param_s, tile):  # returns (tile or None, missing-data flag)
     k = param_s.index(group_name + target_param)
     grd_k = tile[k, ].copy()
 
     if target_param == 'cloud_probability':
-        grd_k = process_cld_prob_(grd_k, dum)
+        grd_k, bflag = process_cld_prob_(grd_k)
     elif target_param == 'cld_opd_dcomp':
-        grd_k = process_cld_opd_(grd_k, dum)
+        grd_k, bflag = process_cld_opd_(grd_k)
 
     if grd_k is not None:
         tile[k, ] = grd_k
-        return tile
+        return tile, False  # kept tiles are never flagged; avoids unbound bflag when no filter ran
     else:
-        return None
+        return None, bflag  # flag is True only when the tile was rejected for too many NaNs
 
 
 def process_cld_prob(param_s, tile):
@@ -58,18 +58,17 @@ def process_cld_prob(param_s, tile):
         return None
 
 
-def process_cld_prob_(grd_k, dum):
+def process_cld_prob_(grd_k):  # returns (grid or None, missing-data flag)
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
     if num_keep / grd_k.size < 0.98:
-        return None
-    # hist_10 += np.histogram(grd_k.flatten(), range=[0.0, 1.0], bins=10)[0]
+        return None, True  # rejected: fewer than 98% of values are non-NaN
     keep_clr = np.where(keep, grd_k < 0.20, False)
     frac_keep = np.sum(keep_clr)/num_keep
     if not (0.40 < frac_keep < 0.60):
-        return None
+        return None, False  # rejected: fraction of values below 0.20 is outside (0.40, 0.60)
     grd_k = np.where(np.invert(keep), 0, grd_k)  # Convert NaN to 0
-    return grd_k
+    return grd_k, False  # accepted, with NaNs replaced by 0
 
 
 def process_cld_opd(param_s, tile):
@@ -83,23 +82,24 @@ def process_cld_opd(param_s, tile):
         return None
 
 
-def process_cld_opd_(grd_k, dum):
+def process_cld_opd_(grd_k):  # returns (grid or None, missing-data flag)
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
     if num_keep / grd_k.size < 0.98:
-        return None
+        return None, True  # rejected: fewer than 98% of values are non-NaN
     grd_k = np.where(np.invert(keep), 0, grd_k)
     keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
     frac_keep = np.sum(keep)/num_keep
     if frac_keep < 0.50:
-        return None
-    return grd_k
+        return None, False  # rejected: under half of the non-NaN values lie in (0.1, 158.0)
+    return grd_k, False  # accepted, with NaNs replaced by 0
 
 
 def run_all(directory, out_directory, day_night='ANY', start=10):
     cnt = start
     total_num_train_samples = 0
     total_num_valid_samples = 0
+    total_num_not_missing = 0
     num_keep_x_tiles = 14
 
     # pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc'
@@ -127,7 +127,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
                 continue
 
             try:
-                run(data_h5f, data_params, data_train_tiles, data_valid_tiles, None, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night)
+                num_not_missing = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night)
             except Exception as e:
                 print(e)
                 data_h5f.close()
@@ -159,7 +159,9 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
                 print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
                 total_num_train_samples += num_train_samples
                 total_num_valid_samples += num_valid_samples
-                print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+                total_num_not_missing += num_not_missing
+                print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ',
+                      total_num_train_samples, total_num_valid_samples, total_num_not_missing)
                 print('---------------------------------------------------------')
 
                 cnt += 1
@@ -169,7 +171,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
 
 #  tile_width: Must be even!
 #  kernel_size: Must be odd!
-def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
+def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
 
     border = int((kernel_size - 1)/2)
 
@@ -201,6 +203,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti
     num_y_tiles = int(num_lines / tile_width) - 1
 
     tiles = []
+    num_not_missing = 0
 
     for j in range(num_y_tiles):
         j_a = j_start + j * j_skip
@@ -216,7 +219,9 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti
                 continue
 
             nda = data[:, j_a:j_b, i_a:i_b]
-            nda = keep_tile(param_s, nda, dum)
+            nda, missing_flag = keep_tile(param_s, nda)
+            if not missing_flag:
+                num_not_missing += 1
             if nda is not None:
                 tiles.append(nda)
 
@@ -229,6 +234,8 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti
     for k in range(num_valid):
         valid_tiles.append(tiles[num_train + k])
 
+    return num_not_missing
+
 
 def scan(directory):
 
-- 
GitLab