From 2f2f044b425270c65a3bada907d427b22f01b877 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Thu, 23 Feb 2023 13:55:11 -0600
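Subject: [PATCH] viirs_surfrad: return missing-data flag from keep_tile and split train/valid after tile selection

keep_tile() and the process_cld_prob_()/process_cld_opd_() helpers now
return a (tile, flag) pair instead of a single value. The flag is True
only when a tile is rejected because fewer than 98% of its values are
non-NaN; it is False for accepted tiles and for tiles rejected by the
later fraction checks.

The clear-fraction window accepted by process_cld_prob_() is narrowed
from (0.38, 0.62) to (0.40, 0.60).

run() no longer walks separate training and validation row ranges.
It loops once over the tile rows, collects the accepted data/label
tiles, then appends the last 10% of them to the validation lists and
the rest to the training lists. It returns the number of tiles whose
flag was False, which run_all() accumulates and prints as
total_num_not_missing.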
---
 modules/util/viirs_surfrad.py | 81 +++++++++++++++++------------------
 1 file changed, 39 insertions(+), 42 deletions(-)

diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py
index 97a24f47..d158de28 100644
--- a/modules/util/viirs_surfrad.py
+++ b/modules/util/viirs_surfrad.py
@@ -25,40 +25,40 @@ def keep_tile(param, param_s, tile):
     grd_k = tile[k, ].copy()
 
     if target_param == 'cloud_probability':
-        grd_k = process_cld_prob_(grd_k)
+        grd_k, bflag = process_cld_prob_(grd_k)
     elif target_param == 'cld_opd_dcomp':
-        grd_k = process_cld_opd_(grd_k)
+        grd_k, bflag = process_cld_opd_(grd_k)
 
     if grd_k is not None:
         tile[k, ] = grd_k
-        return tile
+        return tile, bflag
     else:
-        return None
+        return None, bflag
 
 
 def process_cld_prob_(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
     if num_keep / grd_k.size < 0.98:
-        return None
+        return None, True
     keep_clr = np.where(keep, grd_k < 0.20, False)
     frac_keep = np.sum(keep_clr)/num_keep
-    if not (0.38 < frac_keep < 0.62):
-        return None
+    if not (0.40 < frac_keep < 0.60):
+        return None, False
     grd_k = np.where(np.invert(keep), 0, grd_k)  # Convert NaNs to 0
-    return grd_k
+    return grd_k, False
 
 
 def process_cld_opd_(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
     if num_keep / grd_k.size < 0.98:
-        return None
+        return None, True
     grd_k = np.where(np.invert(keep), 0, grd_k)
     keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
     if np.sum(keep)/num_keep < 0.50:
-        return None
-    return grd_k
+        return None, False
+    return grd_k, False
 
 
 def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
@@ -80,6 +80,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
     num_files = len(data_files)
     print('Start, number of files: ', num_files)
 
+    total_num_not_missing = 0
+
     for idx, data_f in enumerate(data_files):
         # if idx % 4 == 0:  # if we want to skip some files
         if True:
@@ -90,9 +92,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
                 continue
 
             try:
-                run(h5f, data_params, data_train_tiles, data_valid_tiles,
-                    label_params, label_train_tiles, label_valid_tiles,
-                    num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night)
+                num_not_missing = run(h5f, data_params, data_train_tiles, data_valid_tiles,
+                                      label_params, label_train_tiles, label_valid_tiles,
+                                      num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night)
             except Exception as e:
                 print(e)
                 h5f.close()
@@ -129,7 +131,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
                 print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
                 total_num_train_samples += num_train_samples
                 total_num_valid_samples += num_valid_samples
-                print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+                total_num_not_missing += num_not_missing
+                print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
+                      total_num_valid_samples, total_num_not_missing)
                 print('--------------------------------------------------')
 
                 cnt += 1
@@ -179,12 +183,13 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
     i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
     j_start = 0
 
-    num_keep_y_tiles = int(num_lines / tile_width) - 3
+    num_y_tiles = int(num_lines / tile_width) - 1
 
-    num_y_valid = int(num_keep_y_tiles * 0.15) + 1
-    num_y_train = num_keep_y_tiles - num_y_valid - 1
+    data_tiles = []
+    lbl_tiles = []
+    num_not_missing = 0
 
-    for j in range(num_y_train):
+    for j in range(num_y_tiles):
         j_a = j_start + j * j_skip
         j_b = j_a + tile_width
 
@@ -199,31 +204,23 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
 
             nda = data[:, j_a:j_b, i_a:i_b]
             nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
-            nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
+            nda_lbl, missing_flag = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
+            if not missing_flag:
+                num_not_missing += 1
 
             if nda_lbl is not None:
-                train_tiles.append(nda)
-                lbl_train_tiles.append(nda_lbl)
-
-    j_start = num_y_train * tile_width + 2*tile_width
-    for j in range(num_y_valid):
-        j_a = j_start + j * j_skip
-        j_b = j_a + tile_width
+                data_tiles.append(nda)
+                lbl_tiles.append(nda_lbl)
 
-        for i in range(num_keep_x_tiles):
-            i_a = i_start + i * i_skip
-            i_b = i_a + tile_width
+    num_tiles = len(lbl_tiles)
+    num_valid = int(num_tiles * 0.10)
+    num_train = num_tiles - num_valid
 
-            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
-                continue
-            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
-                continue
-
-            nda = data[:, j_a:j_b, i_a:i_b]
-            nda_lbl = label[:, j_a * 2:j_b * 2, i_a * 2:i_b * 2]
-            nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
-
-            if nda_lbl is not None:
-                valid_tiles.append(nda)
-                lbl_valid_tiles.append(nda_lbl)
+    for k in range(num_train):
+        train_tiles.append(data_tiles[k])
+        lbl_train_tiles.append(lbl_tiles[k])
+    for k in range(num_valid):
+        valid_tiles.append(data_tiles[num_train + k])
+        lbl_valid_tiles.append(lbl_tiles[num_train + k])
 
+    return num_not_missing
-- 
GitLab