From d7592b6bdfb1fedfdb521c691ecee0d860f8b184 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 1 Feb 2023 11:31:34 -0600
Subject: [PATCH] snapshot...

---
 modules/util/viirs_surfrad.py | 41 ++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 10 deletions(-)

diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py
index 8b2e6b0e..3376c845 100644
--- a/modules/util/viirs_surfrad.py
+++ b/modules/util/viirs_surfrad.py
@@ -20,8 +20,8 @@ label_params = [group_name_i+target_param]
 data_params = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
 
 
-def keep_tile(param_s, tile):
-    k = param_s.index(group_name_m + target_param)
+def keep_tile(param, param_s, tile):
+    k = param_s.index(param)
     grd_k = tile[k, ].copy()
 
     if target_param == 'cloud_probability':
@@ -89,15 +89,16 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
                 continue
 
             try:
-                run(h5f, data_params, data_train_tiles, data_valid_tiles, label_params, label_train_tiles, label_valid_tiles,
-                    num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=5, day_night=day_night)
+                total, kept = run(h5f, data_params, data_train_tiles, data_valid_tiles,
+                                  label_params, label_train_tiles, label_valid_tiles,
+                                  num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=5, day_night=day_night)
             except Exception as e:
                 print(e)
                 h5f.close()
                 continue
-            print(data_f)
-            f_cnt += 1
 
+            print(data_f, int(100 * (kept/total)))
+            f_cnt += 1
             h5f.close()
 
             if len(data_train_tiles) == 0:
@@ -180,6 +181,8 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
     num_y_valid = int(num_keep_y_tiles * 0.1) + 1
     num_y_train = num_keep_y_tiles - num_y_valid - 1
 
+    cnt_total = 0
+    cnt_kept = 0
     for j in range(num_y_train):
         j_a = j_start + j * j_skip
         j_b = j_a + tile_width
@@ -188,17 +191,25 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
             i_a = i_start + i * i_skip
             i_b = i_a + tile_width
 
+            cnt_total += 1
+
             if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                 continue
             elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                 continue
 
             nda = data[:, j_a:j_b, i_a:i_b]
+            nda = keep_tile(group_name_m+target_param, param_s, nda)
+            if nda is None:  # data tile rejected, so skip the label-tile check
+                continue
+
             nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
-            nda = keep_tile(param_s, nda)
-            if nda is not None:
+            nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
+
+            if nda_lbl is not None:
                 train_tiles.append(nda)
                 lbl_train_tiles.append(nda_lbl)
+                cnt_kept += 1
 
     j_start = num_y_train * tile_width + 2*tile_width
     for j in range(num_y_valid):
@@ -209,17 +220,27 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
             i_a = i_start + i * i_skip
             i_b = i_a + tile_width
 
+            cnt_total += 1
+
             if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                 continue
             elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                 continue
 
             nda = data[:, j_a:j_b, i_a:i_b]
+            nda = keep_tile(group_name_m+target_param, param_s, nda)
+            if nda is None:
+                continue
+
             nda_lbl = label[:, j_a * 2:j_b * 2, i_a * 2:i_b * 2]
-            nda = keep_tile(param_s, nda)
-            if nda is not None:
+            nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
+
+            if nda_lbl is not None:
                 valid_tiles.append(nda)
                 lbl_valid_tiles.append(nda_lbl)
+                cnt_kept += 1
+
+    return cnt_total, cnt_kept
 
 
 def scan(directory):
-- 
GitLab