From 8ed9c2d7dcf5b539bd86e30cbfce11fbc52498fa Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 15 Feb 2023 11:40:01 -0600
Subject: [PATCH] snapshot...

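---
Notes (not part of the commit message):
- process_cld_prob_: the 98% valid-pixel requirement is commented out, the
  accepted probability range narrows from (0.05, 0.95) to (0.1, 0.90), and
  the minimum in-range fraction drops from 0.50 to 0.25.
- process_cld_opd_: the 98% valid-pixel requirement is commented out.
- run_all: adds a running kept_cnt total to the per-file print, and comments
  out the every-5-files np.save of the train/valid tiles and the final
  totals print.
- Review: with the valid-pixel check disabled, num_keep is 0 for an all-NaN
  tile, so np.sum(keep)/num_keep divides by zero in both process_cld_*
  functions.
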
 modules/util/viirs_surfrad.py | 73 ++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 36 deletions(-)

diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py
index a9e71a03..d86c5d8b 100644
--- a/modules/util/viirs_surfrad.py
+++ b/modules/util/viirs_surfrad.py
@@ -39,10 +39,10 @@ def keep_tile(param, param_s, tile):
 def process_cld_prob_(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
-    if num_keep / grd_k.size < 0.98:
-        return None
-    keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
-    if np.sum(keep)/num_keep < 0.50:
+    # if num_keep / grd_k.size < 0.98:
+    #     return None
+    keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 0.90), False)
+    if np.sum(keep)/num_keep < 0.25:
         return None
     grd_k = np.where(np.invert(keep), 0, grd_k)
     return grd_k
@@ -51,8 +51,8 @@ def process_cld_prob_(grd_k):
 def process_cld_opd_(grd_k):
     keep = np.invert(np.isnan(grd_k))
     num_keep = np.sum(keep)
-    if num_keep / grd_k.size < 0.98:
-        return None
+    # if num_keep / grd_k.size < 0.98:
+    #     return None
     grd_k = np.where(np.invert(keep), 0, grd_k)
     keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
     if np.sum(keep)/num_keep < 0.50:
@@ -78,6 +78,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
 
     num_files = len(data_files)
     print('Start, number of files: ', num_files)
+    kept_cnt = 0
 
     for idx, data_f in enumerate(data_files):
         # if idx % 4 == 0:  # if we want to skip some files
@@ -96,42 +97,42 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
                 print(e)
                 h5f.close()
                 continue
-
-            print(data_f, int(100 * (kept/total)))
+            kept_cnt += kept
+            print(data_f, kept_cnt, int(100 * (kept/total)))
             f_cnt += 1
             h5f.close()
 
             if len(data_train_tiles) == 0:
                 continue
 
-            if (f_cnt % 5) == 0:
-                num_valid_samples = 0
-                if len(data_valid_tiles) > 0:
-                    label_valid = np.stack(label_valid_tiles)
-                    data_valid = np.stack(data_valid_tiles)
-                    np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
-                    np.save(out_directory + 'label_valid_' + str(cnt), label_valid)
-                    num_valid_samples = data_valid.shape[0]
-
-                label_train = np.stack(label_train_tiles)
-                data_train = np.stack(data_train_tiles)
-                np.save(out_directory + 'label_train_' + str(cnt), label_train)
-                np.save(out_directory + 'data_train_' + str(cnt), data_train)
-                num_train_samples = data_train.shape[0]
-
-                label_valid_tiles = []
-                label_train_tiles = []
-                data_valid_tiles = []
-                data_train_tiles = []
-
-                print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
-                total_num_train_samples += num_train_samples
-                total_num_valid_samples += num_valid_samples
-                print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
-
-                cnt += 1
-
-    print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+    #         if (f_cnt % 5) == 0:
+    #             num_valid_samples = 0
+    #             if len(data_valid_tiles) > 0:
+    #                 label_valid = np.stack(label_valid_tiles)
+    #                 data_valid = np.stack(data_valid_tiles)
+    #                 np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
+    #                 np.save(out_directory + 'label_valid_' + str(cnt), label_valid)
+    #                 num_valid_samples = data_valid.shape[0]
+    #
+    #             label_train = np.stack(label_train_tiles)
+    #             data_train = np.stack(data_train_tiles)
+    #             np.save(out_directory + 'label_train_' + str(cnt), label_train)
+    #             np.save(out_directory + 'data_train_' + str(cnt), data_train)
+    #             num_train_samples = data_train.shape[0]
+    #
+    #             label_valid_tiles = []
+    #             label_train_tiles = []
+    #             data_valid_tiles = []
+    #             data_train_tiles = []
+    #
+    #             print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
+    #             total_num_train_samples += num_train_samples
+    #             total_num_valid_samples += num_valid_samples
+    #             print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
+    #
+    #             cnt += 1
+    #
+    # print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
 
 
 #  tile_width: Must be even!
-- 
GitLab