From 9888cd950db16f136bf04f5f6bb515bee45d0e3c Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Tue, 14 Mar 2023 14:10:36 -0500
Subject: [PATCH] viirs_surfrad: rename data/label variables and output files to mres/ires naming

---
 modules/util/viirs_surfrad.py | 98 ++++++++++++++++++-----------------
 1 file changed, 50 insertions(+), 48 deletions(-)

diff --git a/modules/util/viirs_surfrad.py b/modules/util/viirs_surfrad.py
index b90b3da1..d4b75f02 100644
--- a/modules/util/viirs_surfrad.py
+++ b/modules/util/viirs_surfrad.py
@@ -37,10 +37,11 @@ group_name_m = 'orig/'
 
 solzen_name = group_name_m + 'solar_zenith'
 
-label_params = [group_name_i+target_param]
-data_params = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
-param_idx_m = data_params.index(group_name_m + target_param)
-param_idx_i = label_params.index(group_name_i + target_param)
+params_i = [group_name_i+target_param]
+params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
+
+param_idx_m = params_m.index(group_name_m + target_param)
+param_idx_i = params_i.index(group_name_i + target_param)
 
 
 def is_missing(p_idx, tile):
@@ -97,10 +98,10 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
     all_files = glob.glob(path, recursive=True)
     data_files = [f for f in all_files if f not in keep_out]
 
-    label_valid_tiles = []
-    label_train_tiles = []
-    data_valid_tiles = []
-    data_train_tiles = []
+    valid_tiles_i = []
+    train_tiles_i = []
+    valid_tiles_m = []
+    train_tiles_m = []
     f_cnt = 0
 
     num_files = len(data_files)
@@ -118,8 +119,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
                 continue
 
             try:
-                num_not_missing = run(h5f, data_params, data_train_tiles, data_valid_tiles,
-                                      label_params, label_train_tiles, label_valid_tiles,
+                num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m,
+                                      params_i, train_tiles_i, valid_tiles_i,
                                       num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night)
             except Exception as e:
                 print(e)
@@ -131,30 +132,30 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
 
             total_num_not_missing += num_not_missing
 
-            if len(data_train_tiles) == 0 and len(data_valid_tiles) == 0:
+            if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0:
                 continue
 
             if (f_cnt % 5) == 0:
                 num_valid_samples = 0
-                if len(data_valid_tiles) > 0:
-                    label_valid = np.stack(label_valid_tiles)
-                    data_valid = np.stack(data_valid_tiles)
-                    np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
-                    np.save(out_directory + 'label_valid_' + str(cnt), label_valid)
-                    num_valid_samples = data_valid.shape[0]
+                if len(valid_tiles_m) > 0:
+                    valid_i = np.stack(valid_tiles_i)
+                    valid_m = np.stack(valid_tiles_m)
+                    np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
+                    np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
+                    num_valid_samples = valid_m.shape[0]
 
                 num_train_samples = 0
-                if len(data_train_tiles) > 0:
-                    label_train = np.stack(label_train_tiles)
-                    data_train = np.stack(data_train_tiles)
-                    np.save(out_directory + 'label_train_' + str(cnt), label_train)
-                    np.save(out_directory + 'data_train_' + str(cnt), data_train)
-                    num_train_samples = data_train.shape[0]
-
-                label_valid_tiles = []
-                label_train_tiles = []
-                data_valid_tiles = []
-                data_train_tiles = []
+                if len(train_tiles_m) > 0:
+                    train_i = np.stack(train_tiles_i)
+                    train_m = np.stack(train_tiles_m)
+                    np.save(out_directory + 'train_ires_' + str(cnt), train_i)
+                    np.save(out_directory + 'train_mres_' + str(cnt), train_m)  # '<split>_<res>_<cnt>', same as the other three outputs
+                    num_train_samples = train_m.shape[0]
+
+                valid_tiles_i = []
+                train_tiles_i = []
+                valid_tiles_m = []
+                train_tiles_m = []
 
                 print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
                 total_num_train_samples += num_train_samples
@@ -170,12 +171,12 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
 
 #  tile_width: Must be even!
 #  kernel_size: Must be odd!
-def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lbl_valid_tiles,
+def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i,
         num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='ANY'):
 
     border = int((kernel_size - 1)/2) + 1  # Need to add for interpolation with no edge effects
 
-    param_name = param_s[0]
+    param_name = params_m[0]
 
     num_lines = h5f[param_name].shape[0]
     num_pixels = h5f[param_name].shape[1]  # Must be even
@@ -184,24 +185,24 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
         solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
 
     grd_s = []
-    for param in param_s:
+    for param in params_m:
         try:
             grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
             grd_s.append(grd)
         except Exception as e:
             print(e)
             return
-    data = np.stack(grd_s)
+    data_m = np.stack(grd_s)
 
     grd_s = []
-    for param in lbl_param_s:
+    for param in params_i:
         try:
             grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2)
             grd_s.append(grd)
         except Exception as e:
             print(e)
             return
-    label = np.stack(grd_s)
+    data_i = np.stack(grd_s)
 
     tile_width += 2 * border
 
@@ -212,8 +213,8 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
 
     num_y_tiles = int(num_lines / tile_width) - 1
 
-    data_tiles = []
-    lbl_tiles = []
+    data_tiles_m = []
+    data_tiles_i = []
     num_not_missing = 0
 
     for j in range(num_y_tiles):
@@ -229,26 +230,27 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
             elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                 continue
 
-            nda = data[:, j_a:j_b, i_a:i_b]
-            nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
-            if is_missing(param_idx_i, nda_lbl):
+            nda_m = data_m[:, j_a:j_b, i_a:i_b]
+            nda_i = data_i[:, j_a*2:j_b*2, i_a*2:i_b*2]
+            if is_missing(param_idx_i, nda_i):
                 continue
             num_not_missing += 1
 
-            nda_lbl = keep_tile(param_idx_i, nda_lbl)
-            if nda_lbl is not None:
-                data_tiles.append(nda)
-                lbl_tiles.append(nda_lbl)
+            nda_i = keep_tile(param_idx_i, nda_i)
+            if nda_i is not None:
+                data_tiles_m.append(nda_m)
+                data_tiles_i.append(nda_i)
 
-    num_tiles = len(lbl_tiles)
+    num_tiles = len(data_tiles_i)
     num_valid = int(num_tiles * 0.10)
     num_train = num_tiles - num_valid
 
     for k in range(num_train):
-        train_tiles.append(data_tiles[k])
-        lbl_train_tiles.append(lbl_tiles[k])
+        train_tiles_m.append(data_tiles_m[k])
+        train_tiles_i.append(data_tiles_i[k])
+
     for k in range(num_valid):
-        valid_tiles.append(data_tiles[num_train + k])
-        lbl_valid_tiles.append(lbl_tiles[num_train + k])
+        valid_tiles_m.append(data_tiles_m[num_train + k])
+        valid_tiles_i.append(data_tiles_i[num_train + k])
 
     return num_not_missing
-- 
GitLab