From 0d7af0c9609f7dd550e8e082f98738fde4f6cdbe Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Thu, 13 Apr 2023 13:50:00 -0500
Subject: [PATCH] abi_surfrad: split train/valid sets by file path, drop keep_out list

---
 modules/util/abi_surfrad.py | 166 +++++++++++++++++++-----------------
 1 file changed, 86 insertions(+), 80 deletions(-)

diff --git a/modules/util/abi_surfrad.py b/modules/util/abi_surfrad.py
index fb03c1ec..19faccf1 100644
--- a/modules/util/abi_surfrad.py
+++ b/modules/util/abi_surfrad.py
@@ -3,38 +3,14 @@ import h5py
 from util.util import get_grid_values, is_day
 import glob
 
-keep_out_opd = ['/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/11/02/clavrx_VNP02IMG.A2019306.1912.001.2019307003236.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/04/13/clavrx_VNP02IMG.A2019103.1918.001.2019104005120.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/05/25/clavrx_VNP02IMG.A2019145.1936.001.2019146005424.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/11/01/clavrx_VNP02IMG.A2019305.1936.001.2019306005913.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/03/01/clavrx_VNP02IMG.A2019060.1930.001.2019061005942.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/12/01/clavrx_VNP02IMG.A2019335.2012.001.2019336013827.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/05/18/clavrx_VNP02IMG.A2019138.2006.001.2019139013059.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/01/28/clavrx_VNP02IMG.A2019028.1930.001.2019029005408.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/08/08/clavrx_VNP02IMG.A2019220.1930.001.2019221010714.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/10/13/clavrx_VNP02IMG.A2019286.1848.001.2019287001722.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/03/20/clavrx_VNP02IMG.A2019079.1830.001.2019079235918.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/12/26/clavrx_VNP02IMG.A2019360.1900.001.2019361001327.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/02/05/clavrx_VNP02IMG.A2019036.2018.001.2019037030301.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/03/30/clavrx_VNP02IMG.A2019089.2024.001.2019090015614.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/bondville_il/2019/11/03/clavrx_VNP02IMG.A2019307.1854.001.2019308001716.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/goodwin_creek/2019/04/15/clavrx_VNP02IMG.A2019105.1842.001.2019106001003.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/07/18/clavrx_VNP02IMG.A2019199.1742.001.2019199230925.uwssec.nc',
-                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/02/02/clavrx_VNP02IMG.A2019033.1754.001.2019034011318.uwssec.nc']
-
-keep_out = keep_out_opd
-
-
-# target_param = 'cloud_probability'
-target_param = 'cld_opd_dcomp'
+target_param = 'cloud_probability'
+# target_param = 'cld_opd_dcomp'
 
 group_name_i = 'super/'
 group_name_m = 'orig/'
 
 solzen_name = group_name_m + 'solar_zenith'
 
-# params_i = [group_name_i+'temp_11_0um', group_name_i+'refl_0_65um', group_name_i+target_param]
-# params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
 params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+target_param]
 params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01', group_name_m+target_param]
 
@@ -86,7 +62,6 @@ def process_cld_opd(grd_k):
     grd_k = np.where(np.invert(keep), 0, grd_k)  # Convert NaN to 0
     return grd_k
 
-# glob.glob('/ships19/cloud/scratch/cphillips/super_abi_l2/goodwin_creek_v3/2020/*/0[1-4]/*/*.nc', recursive=True)
 
 def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
     cnt = start
@@ -97,21 +72,19 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
     path = directory + '**' + '/' + pattern
 
     all_files = glob.glob(path, recursive=True)
-    data_files = [f for f in all_files if f not in keep_out]
-    # data_files = glob.glob(path, recursive=True)
+    valid_files = glob.glob(directory + '*/0[1-4]/*/*.nc', recursive=True)
+    train_files = [f for f in all_files if f not in valid_files]
 
-    valid_tiles_i = []
-    train_tiles_i = []
-    valid_tiles_m = []
-    train_tiles_m = []
+    data_tiles_i = []
+    data_tiles_m = []
     f_cnt = 0
 
-    num_files = len(data_files)
+    num_files = len(all_files)
     print('Start, number of files: ', num_files)
 
     total_num_not_missing = 0
 
-    for idx, data_f in enumerate(data_files):
+    for idx, data_f in enumerate(valid_files):
         # if idx % 4 == 0:  # if we want to skip some files
         if True:
             try:
@@ -121,8 +94,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
                 continue
 
             try:
-                num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m,
-                                      params_i, train_tiles_i, valid_tiles_i,
+                num_not_missing = run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
                                       num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night)
             except Exception as e:
                 print(e)
@@ -134,59 +106,98 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
 
             total_num_not_missing += num_not_missing
 
-            if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0:
+            if len(data_tiles_m) == 0:
                 continue
 
             if (f_cnt % 20) == 0:
                 num_valid_samples = 0
-                if len(valid_tiles_m) > 0:
-                    valid_i = np.stack(valid_tiles_i)
-                    valid_m = np.stack(valid_tiles_m)
+                if len(data_tiles_m) > 0:
+                    valid_i = np.stack(data_tiles_i)
+                    valid_m = np.stack(data_tiles_m)
                     np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
                     np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
                     num_valid_samples = valid_m.shape[0]
 
-                num_train_samples = 0
-                if len(train_tiles_m) > 0:
-                    train_i = np.stack(train_tiles_i)
-                    train_m = np.stack(train_tiles_m)
-                    np.save(out_directory + 'train_ires_' + str(cnt), train_i)
-                    np.save(out_directory + 'train_mres_' + str(cnt), train_m)
-                    num_train_samples = train_m.shape[0]
-
-                valid_tiles_i = []
-                train_tiles_i = []
-                valid_tiles_m = []
-                train_tiles_m = []
+                data_tiles_i = []
+                data_tiles_m = []
 
-                print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
-                total_num_train_samples += num_train_samples
+                print('  num_valid_samples, progress % : ', num_valid_samples, int((f_cnt/num_files)*100))
                 total_num_valid_samples += num_valid_samples
-                print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
-                      total_num_valid_samples, total_num_not_missing)
+                print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing)
                 print('--------------------------------------------------')
 
                 cnt += 1
 
     # Write out leftover, if any. Maybe make this better someday
     num_valid_samples = 0
-    if len(valid_tiles_m) > 0:
-        valid_i = np.stack(valid_tiles_i)
-        valid_m = np.stack(valid_tiles_m)
+    if len(data_tiles_m) > 0:
+        valid_i = np.stack(data_tiles_i)
+        valid_m = np.stack(data_tiles_m)
         np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
         np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
         num_valid_samples = valid_m.shape[0]
 
+    data_tiles_i = []
+    data_tiles_m = []
+    f_cnt = 0
+    total_num_not_missing = 0
+
+    for idx, data_f in enumerate(train_files):
+        # if idx % 4 == 0:  # if we want to skip some files
+        if True:
+            try:
+                h5f = h5py.File(data_f, 'r')
+            except:
+                print('cant open file: ', data_f)
+                continue
+
+            try:
+                num_not_missing = run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
+                                      num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night)
+            except Exception as e:
+                print(e)
+                h5f.close()
+                continue
+            print(data_f)
+            f_cnt += 1
+            h5f.close()
+
+            total_num_not_missing += num_not_missing
+
+            if len(data_tiles_m) == 0:
+                continue
+
+            if (f_cnt % 20) == 0:
+                num_train_samples = 0
+                if len(data_tiles_m) > 0:
+                    train_i = np.stack(data_tiles_i)
+                    train_m = np.stack(data_tiles_m)
+                    np.save(out_directory + 'train_ires_' + str(cnt), train_i)
+                    np.save(out_directory + 'train_mres_' + str(cnt), train_m)
+                    num_train_samples = train_m.shape[0]
+
+                data_tiles_i = []
+                data_tiles_m = []
+
+                print('  num_train_samples, progress % : ', num_train_samples, int((f_cnt/num_files)*100))
+                total_num_train_samples += num_train_samples
+                print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing)
+                print('--------------------------------------------------')
+
+                cnt += 1
+
+    # Write out leftover, if any. Maybe make this better someday
     num_train_samples = 0
-    if len(train_tiles_m) > 0:
-        train_i = np.stack(train_tiles_i)
-        train_m = np.stack(train_tiles_m)
+    if len(data_tiles_m) > 0:
+        train_i = np.stack(data_tiles_i)
+        train_m = np.stack(data_tiles_m)
         np.save(out_directory + 'train_ires_' + str(cnt), train_i)
         np.save(out_directory + 'train_mres_' + str(cnt), train_m)
         num_train_samples = train_m.shape[0]
 
     print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples,
           int((f_cnt / num_files) * 100))
+
     total_num_train_samples += num_train_samples
     total_num_valid_samples += num_valid_samples
     print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
@@ -198,8 +209,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
 
 #  tile_width: Must be even!
 #  kernel_size: Must be odd!
-def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i,
-        num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'):
+def run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'):
 
     border = int((kernel_size - 1)/2) + 1  # Need to add for interpolation with no edge effects
 
@@ -235,15 +245,11 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va
 
     i_skip = tile_width
     j_skip = tile_width
-    # i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
-    # j_start = 0
     i_start = border - 1  # zero-based
     j_start = border - 1  # zero-based
 
     num_y_tiles = int(num_lines / tile_width) - 1
 
-    data_tiles_m = []
-    data_tiles_i = []
     num_not_missing = 0
 
     for j in range(num_y_tiles):
@@ -270,16 +276,16 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va
                 data_tiles_m.append(nda_m)
                 data_tiles_i.append(nda_i)
 
-    num_tiles = len(data_tiles_i)
-    num_valid = int(num_tiles * 0.10)
-    num_train = num_tiles - num_valid
-
-    for k in range(num_train):
-        train_tiles_m.append(data_tiles_m[k])
-        train_tiles_i.append(data_tiles_i[k])
-
-    for k in range(num_valid):
-        valid_tiles_m.append(data_tiles_m[num_train + k])
-        valid_tiles_i.append(data_tiles_i[num_train + k])
+    # num_tiles = len(data_tiles_i)
+    # num_valid = int(num_tiles * 0.10)
+    # num_train = num_tiles - num_valid
+    #
+    # for k in range(num_train):
+    #     train_tiles_m.append(data_tiles_m[k])
+    #     train_tiles_i.append(data_tiles_i[k])
+    #
+    # for k in range(num_valid):
+    #     valid_tiles_m.append(data_tiles_m[num_train + k])
+    #     valid_tiles_i.append(data_tiles_i[num_train + k])
 
     return num_not_missing
-- 
GitLab