From 540fefe96a0bef7767e5d99390e44de90949a6d4 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Tue, 21 Nov 2023 12:49:40 -0600
Subject: [PATCH] snapshot...

---
 modules/icing/pirep_goes.py | 149 +++++++++---------------------------
 1 file changed, 38 insertions(+), 111 deletions(-)

diff --git a/modules/icing/pirep_goes.py b/modules/icing/pirep_goes.py
index 6bfa7fa1..e22d28ad 100644
--- a/modules/icing/pirep_goes.py
+++ b/modules/icing/pirep_goes.py
@@ -1779,7 +1779,7 @@ def fov_extract(icing_files, no_icing_files, trnfile='/home/rink/fovs_l1b_train.
 
 
 def tile_extract(icing_files, no_icing_files, trnfile='/home/rink/tiles_train.h5', vldfile='/home/rink/tiles_valid.h5', tstfile='/home/rink/tiles_test.h5', L1B_or_L2='L1B',
-                 cld_mask_name='cloud_mask', augment=False, do_split=True):
+                 cld_mask_name='cloud_mask', augment=False, do_split=True, has_test=True):
     # 16x16
     n_a, n_b = 12, 28
     m_a, m_b = 12, 28
@@ -1908,9 +1908,10 @@ def tile_extract(icing_files, no_icing_files, trnfile='/home/rink/tiles_train.h5
     icing_alt = icing_alt[ds_indexes]
 
     if do_split:
-        trn_idxs, vld_idxs, tst_idxs = split_data(icing_times)
+        trn_idxs, vld_idxs, tst_idxs = split_data(icing_times, has_test)
         # Below for no test data, just train and valid
-        # trn_idxs = np.concatenate([trn_idxs, tst_idxs])
+        if not has_test:  # no separate test set: fold test indices into train
+            trn_idxs = np.concatenate([trn_idxs, tst_idxs])
     else:
         trn_idxs = np.arange(icing_intensity.shape[0])
         tst_idxs = None
@@ -1996,28 +1997,27 @@ def tile_extract(icing_files, no_icing_files, trnfile='/home/rink/tiles_train.h5
     write_file(trnfile, params, param_types, trn_data_dct, trn_icing_intensity, trn_icing_times, trn_icing_lons, trn_icing_lats, trn_icing_alt)
 
     if do_split:
-        # --- Comment out this block for no test data, just valid and train
-        tst_data_dct = {}
-        for ds_name in params:
-            tst_data_dct[ds_name] = data_dct[ds_name][tst_idxs,]
-        tst_icing_intensity = icing_intensity[tst_idxs,]
-        tst_icing_times = icing_times[tst_idxs,]
-        tst_icing_lons = icing_lons[tst_idxs,]
-        tst_icing_lats = icing_lats[tst_idxs,]
-        tst_icing_alt = icing_alt[tst_idxs,]
-
-        # do sort
-        ds_indexes = np.argsort(tst_icing_times)
-        for ds_name in params:
-            tst_data_dct[ds_name] = tst_data_dct[ds_name][ds_indexes]
-        tst_icing_intensity = tst_icing_intensity[ds_indexes]
-        tst_icing_times = tst_icing_times[ds_indexes]
-        tst_icing_lons = tst_icing_lons[ds_indexes]
-        tst_icing_lats = tst_icing_lats[ds_indexes]
-        tst_icing_alt = tst_icing_alt[ds_indexes]
+        if has_test:
+            tst_data_dct = {}
+            for ds_name in params:
+                tst_data_dct[ds_name] = data_dct[ds_name][tst_idxs,]
+            tst_icing_intensity = icing_intensity[tst_idxs,]
+            tst_icing_times = icing_times[tst_idxs,]
+            tst_icing_lons = icing_lons[tst_idxs,]
+            tst_icing_lats = icing_lats[tst_idxs,]
+            tst_icing_alt = icing_alt[tst_idxs,]
+
+            # do sort
+            ds_indexes = np.argsort(tst_icing_times)
+            for ds_name in params:
+                tst_data_dct[ds_name] = tst_data_dct[ds_name][ds_indexes]
+            tst_icing_intensity = tst_icing_intensity[ds_indexes]
+            tst_icing_times = tst_icing_times[ds_indexes]
+            tst_icing_lons = tst_icing_lons[ds_indexes]
+            tst_icing_lats = tst_icing_lats[ds_indexes]
+            tst_icing_alt = tst_icing_alt[ds_indexes]
 
-        write_file(tstfile, params, param_types, tst_data_dct, tst_icing_intensity, tst_icing_times, tst_icing_lons, tst_icing_lats, tst_icing_alt)
-        # -------------------------------------------------------------------
+            write_file(tstfile, params, param_types, tst_data_dct, tst_icing_intensity, tst_icing_times, tst_icing_lons, tst_icing_lats, tst_icing_alt)
 
         vld_data_dct = {}
         for ds_name in params:
@@ -2268,28 +2268,8 @@ def run_mean_std_3(train_file_path, check_cloudy=False, params=train_params_day)
     pickle.dump(mean_std_lo_hi_dct, f)
     f.close()
 
-# Special VIIRS date/times -----------------------------------------------------------
-# time_ranges = [[get_timestamp('2019-03-12_00:00'), get_timestamp('2019-03-15_23:59')],
-#                [get_timestamp('2019-03-20_00:00'), get_timestamp('2019-03-26_23:59')],
-#                [get_timestamp('2019-03-29_00:00'), get_timestamp('2019-03-29_23:59')],
-#                [get_timestamp('2019-04-17_00:00'), get_timestamp('2019-04-17_23:59')],
-#                [get_timestamp('2019-04-20_00:00'), get_timestamp('2019-04-20_23:59')],
-#                [get_timestamp('2019-04-23_00:00'), get_timestamp('2019-04-23_23:59')],
-#                [get_timestamp('2019-04-27_00:00'), get_timestamp('2019-04-27_23:59')],
-#                [get_timestamp('2019-04-29_00:00'), get_timestamp('2019-04-29_23:59')],
-#                [get_timestamp('2019-05-09_00:00'), get_timestamp('2019-05-09_23:59')],
-#                [get_timestamp('2019-05-12_00:00'), get_timestamp('2019-05-12_23:59')],
-#                [get_timestamp('2019-05-14_00:00'), get_timestamp('2019-05-14_23:59')],
-#                [get_timestamp('2019-05-19_00:00'), get_timestamp('2019-05-19_23:59')],
-#                [get_timestamp('2019-06-13_00:00'), get_timestamp('2019-06-13_23:59')],
-#                [get_timestamp('2019-07-22_00:00'), get_timestamp('2019-07-22_23:59')],
-#                [get_timestamp('2019-07-24_00:00'), get_timestamp('2019-07-24_29:59')],
-#                [get_timestamp('2019-08-20_00:00'), get_timestamp('2019-08-20_23:59')],
-#                [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-09_23:59')],
-#                [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-11_23:59')]]
-
-
-def split_data(times):
+
+def split_data(times, has_test):
     time_idxs = np.arange(times.shape[0])
 
     time_ranges = [[get_timestamp('2018-01-01_00:00'), get_timestamp('2018-01-07_23:59')],
@@ -2310,35 +2290,6 @@ def split_data(times):
                    [get_timestamp('2022-03-01_00:00'), get_timestamp('2022-03-07_23:59')],
                    [get_timestamp('2022-04-01_00:00'), get_timestamp('2022-04-04_23:59')]]
 
-    # time_ranges_lunar = [[get_timestamp('2018-01-01_00:00'), get_timestamp('2018-01-06_23:59')],
-    #                [get_timestamp('2018-03-01_00:00'), get_timestamp('2018-03-06_23:59')],
-    #                [get_timestamp('2018-05-01_00:00'), get_timestamp('2018-05-06_23:59')],
-    #                [get_timestamp('2018-07-01_00:00'), get_timestamp('2018-07-06_23:59')],
-    #                [get_timestamp('2018-09-01_00:00'), get_timestamp('2018-09-06_23:59')],
-    #                [get_timestamp('2018-11-01_00:00'), get_timestamp('2018-11-06_23:59')],
-    #                [get_timestamp('2019-01-01_00:00'), get_timestamp('2019-01-06_23:59')],
-    #                #[get_timestamp('2019-03-01_00:00'), get_timestamp('2019-03-07_23:59')],
-    #                [get_timestamp('2019-03-12_00:00'), get_timestamp('2019-03-15_23:59')],
-    #                [get_timestamp('2019-03-20_00:00'), get_timestamp('2019-03-26_23:59')],
-    #                [get_timestamp('2019-03-29_00:00'), get_timestamp('2019-03-29_23:59')],
-    #                [get_timestamp('2019-05-01_00:00'), get_timestamp('2019-05-03_23:59')],
-    #                [get_timestamp('2019-05-09_00:00'), get_timestamp('2019-05-09_23:59')],
-    #                [get_timestamp('2019-05-12_00:00'), get_timestamp('2019-05-12_23:59')],
-    #                [get_timestamp('2019-05-14_00:00'), get_timestamp('2019-05-14_23:59')],
-    #                [get_timestamp('2019-05-19_00:00'), get_timestamp('2019-05-19_23:59')],
-    #                [get_timestamp('2019-07-01_00:00'), get_timestamp('2019-07-05_23:59')],
-    #                [get_timestamp('2019-07-22_00:00'), get_timestamp('2019-07-22_23:59')],
-    #                [get_timestamp('2019-07-24_00:00'), get_timestamp('2019-07-24_23:59')],
-    #                [get_timestamp('2019-09-01_00:00'), get_timestamp('2019-09-05_23:59')],
-    #                [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-09_23:59')],
-    #                [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-11_23:59')],
-    #                [get_timestamp('2019-11-01_00:00'), get_timestamp('2019-11-07_23:59')],
-    #                [get_timestamp('2021-09-24_00:00'), get_timestamp('2021-10-01_23:59')],
-    #                [get_timestamp('2021-11-01_00:00'), get_timestamp('2021-11-07_23:59')],
-    #                [get_timestamp('2022-01-01_00:00'), get_timestamp('2022-01-07_23:59')],
-    #                [get_timestamp('2022-03-01_00:00'), get_timestamp('2022-03-07_23:59')],
-    #                [get_timestamp('2022-04-01_00:00'), get_timestamp('2022-04-04_23:59')]]
-
     keep_out = 10800  # 3 hrs
 
     vld_time_idxs = []
@@ -2348,7 +2299,6 @@ def split_data(times):
         tidxs = np.searchsorted(times, t_rng)
         vld_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1))
     vld_time_idxs = np.concatenate(vld_time_idxs, axis=None)
-    # train_time_idxs = time_idxs[np.in1d(time_idxs, vld_time_idxs, invert=True)]
 
     time_ranges = [[get_timestamp('2018-02-01_00:00'), get_timestamp('2018-02-04_23:59')],
                    [get_timestamp('2018-04-01_00:00'), get_timestamp('2018-04-04_23:59')],
@@ -2374,42 +2324,19 @@ def split_data(times):
                    [get_timestamp('2022-11-01_00:00'), get_timestamp('2022-11-05_23:59')],
                    [get_timestamp('2022-11-10_00:00'), get_timestamp('2022-12-01_23:59')]]
 
-    # time_ranges_lunar = [[get_timestamp('2018-02-01_00:00'), get_timestamp('2018-02-04_23:59')],
-    #                [get_timestamp('2018-04-01_00:00'), get_timestamp('2018-04-04_23:59')],
-    #                [get_timestamp('2018-06-01_00:00'), get_timestamp('2018-06-04_23:59')],
-    #                [get_timestamp('2018-08-01_00:00'), get_timestamp('2018-08-04_23:59')],
-    #                [get_timestamp('2018-10-01_00:00'), get_timestamp('2018-10-04_23:59')],
-    #                [get_timestamp('2018-12-01_00:00'), get_timestamp('2018-12-04_23:59')],
-    #                [get_timestamp('2019-02-01_00:00'), get_timestamp('2019-02-04_23:59')],
-    #                #[get_timestamp('2019-04-01_00:00'), get_timestamp('2019-04-04_23:59')],
-    #                [get_timestamp('2019-04-17_00:00'), get_timestamp('2019-04-17_23:59')],
-    #                [get_timestamp('2019-04-20_00:00'), get_timestamp('2019-04-20_23:59')],
-    #                [get_timestamp('2019-04-23_00:00'), get_timestamp('2019-04-23_23:59')],
-    #                [get_timestamp('2019-04-27_00:00'), get_timestamp('2019-04-27_23:59')],
-    #                [get_timestamp('2019-04-29_00:00'), get_timestamp('2019-04-29_23:59')],
-    #                [get_timestamp('2019-06-01_00:00'), get_timestamp('2019-06-03_23:59')],
-    #                [get_timestamp('2019-06-13_00:00'), get_timestamp('2019-06-13_23:59')],
-    #                [get_timestamp('2019-08-01_00:00'), get_timestamp('2019-08-03_23:59')],
-    #                [get_timestamp('2019-08-20_00:00'), get_timestamp('2019-08-20_23:59')],
-    #                [get_timestamp('2019-10-01_00:00'), get_timestamp('2019-10-04_23:59')],
-    #                [get_timestamp('2019-12-01_00:00'), get_timestamp('2019-12-04_23:59')],
-    #
-    #                [get_timestamp('2021-10-05_00:00'), get_timestamp('2021-10-10_23:59')],
-    #                [get_timestamp('2021-12-01_00:00'), get_timestamp('2021-12-04_23:59')],
-    #                [get_timestamp('2022-02-01_00:00'), get_timestamp('2022-02-04_23:59')],
-    #                [get_timestamp('2022-03-26_00:00'), get_timestamp('2022-03-30_23:59')],
-    #                [get_timestamp('2022-04-07_00:00'), get_timestamp('2022-04-10_23:59')]]
-
     tst_time_idxs = []
-    for t_rng in time_ranges:
-        t_rng[0] -= keep_out
-        t_rng[1] += keep_out
-        tidxs = np.searchsorted(times, t_rng)
-        tst_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1))
-    tst_time_idxs = np.concatenate(tst_time_idxs, axis=None)
-
-    vld_tst_time_idxs = np.concatenate([vld_time_idxs, tst_time_idxs])
-    vld_tst_time_idxs = np.sort(vld_tst_time_idxs)
+    if has_test:
+        for t_rng in time_ranges:
+            t_rng[0] -= keep_out
+            t_rng[1] += keep_out
+            tidxs = np.searchsorted(times, t_rng)
+            tst_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1))
+        tst_time_idxs = np.concatenate(tst_time_idxs, axis=None)
+        vld_tst_time_idxs = np.concatenate([vld_time_idxs, tst_time_idxs])
+        vld_tst_time_idxs = np.sort(vld_tst_time_idxs)
+    else:
+        # No test split: exclude only the validation indices from training
+        vld_tst_time_idxs = np.sort(vld_time_idxs)
 
     train_time_idxs = time_idxs[np.in1d(time_idxs, vld_tst_time_idxs, invert=True)]
 
-- 
GitLab