From 540fefe96a0bef7767e5d99390e44de90949a6d4 Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Tue, 21 Nov 2023 12:49:40 -0600 Subject: [PATCH] snapshot... --- modules/icing/pirep_goes.py | 149 +++++++++--------------------------- 1 file changed, 38 insertions(+), 111 deletions(-) diff --git a/modules/icing/pirep_goes.py b/modules/icing/pirep_goes.py index 6bfa7fa1..e22d28ad 100644 --- a/modules/icing/pirep_goes.py +++ b/modules/icing/pirep_goes.py @@ -1779,7 +1779,7 @@ def fov_extract(icing_files, no_icing_files, trnfile='/home/rink/fovs_l1b_train. def tile_extract(icing_files, no_icing_files, trnfile='/home/rink/tiles_train.h5', vldfile='/home/rink/tiles_valid.h5', tstfile='/home/rink/tiles_test.h5', L1B_or_L2='L1B', - cld_mask_name='cloud_mask', augment=False, do_split=True): + cld_mask_name='cloud_mask', augment=False, do_split=True, has_test=True): # 16x16 n_a, n_b = 12, 28 m_a, m_b = 12, 28 @@ -1908,9 +1908,10 @@ def tile_extract(icing_files, no_icing_files, trnfile='/home/rink/tiles_train.h5 icing_alt = icing_alt[ds_indexes] if do_split: - trn_idxs, vld_idxs, tst_idxs = split_data(icing_times) + trn_idxs, vld_idxs, tst_idxs = split_data(icing_times, has_test) # Below for no test data, just train and valid - # trn_idxs = np.concatenate([trn_idxs, tst_idxs]) + if not has_test: # for no test data, just train and valid + trn_idxs = np.concatenate([trn_idxs, tst_idxs]) else: trn_idxs = np.arange(icing_intensity.shape[0]) tst_idxs = None @@ -1996,28 +1997,27 @@ def tile_extract(icing_files, no_icing_files, trnfile='/home/rink/tiles_train.h5 write_file(trnfile, params, param_types, trn_data_dct, trn_icing_intensity, trn_icing_times, trn_icing_lons, trn_icing_lats, trn_icing_alt) if do_split: - # --- Comment out this block for no test data, just valid and train - tst_data_dct = {} - for ds_name in params: - tst_data_dct[ds_name] = data_dct[ds_name][tst_idxs,] - tst_icing_intensity = icing_intensity[tst_idxs,] - tst_icing_times = icing_times[tst_idxs,] - 
tst_icing_lons = icing_lons[tst_idxs,] - tst_icing_lats = icing_lats[tst_idxs,] - tst_icing_alt = icing_alt[tst_idxs,] - - # do sort - ds_indexes = np.argsort(tst_icing_times) - for ds_name in params: - tst_data_dct[ds_name] = tst_data_dct[ds_name][ds_indexes] - tst_icing_intensity = tst_icing_intensity[ds_indexes] - tst_icing_times = tst_icing_times[ds_indexes] - tst_icing_lons = tst_icing_lons[ds_indexes] - tst_icing_lats = tst_icing_lats[ds_indexes] - tst_icing_alt = tst_icing_alt[ds_indexes] + if has_test: + tst_data_dct = {} + for ds_name in params: + tst_data_dct[ds_name] = data_dct[ds_name][tst_idxs,] + tst_icing_intensity = icing_intensity[tst_idxs,] + tst_icing_times = icing_times[tst_idxs,] + tst_icing_lons = icing_lons[tst_idxs,] + tst_icing_lats = icing_lats[tst_idxs,] + tst_icing_alt = icing_alt[tst_idxs,] + + # do sort + ds_indexes = np.argsort(tst_icing_times) + for ds_name in params: + tst_data_dct[ds_name] = tst_data_dct[ds_name][ds_indexes] + tst_icing_intensity = tst_icing_intensity[ds_indexes] + tst_icing_times = tst_icing_times[ds_indexes] + tst_icing_lons = tst_icing_lons[ds_indexes] + tst_icing_lats = tst_icing_lats[ds_indexes] + tst_icing_alt = tst_icing_alt[ds_indexes] - write_file(tstfile, params, param_types, tst_data_dct, tst_icing_intensity, tst_icing_times, tst_icing_lons, tst_icing_lats, tst_icing_alt) - # ------------------------------------------------------------------- + write_file(tstfile, params, param_types, tst_data_dct, tst_icing_intensity, tst_icing_times, tst_icing_lons, tst_icing_lats, tst_icing_alt) vld_data_dct = {} for ds_name in params: @@ -2268,28 +2268,8 @@ def run_mean_std_3(train_file_path, check_cloudy=False, params=train_params_day) pickle.dump(mean_std_lo_hi_dct, f) f.close() -# Special VIIRS date/times ----------------------------------------------------------- -# time_ranges = [[get_timestamp('2019-03-12_00:00'), get_timestamp('2019-03-15_23:59')], -# [get_timestamp('2019-03-20_00:00'), 
get_timestamp('2019-03-26_23:59')], -# [get_timestamp('2019-03-29_00:00'), get_timestamp('2019-03-29_23:59')], -# [get_timestamp('2019-04-17_00:00'), get_timestamp('2019-04-17_23:59')], -# [get_timestamp('2019-04-20_00:00'), get_timestamp('2019-04-20_23:59')], -# [get_timestamp('2019-04-23_00:00'), get_timestamp('2019-04-23_23:59')], -# [get_timestamp('2019-04-27_00:00'), get_timestamp('2019-04-27_23:59')], -# [get_timestamp('2019-04-29_00:00'), get_timestamp('2019-04-29_23:59')], -# [get_timestamp('2019-05-09_00:00'), get_timestamp('2019-05-09_23:59')], -# [get_timestamp('2019-05-12_00:00'), get_timestamp('2019-05-12_23:59')], -# [get_timestamp('2019-05-14_00:00'), get_timestamp('2019-05-14_23:59')], -# [get_timestamp('2019-05-19_00:00'), get_timestamp('2019-05-19_23:59')], -# [get_timestamp('2019-06-13_00:00'), get_timestamp('2019-06-13_23:59')], -# [get_timestamp('2019-07-22_00:00'), get_timestamp('2019-07-22_23:59')], -# [get_timestamp('2019-07-24_00:00'), get_timestamp('2019-07-24_29:59')], -# [get_timestamp('2019-08-20_00:00'), get_timestamp('2019-08-20_23:59')], -# [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-09_23:59')], -# [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-11_23:59')]] - - -def split_data(times): + +def split_data(times, has_test): time_idxs = np.arange(times.shape[0]) time_ranges = [[get_timestamp('2018-01-01_00:00'), get_timestamp('2018-01-07_23:59')], @@ -2310,35 +2290,6 @@ def split_data(times): [get_timestamp('2022-03-01_00:00'), get_timestamp('2022-03-07_23:59')], [get_timestamp('2022-04-01_00:00'), get_timestamp('2022-04-04_23:59')]] - # time_ranges_lunar = [[get_timestamp('2018-01-01_00:00'), get_timestamp('2018-01-06_23:59')], - # [get_timestamp('2018-03-01_00:00'), get_timestamp('2018-03-06_23:59')], - # [get_timestamp('2018-05-01_00:00'), get_timestamp('2018-05-06_23:59')], - # [get_timestamp('2018-07-01_00:00'), get_timestamp('2018-07-06_23:59')], - # [get_timestamp('2018-09-01_00:00'), 
get_timestamp('2018-09-06_23:59')], - # [get_timestamp('2018-11-01_00:00'), get_timestamp('2018-11-06_23:59')], - # [get_timestamp('2019-01-01_00:00'), get_timestamp('2019-01-06_23:59')], - # #[get_timestamp('2019-03-01_00:00'), get_timestamp('2019-03-07_23:59')], - # [get_timestamp('2019-03-12_00:00'), get_timestamp('2019-03-15_23:59')], - # [get_timestamp('2019-03-20_00:00'), get_timestamp('2019-03-26_23:59')], - # [get_timestamp('2019-03-29_00:00'), get_timestamp('2019-03-29_23:59')], - # [get_timestamp('2019-05-01_00:00'), get_timestamp('2019-05-03_23:59')], - # [get_timestamp('2019-05-09_00:00'), get_timestamp('2019-05-09_23:59')], - # [get_timestamp('2019-05-12_00:00'), get_timestamp('2019-05-12_23:59')], - # [get_timestamp('2019-05-14_00:00'), get_timestamp('2019-05-14_23:59')], - # [get_timestamp('2019-05-19_00:00'), get_timestamp('2019-05-19_23:59')], - # [get_timestamp('2019-07-01_00:00'), get_timestamp('2019-07-05_23:59')], - # [get_timestamp('2019-07-22_00:00'), get_timestamp('2019-07-22_23:59')], - # [get_timestamp('2019-07-24_00:00'), get_timestamp('2019-07-24_23:59')], - # [get_timestamp('2019-09-01_00:00'), get_timestamp('2019-09-05_23:59')], - # [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-09_23:59')], - # [get_timestamp('2019-09-09_00:00'), get_timestamp('2019-09-11_23:59')], - # [get_timestamp('2019-11-01_00:00'), get_timestamp('2019-11-07_23:59')], - # [get_timestamp('2021-09-24_00:00'), get_timestamp('2021-10-01_23:59')], - # [get_timestamp('2021-11-01_00:00'), get_timestamp('2021-11-07_23:59')], - # [get_timestamp('2022-01-01_00:00'), get_timestamp('2022-01-07_23:59')], - # [get_timestamp('2022-03-01_00:00'), get_timestamp('2022-03-07_23:59')], - # [get_timestamp('2022-04-01_00:00'), get_timestamp('2022-04-04_23:59')]] - keep_out = 10800 # 3 hrs vld_time_idxs = [] @@ -2348,7 +2299,6 @@ def split_data(times): tidxs = np.searchsorted(times, t_rng) vld_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1)) vld_time_idxs = 
np.concatenate(vld_time_idxs, axis=None) - # train_time_idxs = time_idxs[np.in1d(time_idxs, vld_time_idxs, invert=True)] time_ranges = [[get_timestamp('2018-02-01_00:00'), get_timestamp('2018-02-04_23:59')], [get_timestamp('2018-04-01_00:00'), get_timestamp('2018-04-04_23:59')], @@ -2374,42 +2324,19 @@ def split_data(times): [get_timestamp('2022-11-01_00:00'), get_timestamp('2022-11-05_23:59')], [get_timestamp('2022-11-10_00:00'), get_timestamp('2022-12-01_23:59')]] - # time_ranges_lunar = [[get_timestamp('2018-02-01_00:00'), get_timestamp('2018-02-04_23:59')], - # [get_timestamp('2018-04-01_00:00'), get_timestamp('2018-04-04_23:59')], - # [get_timestamp('2018-06-01_00:00'), get_timestamp('2018-06-04_23:59')], - # [get_timestamp('2018-08-01_00:00'), get_timestamp('2018-08-04_23:59')], - # [get_timestamp('2018-10-01_00:00'), get_timestamp('2018-10-04_23:59')], - # [get_timestamp('2018-12-01_00:00'), get_timestamp('2018-12-04_23:59')], - # [get_timestamp('2019-02-01_00:00'), get_timestamp('2019-02-04_23:59')], - # #[get_timestamp('2019-04-01_00:00'), get_timestamp('2019-04-04_23:59')], - # [get_timestamp('2019-04-17_00:00'), get_timestamp('2019-04-17_23:59')], - # [get_timestamp('2019-04-20_00:00'), get_timestamp('2019-04-20_23:59')], - # [get_timestamp('2019-04-23_00:00'), get_timestamp('2019-04-23_23:59')], - # [get_timestamp('2019-04-27_00:00'), get_timestamp('2019-04-27_23:59')], - # [get_timestamp('2019-04-29_00:00'), get_timestamp('2019-04-29_23:59')], - # [get_timestamp('2019-06-01_00:00'), get_timestamp('2019-06-03_23:59')], - # [get_timestamp('2019-06-13_00:00'), get_timestamp('2019-06-13_23:59')], - # [get_timestamp('2019-08-01_00:00'), get_timestamp('2019-08-03_23:59')], - # [get_timestamp('2019-08-20_00:00'), get_timestamp('2019-08-20_23:59')], - # [get_timestamp('2019-10-01_00:00'), get_timestamp('2019-10-04_23:59')], - # [get_timestamp('2019-12-01_00:00'), get_timestamp('2019-12-04_23:59')], - # - # [get_timestamp('2021-10-05_00:00'), 
get_timestamp('2021-10-10_23:59')], - # [get_timestamp('2021-12-01_00:00'), get_timestamp('2021-12-04_23:59')], - # [get_timestamp('2022-02-01_00:00'), get_timestamp('2022-02-04_23:59')], - # [get_timestamp('2022-03-26_00:00'), get_timestamp('2022-03-30_23:59')], - # [get_timestamp('2022-04-07_00:00'), get_timestamp('2022-04-10_23:59')]] - tst_time_idxs = [] - for t_rng in time_ranges: - t_rng[0] -= keep_out - t_rng[1] += keep_out - tidxs = np.searchsorted(times, t_rng) - tst_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1)) - tst_time_idxs = np.concatenate(tst_time_idxs, axis=None) - - vld_tst_time_idxs = np.concatenate([vld_time_idxs, tst_time_idxs]) - vld_tst_time_idxs = np.sort(vld_tst_time_idxs) + if has_test: + for t_rng in time_ranges: + t_rng[0] -= keep_out + t_rng[1] += keep_out + tidxs = np.searchsorted(times, t_rng) + tst_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1)) + tst_time_idxs = np.concatenate(tst_time_idxs, axis=None) + + vld_tst_time_idxs = np.concatenate([vld_time_idxs, tst_time_idxs]) + vld_tst_time_idxs = np.sort(vld_tst_time_idxs) + else: + vld_tst_time_idxs = np.sort(vld_time_idxs) train_time_idxs = time_idxs[np.in1d(time_idxs, vld_tst_time_idxs, invert=True)] -- GitLab