diff --git a/modules/icing/pirep_goes.py b/modules/icing/pirep_goes.py
index 612b704681a4c286bdd1de7fbc1a9704f76cc47b..58ad8fea9d706f42de12c8d0a43486dce04d82d9 100644
--- a/modules/icing/pirep_goes.py
+++ b/modules/icing/pirep_goes.py
@@ -1224,11 +1224,12 @@ def tile_extract(trnfile='/home/rink/tiles_l1b_train.h5', tstfile='/home/rink/ti
     icing_lons = icing_lons[ds_indexes]
     icing_lats = icing_lats[ds_indexes]
 
-    #trn_idxs, tst_idxs = split_data(icing_intensity.shape[0], shuffle=False, perc=split)
-    all_idxs = np.arange(icing_intensity.shape[0])
-    splt_idx = int(icing_intensity.shape[0] * (1-split))
-    trn_idxs = all_idxs[0:splt_idx]
-    tst_idxs = all_idxs[splt_idx:]
+    # #trn_idxs, tst_idxs = split_data(icing_intensity.shape[0], shuffle=False, perc=split)
+    trn_idxs, tst_idxs = split_data(icing_times)
+    # all_idxs = np.arange(icing_intensity.shape[0])
+    # splt_idx = int(icing_intensity.shape[0] * (1-split))
+    # trn_idxs = all_idxs[0:splt_idx]
+    # tst_idxs = all_idxs[splt_idx:]
     # ---------------------------------------------
 
     trn_data_dct = {}
@@ -1492,29 +1493,51 @@ def run_mean_std_2(check_cloudy=False, no_icing_to_icing_ratio=5, params=train_p
     # return mean_std_dct
 
 
-def split_data(num_obs, perc=0.2, skip=1, shuffle=True, seed=None):
-    idxs = np.arange(num_obs)
-    idxs = list(idxs)
-
-    num_test = int(num_obs * perc)
-
-    test_idxs = idxs[::int(num_obs / num_test)]
-
-    test_set = set(test_idxs)
-    train_set = (set(idxs)).difference(test_set)
-    train_idxs = list(train_set)
-
-    test_idxs = np.array(test_idxs)
-    train_idxs = np.array(train_idxs)
-
-    if seed is not None:
-        np.random.seed(seed)
-
-    if shuffle:
-        np.random.shuffle(test_idxs)
-        np.random.shuffle(train_idxs)
-
-    return train_idxs[::skip], test_idxs[::skip]
+# def split_data(num_obs, perc=0.2, skip=1, shuffle=True, seed=None):
+#     idxs = np.arange(num_obs)
+#     idxs = list(idxs)
+#
+#     num_test = int(num_obs * perc)
+#
+#     test_idxs = idxs[::int(num_obs / num_test)]
+#
+#     test_set = set(test_idxs)
+#     train_set = (set(idxs)).difference(test_set)
+#     train_idxs = list(train_set)
+#
+#     test_idxs = np.array(test_idxs)
+#     train_idxs = np.array(train_idxs)
+#
+#     if seed is not None:
+#         np.random.seed(seed)
+#
+#     if shuffle:
+#         np.random.shuffle(test_idxs)
+#         np.random.shuffle(train_idxs)
+#
+#     return train_idxs[::skip], test_idxs[::skip]
+
+
+def split_data(times):
+    time_idxs = np.arange(times.shape[0])
+
+    time_ranges = [[get_timestamp('2018-01-01_00:00'), get_timestamp('2018-01-07_23:59')],
+                   [get_timestamp('2018-04-01_00:00'), get_timestamp('2018-04-07_23:59')],
+                   [get_timestamp('2018-07-01_00:00'), get_timestamp('2018-07-07_23:59')],
+                   [get_timestamp('2018-10-01_00:00'), get_timestamp('2018-10-07_23:59')],
+                   [get_timestamp('2019-01-01_00:00'), get_timestamp('2019-01-07_23:59')],
+                   [get_timestamp('2019-04-01_00:00'), get_timestamp('2019-04-07_23:59')],
+                   [get_timestamp('2019-07-01_00:00'), get_timestamp('2019-07-07_23:59')],
+                   [get_timestamp('2019-10-01_00:00'), get_timestamp('2019-10-07_23:59')]]
+
+    test_time_idxs = []
+    for t_rng in time_ranges:
+        tidxs = np.searchsorted(times, t_rng)
+        test_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1))
+    test_time_idxs = np.concatenate(test_time_idxs, axis=None)
+    train_time_idxs = time_idxs[np.in1d(time_idxs, test_time_idxs, invert=True)]
+
+    return train_time_idxs, test_time_idxs
 
 
 def normalize(data, param, mean_std_dict, add_noise=False, noise_scale=1.0, seed=None):
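
Note: the change above replaces the sequential 80/20 index split in tile_extract with fixed, seasonal hold-out weeks selected by observation time, so test samples are no longer temporally adjacent to training samples. Below is a minimal, self-contained sketch of how the new split behaves. The project's get_timestamp() helper is not part of this diff; the stub here only assumes it converts a 'YYYY-MM-DD_HH:MM' string to epoch seconds, and the two hold-out weeks and the sample counts in the final comment are illustrative, not taken from the real data.

from datetime import datetime, timezone

import numpy as np


def get_timestamp(dt_str):
    # Hypothetical stand-in for the module's get_timestamp(); assumed to map
    # a 'YYYY-MM-DD_HH:MM' string to seconds since the epoch (UTC).
    return datetime.strptime(dt_str, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc).timestamp()


def split_data(times):
    # times must be sorted ascending for np.searchsorted to give valid ranges.
    time_idxs = np.arange(times.shape[0])

    # Hold-out windows (two weeks here; the real function uses eight seasonal weeks).
    time_ranges = [[get_timestamp('2018-01-01_00:00'), get_timestamp('2018-01-07_23:59')],
                   [get_timestamp('2018-04-01_00:00'), get_timestamp('2018-04-07_23:59')]]

    test_time_idxs = []
    for t_rng in time_ranges:
        # Index range of the observations falling inside this hold-out window.
        tidxs = np.searchsorted(times, t_rng)
        test_time_idxs.append(np.arange(tidxs[0], tidxs[1], 1))
    test_time_idxs = np.concatenate(test_time_idxs, axis=None)
    # Everything outside the hold-out windows becomes training data.
    train_time_idxs = time_idxs[np.in1d(time_idxs, test_time_idxs, invert=True)]

    return train_time_idxs, test_time_idxs


# Example: 200 days of hourly observations starting 2018-01-01 00:00. The two
# hold-out weeks (168 hourly samples each) are carved out of the training set.
times = np.array([get_timestamp('2018-01-01_00:00') + 3600.0 * i for i in range(24 * 200)])
trn_idxs, tst_idxs = split_data(times)
print(trn_idxs.size, tst_idxs.size)  # prints 4464 336

Because the split is driven by timestamps rather than row positions, it stays stable across re-extractions of the tiles as long as the hold-out date ranges are unchanged.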