diff --git a/modules/deeplearning/unet_l1b_l2.py b/modules/deeplearning/unet_l1b_l2.py index 42cc1e8bbc22a8f12dc7093c24a45226349c91c1..a8a05b0b6ce1ef97e79ef2177f93520142d49a7f 100644 --- a/modules/deeplearning/unet_l1b_l2.py +++ b/modules/deeplearning/unet_l1b_l2.py @@ -37,11 +37,24 @@ DO_AUGMENT = True img_width = 16 -mean_std_file = home_dir+'/viirs_emis_rad_mean_std.pkl' +# setup scaling parameters dictionary +mean_std_dct = {} +mean_std_file = ancillary_path+'mean_std_lo_hi_l2.pkl' f = open(mean_std_file, 'rb') -mean_std_dct = pickle.load(f) +mean_std_dct_l2 = pickle.load(f) f.close() +mean_std_file = ancillary_path+'mean_std_lo_hi_l1b.pkl' +f = open(mean_std_file, 'rb') +mean_std_dct_l1b = pickle.load(f) +f.close() + +mean_std_dct.update(mean_std_dct_l1b) +mean_std_dct.update(mean_std_dct_l2) + +emis_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_9um_nom', + 'temp_6_7um_nom'] + # -- Zero out params (Experimentation Only) ------------ zero_out_params = ['cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp'] DO_ZERO_OUT = False @@ -164,14 +177,14 @@ class UNET: self.test_label_nda = None # self.n_chans = len(self.train_params) - self.n_chans = 1 + self.n_chans = 6 if TRIPLET: self.n_chans *= 3 self.X_img = tf.keras.Input(shape=(None, None, self.n_chans)) self.inputs.append(self.X_img) # self.inputs.append(tf.keras.Input(shape=(None, None, 5))) - self.inputs.append(tf.keras.Input(shape=(None, None, 1))) + self.inputs.append(tf.keras.Input(shape=(None, None, 6))) self.flight_level = 0 @@ -198,56 +211,6 @@ class UNET: # # Memory growth must be set before GPUs have been initialized # print(e) - # def get_in_mem_data_batch(self, idxs, is_training): - # - # # sort these to use as numpy indexing arrays - # nd_idxs = np.array(idxs) - # nd_idxs = np.sort(nd_idxs) - # - # data = [] - # for param in self.train_params: - # nda = self.get_parameter_data(param, nd_idxs, is_training) - # nda = normalize(nda, param, mean_std_dct) - # if DO_ZERO_OUT and is_training: - # try: - # zero_out_params.index(param) - # nda[:,] = 0.0 - # except ValueError: - # pass - # data.append(nda) - # data = np.stack(data) - # data = data.astype(np.float32) - # data = np.transpose(data, axes=(1, 2, 3, 0)) - # - # data_alt = self.get_scalar_data(nd_idxs, is_training) - # - # label = self.get_label_data(nd_idxs, is_training) - # label = np.where(label == -1, 0, label) - # - # # binary, two class - # if NumClasses == 2: - # label = np.where(label != 0, 1, label) - # label = label.reshape((label.shape[0], 1)) - # elif NumClasses == 3: - # label = np.where(np.logical_or(label == 1, label == 2), 1, label) - # label = np.where(np.invert(np.logical_or(label == 0, label == 1)), 2, label) - # label = label.reshape((label.shape[0], 1)) - # - # if is_training and DO_AUGMENT: - # data_ud = np.flip(data, axis=1) - # data_alt_ud = np.copy(data_alt) - # label_ud = np.copy(label) - # - # data_lr = np.flip(data, axis=2) - # data_alt_lr = np.copy(data_alt) - # label_lr = np.copy(label) - # - # data = np.concatenate([data, data_ud, data_lr]) - # data_alt = np.concatenate([data_alt, data_alt_ud, data_alt_lr]) - # label = np.concatenate([label, label_ud, label_lr]) - # - # return data, data_alt, label - def get_in_mem_data_batch(self, idxs, is_training): if is_training: train_data = [] @@ -259,10 +222,10 @@ class UNET: f = self.train_label_files[k] nda = np.load(f) - train_label.append(nda) + train_label.append(nda[:, 0, :, :]) data = np.concatenate(train_data) - data = np.expand_dims(data, axis=3) + label = np.concatenate(train_label) label = np.expand_dims(label, axis=3) else: @@ -275,10 +238,9 @@ class UNET: f = self.test_label_files[k] nda = np.load(f) - test_label.append(nda) + test_label.append(nda[:, 0, :, :]) data = np.concatenate(test_data) - data = np.expand_dims(data, axis=3) label = np.concatenate(test_label) label = np.expand_dims(label, axis=3) @@ -286,8 +248,13 @@ class UNET: data = data.astype(np.float32) label = label.astype(np.float32) - data = normalize(data, 'M15', mean_std_dct) - label = normalize(label, 'M15', mean_std_dct) + data_norm = [] + for idx, param in enumerate(emis_params): + tmp = normalize(data[:, idx, :, :], param, mean_std_dct) + data_norm.append(tmp) + data = np.stack(data_norm, axis=3) + + # label = normalize(label, 'M15', mean_std_dct) if is_training and DO_AUGMENT: data_ud = np.flip(data, axis=1) @@ -301,38 +268,6 @@ class UNET: return data, data, label - # def get_parameter_data(self, param, nd_idxs, is_training): - # if is_training: - # if param in self.train_params_l1b: - # h5f = self.h5f_l1b_trn - # else: - # h5f = self.h5f_l2_trn - # else: - # if param in self.train_params_l1b: - # h5f = self.h5f_l1b_tst - # else: - # h5f = self.h5f_l2_tst - # - # nda = h5f[param][nd_idxs,] - # return nda - # - # def get_label_data(self, nd_idxs, is_training): - # # Note: labels will be same for nd_idxs across both L1B and L2 - # if is_training: - # if self.h5f_l1b_trn is not None: - # h5f = self.h5f_l1b_trn - # else: - # h5f = self.h5f_l2_trn - # else: - # if self.h5f_l1b_tst is not None: - # h5f = self.h5f_l1b_tst - # else: - # h5f = self.h5f_l2_tst - # - # label = h5f['icing_intensity'][nd_idxs] - # label = label.astype(np.int32) - # return label - def get_in_mem_data_batch_train(self, idxs): return self.get_in_mem_data_batch(idxs, True) @@ -402,55 +337,6 @@ class UNET: dataset = dataset.map(self.data_function_evaluate, num_parallel_calls=8) self.eval_dataset = dataset - # def setup_pipeline(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst, trn_idxs=None, tst_idxs=None, seed=None): - # if filename_l1b_trn is not None: - # self.h5f_l1b_trn = h5py.File(filename_l1b_trn, 'r') - # if filename_l1b_tst is not None: - # self.h5f_l1b_tst = h5py.File(filename_l1b_tst, 'r') - # if filename_l2_trn is not None: - # self.h5f_l2_trn = h5py.File(filename_l2_trn, 'r') - # if filename_l2_tst is not None: - # self.h5f_l2_tst = h5py.File(filename_l2_tst, 'r') - # - # if trn_idxs is None: - # # Note: time is same across both L1B and L2 for idxs - # if self.h5f_l1b_trn is not None: - # h5f = self.h5f_l1b_trn - # else: - # h5f = self.h5f_l2_trn - # time = h5f['time'] - # trn_idxs = np.arange(time.shape[0]) - # if seed is not None: - # np.random.seed(seed) - # np.random.shuffle(trn_idxs) - # - # if self.h5f_l1b_tst is not None: - # h5f = self.h5f_l1b_tst - # else: - # h5f = self.h5f_l2_tst - # time = h5f['time'] - # tst_idxs = np.arange(time.shape[0]) - # if seed is not None: - # np.random.seed(seed) - # np.random.shuffle(tst_idxs) - # - # self.num_data_samples = trn_idxs.shape[0] - # - # self.get_train_dataset(trn_idxs) - # self.get_test_dataset(tst_idxs) - # - # print('datetime: ', now) - # print('training and test data: ') - # print(filename_l1b_trn) - # print(filename_l1b_tst) - # print(filename_l2_trn) - # print(filename_l2_tst) - # print('---------------------------') - # print('num train samples: ', self.num_data_samples) - # print('BATCH SIZE: ', BATCH_SIZE) - # print('num test samples: ', tst_idxs.shape[0]) - # print('setup_pipeline: Done') - def setup_pipeline(self, data_nda, label_nda, perc=0.20): num_samples = data_nda.shape[0] @@ -484,6 +370,9 @@ class UNET: num_test_files = int(num_files * perc) num_train_files = num_files - num_test_files + num_test_files = 1 + num_train_files = 3 + self.train_data_files = data_files[0:num_train_files] self.train_label_files = label_files[0:num_train_files] self.test_data_files = data_files[num_train_files:] @@ -496,7 +385,7 @@ class UNET: self.get_train_dataset(trn_idxs) self.get_test_dataset(tst_idxs) - self.num_data_samples = num_train_files * 30 # approximately + self.num_data_samples = num_train_files * 1000 # approximately print('datetime: ', now) print('training and test data: ') @@ -1007,8 +896,8 @@ class UNET: self.do_training() def run_test(self, directory): - data_files = glob.glob(directory+'mod_res*.npy') - label_files = [f.replace('mod', 'img') for f in data_files] + data_files = glob.glob(directory+'l1b_*.npy') + label_files = [f.replace('l1b', 'l2') for f in data_files] self.setup_pipeline_files(data_files, label_files) self.build_model() self.build_training()