diff --git a/modules/deeplearning/icing_cnn.py b/modules/deeplearning/icing_cnn.py
index d63051373da855731b985f1c8fae042bfda1c144..311db2305a73962d4200aba6067fee279ced5415 100644
--- a/modules/deeplearning/icing_cnn.py
+++ b/modules/deeplearning/icing_cnn.py
@@ -44,7 +44,7 @@ f = open(mean_std_file, 'rb')
 mean_std_dct_l2 = pickle.load(f)
 f.close()
 
-mean_std_file = homedir+'data/icing/mean_std_no_ice.pkl'
+mean_std_file = homedir+'data/icing/mean_std_l1b_no_ice.pkl'
 f = open(mean_std_file, 'rb')
 mean_std_dct_l1b = pickle.load(f)
 f.close()
@@ -70,6 +70,7 @@ train_params_l1b = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'te
 # train_params_l1b = ['cld_height_acha', 'cld_geo_thick', 'cld_temp_acha', 'cld_press_acha', 'supercooled_cloud_fraction',
 #                     'cld_emiss_acha', 'conv_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
 # ---------------------------------------------
+
 train_params = train_params_l1b
 # -- Zero out params (Experimentation Only) ------------
 zero_out_params = ['cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
@@ -138,11 +139,6 @@ class IcingIntensityNN:
         self.handle = None
         self.inner_handle = None
         self.in_mem_batch = None
-        self.filename_trn = None
-        self.h5f_trn = None
-        self.filename_tst = None
-        self.h5f_tst = None
-        self.h5f_l1b = None
         self.h5f_l1b_trn = None
         self.h5f_l1b_tst = None
 
@@ -234,9 +230,6 @@ class IcingIntensityNN:
             print(e)
 
     def get_in_mem_data_batch(self, idxs, is_training):
-        h5f = self.h5f_trn
-        if not is_training:
-            h5f = self.h5f_tst
 
         if CACHE_DATA_IN_MEM:
             key = frozenset(idxs)
@@ -250,7 +243,7 @@ class IcingIntensityNN:
 
         data = []
         for param in train_params:
-            nda = h5f[param][nd_idxs, ]
+            nda = self.get_parameter_data(param, nd_idxs, is_training)
             if NOISE_TRAINING and is_training:
                 nda = normalize(nda, param, mean_std_dct, add_noise=True, noise_scale=0.01, seed=42)
             else:
@@ -266,8 +259,7 @@ class IcingIntensityNN:
 
         data = data.astype(np.float32)
         data = np.transpose(data, axes=(1, 2, 3, 0))
-        label = h5f['icing_intensity'][nd_idxs]
-        label = label.astype(np.int32)
+        label = self.get_label_data(nd_idxs, is_training)
         label = np.where(label == -1, 0, label)
 
         # binary, two class
@@ -299,6 +291,23 @@ class IcingIntensityNN:
         nda = h5f[param][nd_idxs,]
         return nda
 
+    def get_label_data(self, nd_idxs, is_training):
+        # Note: labels will be same for nd_idxs across both L1B and L2
+        if is_training:
+            if self.h5f_l1b_trn is not None:
+                h5f = self.h5f_l1b_trn
+            else:
+                h5f = self.h5f_l2_trn
+        else:
+            if self.h5f_l1b_tst is not None:
+                h5f = self.h5f_l1b_tst
+            else:
+                h5f = self.h5f_l2_tst
+
+        label = h5f['icing_intensity'][nd_idxs]
+        label = label.astype(np.int32)
+        return label
+
     def get_in_mem_data_batch_train(self, idxs):
         return self.get_in_mem_data_batch(idxs, True)
 
@@ -365,21 +374,33 @@ class IcingIntensityNN:
         dataset = dataset.cache()
         self.eval_dataset = dataset
 
-    def setup_pipeline(self, filename_trn, filename_tst, trn_idxs=None, tst_idxs=None, seed=None):
-        self.filename_trn = filename_trn
-        self.h5f_trn = h5py.File(filename_trn, 'r')
-
-        self.filename_tst = filename_tst
-        self.h5f_tst = h5py.File(filename_tst, 'r')
+    def setup_pipeline(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst, trn_idxs=None, tst_idxs=None, seed=None):
+        if filename_l1b_trn is not None:
+            self.h5f_l1b_trn = h5py.File(filename_l1b_trn, 'r')
+        if filename_l1b_tst is not None:
+            self.h5f_l1b_tst = h5py.File(filename_l1b_tst, 'r')
+        if filename_l2_trn is not None:
+            self.h5f_l2_trn = h5py.File(filename_l2_trn, 'r')
+        if filename_l2_tst is not None:
+            self.h5f_l2_tst = h5py.File(filename_l2_tst, 'r')
 
         if trn_idxs is None:
-            time = self.h5f_trn['time']
+            # Note: time is same across both L1B and L2 for idxs
+            if self.h5f_l1b_trn is not None:
+                h5f = self.h5f_l1b_trn
+            else:
+                h5f = self.h5f_l2_trn
+            time = h5f['time']
             trn_idxs = np.arange(time.shape[0])
             if seed is not None:
                 np.random.seed(seed)
             np.random.shuffle(trn_idxs)
 
-            time = self.h5f_tst['time']
+            if self.h5f_l1b_tst is not None:
+                h5f = self.h5f_l1b_tst
+            else:
+                h5f = self.h5f_l2_tst
+            time = h5f['time']
             tst_idxs = np.arange(time.shape[0])
             if seed is not None:
                 np.random.seed(seed)
@@ -395,11 +416,18 @@ class IcingIntensityNN:
         print('num test samples: ', tst_idxs.shape[0])
         print('setup_pipeline: Done')
 
-    def setup_test_pipeline(self, filename, seed=None, shuffle=False):
-        self.filename_tst = filename
-        self.h5f_tst = h5py.File(filename, 'r')
+    def setup_test_pipeline(self, filename_l1b, filename_l2, seed=None, shuffle=False):
 
-        time = self.h5f_tst['time']
+        if filename_l1b is not None:
+            self.h5f_l1b_tst = h5py.File(filename_l1b, 'r')
+        if filename_l2 is not None:
+            self.h5f_l2_tst = h5py.File(filename_l2, 'r')
+
+        if self.h5f_l1b_tst is not None:
+            h5f = self.h5f_l1b_tst
+        else:
+            h5f = self.h5f_l2_tst
+        time = h5f['time']
         tst_idxs = np.arange(time.shape[0])
         self.num_data_samples = len(tst_idxs)
         if seed is not None:
@@ -800,8 +828,14 @@ class IcingIntensityNN:
         self.writer_train.close()
         self.writer_valid.close()
 
-        self.h5f_trn.close()
-        self.h5f_tst.close()
+        if self.h5f_l1b_trn is not None:
+            self.h5f_l1b_trn.close()
+        if self.h5f_l1b_tst is not None:
+            self.h5f_l1b_tst.close()
+        if self.h5f_l2_trn is not None:
+            self.h5f_l2_trn.close()
+        if self.h5f_l2_tst is not None:
+            self.h5f_l2_tst.close()
 
         f = open('/home/rink/best_stats_'+now+'.pkl', 'wb')
         pickle.dump((best_test_loss, best_test_acc, best_test_recall, best_test_precision, best_test_auc, best_test_f1, best_test_mcc), f)
@@ -871,21 +905,25 @@ class IcingIntensityNN:
         preds = np.argmax(preds, axis=1)
         self.test_preds = preds
 
-    def run(self, filename_trn, filename_tst):
+    def run(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst):
         with tf.device('/device:GPU:'+str(self.gpu_device)):
-            self.setup_pipeline(filename_trn, filename_tst)
+            self.setup_pipeline(filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst)
             self.build_model()
             self.build_training()
             self.build_evaluation()
             self.do_training()
 
-    def run_restore(self, filename_tst, ckpt_dir):
-        self.setup_test_pipeline(filename_tst)
+    def run_restore(self, filename_l1b, filename_l2, ckpt_dir):
+        self.setup_test_pipeline(filename_l1b, filename_l2)
         self.build_model()
         self.build_training()
         self.build_evaluation()
         self.restore(ckpt_dir)
-        self.h5f_tst.close()
+
+        if self.h5f_l1b_tst is not None:
+            self.h5f_l1b_tst.close()
+        if self.h5f_l2_tst is not None:
+            self.h5f_l2_tst.close()
 
     def run_evaluate(self, filename, ckpt_dir):
         data_dct, ll, cc = make_for_full_domain_predict(filename, name_list=train_params)
@@ -896,7 +934,7 @@ class IcingIntensityNN:
         self.do_evaluate(ckpt_dir)
 
 
-def run_restore_static(filename_tst, ckpt_dir_s_path):
+def run_restore_static(filename_l1b, filename_l2, ckpt_dir_s_path):
     ckpt_dir_s = os.listdir(ckpt_dir_s_path)
     cm_s = []
     for ckpt in ckpt_dir_s:
@@ -904,7 +942,7 @@
         if not os.path.isdir(ckpt_dir):
             continue
         nn = IcingIntensityNN()
-        nn.run_restore(filename_l1b, filename_l2, ckpt_dir)
+        nn.run_restore(filename_l1b, filename_l2, ckpt_dir)
         cm_s.append(tf.math.confusion_matrix(nn.test_labels.flatten(), nn.test_preds.flatten()))
     num = len(cm_s)
     cm_avg = cm_s[0]
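For context, a minimal usage sketch of the entry points as they look after this change. The import assumes the repository root is on PYTHONPATH; the HDF5 paths and checkpoint directory below are purely hypothetical. Per the new `setup_pipeline`/`setup_test_pipeline`, either the L1B or the L2 filename may be passed as `None` when only one product family is used, since `time` and `icing_intensity` are read from whichever file is open (L1B preferred, per `get_label_data`).

```python
# Usage sketch (hypothetical paths; call signatures follow this diff).
from modules.deeplearning.icing_cnn import IcingIntensityNN, run_restore_static

# Train/test with both L1B and L2 HDF5 files open.
nn = IcingIntensityNN()
nn.run('/data/icing/l1b_train.h5', '/data/icing/l1b_test.h5',
       '/data/icing/l2_train.h5', '/data/icing/l2_test.h5')

# L1B-only run: the L2 filenames may be None, matching the None checks in
# setup_pipeline and the guarded close() calls added in this diff.
nn_l1b = IcingIntensityNN()
nn_l1b.run('/data/icing/l1b_train.h5', '/data/icing/l1b_test.h5', None, None)

# Restore each saved checkpoint under a directory and accumulate confusion
# matrices over the test files.
run_restore_static('/data/icing/l1b_test.h5', '/data/icing/l2_test.h5',
                   '/home/rink/saved_models/')
```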