Commit d380bb20 authored by tomrink

snapshot...

parent 4ba3e35d
@@ -44,7 +44,7 @@ f = open(mean_std_file, 'rb')
 mean_std_dct_l2 = pickle.load(f)
 f.close()
 
-mean_std_file = homedir+'data/icing/mean_std_no_ice.pkl'
+mean_std_file = homedir+'data/icing/mean_std_l1b_no_ice.pkl'
 f = open(mean_std_file, 'rb')
 mean_std_dct_l1b = pickle.load(f)
 f.close()
@@ -70,6 +70,7 @@ train_params_l1b = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'te
 # train_params_l1b = ['cld_height_acha', 'cld_geo_thick', 'cld_temp_acha', 'cld_press_acha', 'supercooled_cloud_fraction',
 #                     'cld_emiss_acha', 'conv_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
 # ---------------------------------------------
 train_params = train_params_l1b
 # -- Zero out params (Experimentation Only) ------------
 zero_out_params = ['cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
@@ -138,11 +139,6 @@ class IcingIntensityNN:
         self.handle = None
         self.inner_handle = None
         self.in_mem_batch = None
-        self.filename_trn = None
-        self.h5f_trn = None
-        self.filename_tst = None
-        self.h5f_tst = None
-        self.h5f_l1b = None
         self.h5f_l1b_trn = None
         self.h5f_l1b_tst = None
@@ -234,9 +230,6 @@ class IcingIntensityNN:
             print(e)
 
     def get_in_mem_data_batch(self, idxs, is_training):
-        h5f = self.h5f_trn
-        if not is_training:
-            h5f = self.h5f_tst
 
         if CACHE_DATA_IN_MEM:
             key = frozenset(idxs)
@@ -250,7 +243,7 @@
         data = []
         for param in train_params:
-            nda = h5f[param][nd_idxs, ]
+            nda = self.get_parameter_data(param, nd_idxs, is_training)
             if NOISE_TRAINING and is_training:
                 nda = normalize(nda, param, mean_std_dct, add_noise=True, noise_scale=0.01, seed=42)
             else:
@@ -266,8 +259,7 @@
         data = data.astype(np.float32)
         data = np.transpose(data, axes=(1, 2, 3, 0))
 
-        label = h5f['icing_intensity'][nd_idxs]
-        label = label.astype(np.int32)
+        label = self.get_label_data(nd_idxs, is_training)
         label = np.where(label == -1, 0, label)
 
         # binary, two class
@@ -299,6 +291,23 @@
         nda = h5f[param][nd_idxs,]
         return nda
 
+    def get_label_data(self, nd_idxs, is_training):
+        # Note: labels will be same for nd_idxs across both L1B and L2
+        if is_training:
+            if self.h5f_l1b_trn is not None:
+                h5f = self.h5f_l1b_trn
+            else:
+                h5f = self.h5f_l2_trn
+        else:
+            if self.h5f_l1b_tst is not None:
+                h5f = self.h5f_l1b_tst
+            else:
+                h5f = self.h5f_l2_tst
+
+        label = h5f['icing_intensity'][nd_idxs]
+        label = label.astype(np.int32)
+        return label
+
     def get_in_mem_data_batch_train(self, idxs):
         return self.get_in_mem_data_batch(idxs, True)
@@ -365,21 +374,33 @@ class IcingIntensityNN:
             dataset = dataset.cache()
         self.eval_dataset = dataset
 
-    def setup_pipeline(self, filename_trn, filename_tst, trn_idxs=None, tst_idxs=None, seed=None):
-        self.filename_trn = filename_trn
-        self.h5f_trn = h5py.File(filename_trn, 'r')
-        self.filename_tst = filename_tst
-        self.h5f_tst = h5py.File(filename_tst, 'r')
+    def setup_pipeline(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst, trn_idxs=None, tst_idxs=None, seed=None):
+        if filename_l1b_trn is not None:
+            self.h5f_l1b_trn = h5py.File(filename_l1b_trn, 'r')
+        if filename_l1b_tst is not None:
+            self.h5f_l1b_tst = h5py.File(filename_l1b_tst, 'r')
+        if filename_l2_trn is not None:
+            self.h5f_l2_trn = h5py.File(filename_l2_trn, 'r')
+        if filename_l2_tst is not None:
+            self.h5f_l2_tst = h5py.File(filename_l2_tst, 'r')
 
         if trn_idxs is None:
-            time = self.h5f_trn['time']
+            # Note: time is same across both L1B and L2 for idxs
+            if self.h5f_l1b_trn is not None:
+                h5f = self.h5f_l1b_trn
+            else:
+                h5f = self.h5f_l2_trn
+            time = h5f['time']
             trn_idxs = np.arange(time.shape[0])
             if seed is not None:
                 np.random.seed(seed)
             np.random.shuffle(trn_idxs)
 
-            time = self.h5f_tst['time']
+            if self.h5f_l1b_tst is not None:
+                h5f = self.h5f_l1b_tst
+            else:
+                h5f = self.h5f_l2_tst
+            time = h5f['time']
             tst_idxs = np.arange(time.shape[0])
             if seed is not None:
                 np.random.seed(seed)
@@ -395,11 +416,18 @@ class IcingIntensityNN:
         print('num test samples: ', tst_idxs.shape[0])
         print('setup_pipeline: Done')
 
-    def setup_test_pipeline(self, filename, seed=None, shuffle=False):
-        self.filename_tst = filename
-        self.h5f_tst = h5py.File(filename, 'r')
-        time = self.h5f_tst['time']
+    def setup_test_pipeline(self, filename_l1b, filename_l2, seed=None, shuffle=False):
+        if filename_l1b is not None:
+            self.h5f_l1b_tst = h5py.File(filename_l1b, 'r')
+        if filename_l2 is not None:
+            self.h5f_l2_tst = h5py.File(filename_l2, 'r')
+
+        if self.h5f_l1b_tst is not None:
+            h5f = self.h5f_l1b_tst
+        else:
+            h5f = self.h5f_l2_tst
+        time = h5f['time']
         tst_idxs = np.arange(time.shape[0])
         self.num_data_samples = len(tst_idxs)
         if seed is not None:
@@ -800,8 +828,14 @@ class IcingIntensityNN:
         self.writer_train.close()
         self.writer_valid.close()
 
-        self.h5f_trn.close()
-        self.h5f_tst.close()
+        if self.h5f_l1b_trn is not None:
+            self.h5f_l1b_trn.close()
+        if self.h5f_l1b_tst is not None:
+            self.h5f_l1b_tst.close()
+        if self.h5f_l2_trn is not None:
+            self.h5f_l2_trn.close()
+        if self.h5f_l2_tst is not None:
+            self.h5f_l2_tst.close()
 
         f = open('/home/rink/best_stats_'+now+'.pkl', 'wb')
         pickle.dump((best_test_loss, best_test_acc, best_test_recall, best_test_precision, best_test_auc, best_test_f1, best_test_mcc), f)
@@ -871,21 +905,25 @@ class IcingIntensityNN:
         preds = np.argmax(preds, axis=1)
         self.test_preds = preds
 
-    def run(self, filename_trn, filename_tst):
+    def run(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst):
         with tf.device('/device:GPU:'+str(self.gpu_device)):
-            self.setup_pipeline(filename_trn, filename_tst)
+            self.setup_pipeline(filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst)
             self.build_model()
             self.build_training()
             self.build_evaluation()
             self.do_training()
 
-    def run_restore(self, filename_tst, ckpt_dir):
-        self.setup_test_pipeline(filename_tst)
+    def run_restore(self, filename_l1b, filename_l2, ckpt_dir):
+        self.setup_test_pipeline(filename_l1b, filename_l2)
         self.build_model()
         self.build_training()
         self.build_evaluation()
         self.restore(ckpt_dir)
-        self.h5f_tst.close()
+
+        if self.h5f_l1b_tst is not None:
+            self.h5f_l1b_tst.close()
+        if self.h5f_l2_tst is not None:
+            self.h5f_l2_tst.close()
 
     def run_evaluate(self, filename, ckpt_dir):
         data_dct, ll, cc = make_for_full_domain_predict(filename, name_list=train_params)
@@ -896,7 +934,7 @@ class IcingIntensityNN:
         self.do_evaluate(ckpt_dir)
 
-def run_restore_static(filename_tst, ckpt_dir_s_path):
+def run_restore_static(filename_l1b, filename_l2, ckpt_dir_s_path):
     ckpt_dir_s = os.listdir(ckpt_dir_s_path)
     cm_s = []
 
    for ckpt in ckpt_dir_s:
@@ -904,7 +942,7 @@ def run_restore_static(filename_tst, ckpt_dir_s_path):
         if not os.path.isdir(ckpt_dir):
             continue
         nn = IcingIntensityNN()
-        nn.run_restore(filename_tst, ckpt_dir)
+        nn.run_restore(filename_l1b, filename_l2, ckpt_dir)
         cm_s.append(tf.math.confusion_matrix(nn.test_labels.flatten(), nn.test_preds.flatten()))
     num = len(cm_s)
     cm_avg = cm_s[0]
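
For orientation, a minimal usage sketch of the entry points as they look after this commit. The HDF5 paths and checkpoint directory below are placeholders (not files referenced anywhere in the change), and the sketch assumes the rest of the class behaves as before; per the new setup logic, passing None for either the L1B or the L2 file(s) makes the pipeline fall back to whichever dataset was supplied.

# Hypothetical example; file paths are placeholders.
nn = IcingIntensityNN()
nn.run('/data/icing/l1b_train.h5', '/data/icing/l1b_test.h5',
       '/data/icing/l2_train.h5', '/data/icing/l2_test.h5')

# Restore-and-test with L1B only: the L2 argument is None, so
# setup_test_pipeline opens just the L1B test file and get_label_data
# reads 'icing_intensity' from it.
nn_eval = IcingIntensityNN()
nn_eval.run_restore('/data/icing/l1b_test.h5', None, '/path/to/ckpt_dir')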