Skip to content
Snippets Groups Projects
Commit d380bb20 authored by tomrink
Browse files

snapshot...

parent 4ba3e35d
No related branches found
No related tags found
No related merge requests found
......@@ -44,7 +44,7 @@ f = open(mean_std_file, 'rb')
mean_std_dct_l2 = pickle.load(f)
f.close()
# Load the pickled object for the L1B parameters into mean_std_dct_l1b.
# (Presumably per-parameter mean/std statistics consumed by normalize();
# confirm against the code that builds this file.)
# NOTE: pickle.load can execute arbitrary code -- only point this at a
# trusted, locally generated file.
mean_std_file = homedir+'data/icing/mean_std_l1b_no_ice.pkl'
# The context manager closes the file even if unpickling fails.
with open(mean_std_file, 'rb') as f:
    mean_std_dct_l1b = pickle.load(f)
......@@ -70,6 +70,7 @@ train_params_l1b = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'te
# train_params_l1b = ['cld_height_acha', 'cld_geo_thick', 'cld_temp_acha', 'cld_press_acha', 'supercooled_cloud_fraction',
# 'cld_emiss_acha', 'conv_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
# ---------------------------------------------
train_params = train_params_l1b
# -- Zero out params (Experimentation Only) ------------
zero_out_params = ['cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
......@@ -138,11 +139,6 @@ class IcingIntensityNN:
self.handle = None
self.inner_handle = None
self.in_mem_batch = None
self.filename_trn = None
self.h5f_trn = None
self.filename_tst = None
self.h5f_tst = None
self.h5f_l1b = None
self.h5f_l1b_trn = None
self.h5f_l1b_tst = None
......@@ -234,9 +230,6 @@ class IcingIntensityNN:
print(e)
def get_in_mem_data_batch(self, idxs, is_training):
h5f = self.h5f_trn
if not is_training:
h5f = self.h5f_tst
if CACHE_DATA_IN_MEM:
key = frozenset(idxs)
......@@ -250,7 +243,7 @@ class IcingIntensityNN:
data = []
for param in train_params:
nda = h5f[param][nd_idxs, ]
nda = self.get_parameter_data(param, nd_idxs, is_training)
if NOISE_TRAINING and is_training:
nda = normalize(nda, param, mean_std_dct, add_noise=True, noise_scale=0.01, seed=42)
else:
......@@ -266,8 +259,7 @@ class IcingIntensityNN:
data = data.astype(np.float32)
data = np.transpose(data, axes=(1, 2, 3, 0))
label = h5f['icing_intensity'][nd_idxs]
label = label.astype(np.int32)
label = self.get_label_data(nd_idxs, is_training)
label = np.where(label == -1, 0, label)
# binary, two class
......@@ -299,6 +291,23 @@ class IcingIntensityNN:
nda = h5f[param][nd_idxs,]
return nda
def get_label_data(self, nd_idxs, is_training):
    """Return the 'icing_intensity' labels at ``nd_idxs`` as int32.

    The labels are read from the L1B file of the requested split when it
    is open, otherwise from the L2 file of that split.

    Note: labels will be same for nd_idxs across both L1B and L2.

    Args:
        nd_idxs: index selection applied to the 'icing_intensity' dataset.
        is_training: True to read from the training files, False to read
            from the test files.

    Returns:
        The selected labels converted with ``astype(np.int32)``.

    Raises:
        RuntimeError: if neither the L1B nor the L2 file of the requested
            split is open.
    """
    if is_training:
        h5f = self.h5f_l1b_trn if self.h5f_l1b_trn is not None else self.h5f_l2_trn
    else:
        h5f = self.h5f_l1b_tst if self.h5f_l1b_tst is not None else self.h5f_l2_tst

    # Fail with an explicit message instead of "'NoneType' is not subscriptable".
    if h5f is None:
        split = 'training' if is_training else 'test'
        raise RuntimeError('get_label_data: no L1B or L2 file is open for the ' + split + ' split')

    label = h5f['icing_intensity'][nd_idxs]
    return label.astype(np.int32)
def get_in_mem_data_batch_train(self, idxs):
return self.get_in_mem_data_batch(idxs, True)
......@@ -365,21 +374,33 @@ class IcingIntensityNN:
dataset = dataset.cache()
self.eval_dataset = dataset
def setup_pipeline(self, filename_trn, filename_tst, trn_idxs=None, tst_idxs=None, seed=None):
self.filename_trn = filename_trn
self.h5f_trn = h5py.File(filename_trn, 'r')
self.filename_tst = filename_tst
self.h5f_tst = h5py.File(filename_tst, 'r')
def setup_pipeline(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst, trn_idxs=None, tst_idxs=None, seed=None):
if filename_l1b_trn is not None:
self.h5f_l1b_trn = h5py.File(filename_l1b_trn, 'r')
if filename_l1b_tst is not None:
self.h5f_l1b_tst = h5py.File(filename_l1b_tst, 'r')
if filename_l2_trn is not None:
self.h5f_l2_trn = h5py.File(filename_l2_trn, 'r')
if filename_l2_tst is not None:
self.h5f_l2_tst = h5py.File(filename_l2_tst, 'r')
if trn_idxs is None:
time = self.h5f_trn['time']
# Note: time is same across both L1B and L2 for idxs
if self.h5f_l1b_trn is not None:
h5f = self.h5f_l1b_trn
else:
h5f = self.h5f_l2_trn
time = h5f['time']
trn_idxs = np.arange(time.shape[0])
if seed is not None:
np.random.seed(seed)
np.random.shuffle(trn_idxs)
time = self.h5f_tst['time']
if self.h5f_l1b_tst is not None:
h5f = self.h5f_l1b_tst
else:
h5f = self.h5f_l2_tst
time = h5f['time']
tst_idxs = np.arange(time.shape[0])
if seed is not None:
np.random.seed(seed)
......@@ -395,11 +416,18 @@ class IcingIntensityNN:
print('num test samples: ', tst_idxs.shape[0])
print('setup_pipeline: Done')
def setup_test_pipeline(self, filename, seed=None, shuffle=False):
self.filename_tst = filename
self.h5f_tst = h5py.File(filename, 'r')
def setup_test_pipeline(self, filename_l1b, filename_l2, seed=None, shuffle=False):
time = self.h5f_tst['time']
if filename_l1b is not None:
self.h5f_l1b_tst = h5py.File(filename_l1b, 'r')
if filename_l2 is not None:
self.h5f_l2_tst = h5py.File(filename_l2, 'r')
if self.h5f_l1b_tst is not None:
h5f = self.h5f_l1b_tst
else:
h5f = self.h5f_l2_tst
time = h5f['time']
tst_idxs = np.arange(time.shape[0])
self.num_data_samples = len(tst_idxs)
if seed is not None:
......@@ -800,8 +828,14 @@ class IcingIntensityNN:
self.writer_train.close()
self.writer_valid.close()
self.h5f_trn.close()
self.h5f_tst.close()
if self.h5f_l1b_trn is not None:
self.h5f_l1b_trn.close()
if self.h5f_l1b_tst is not None:
self.h5f_l1b_tst.close()
if self.h5f_l2_trn is not None:
self.h5f_l2_trn.close()
if self.h5f_l2_tst is not None:
self.h5f_l2_tst.close()
f = open('/home/rink/best_stats_'+now+'.pkl', 'wb')
pickle.dump((best_test_loss, best_test_acc, best_test_recall, best_test_precision, best_test_auc, best_test_f1, best_test_mcc), f)
......@@ -871,21 +905,25 @@ class IcingIntensityNN:
preds = np.argmax(preds, axis=1)
self.test_preds = preds
def run(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst):
    """Set up the input pipeline, build the model and run training.

    Args:
        filename_l1b_trn: L1B training file name, forwarded to setup_pipeline.
        filename_l1b_tst: L1B test file name, forwarded to setup_pipeline.
        filename_l2_trn: L2 training file name, forwarded to setup_pipeline.
        filename_l2_tst: L2 test file name, forwarded to setup_pipeline.

    setup_pipeline opens only the files whose name is not None.
    """
    # NOTE(review): the extent of the device scope was reconstructed from a
    # listing without indentation; all steps are assumed to run inside it.
    with tf.device('/device:GPU:'+str(self.gpu_device)):
        self.setup_pipeline(filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.do_training()
def run_restore(self, filename_l1b, filename_l2, ckpt_dir):
    """Build the model on the test data and restore it from a checkpoint.

    Args:
        filename_l1b: L1B test file name, forwarded to setup_test_pipeline.
        filename_l2: L2 test file name, forwarded to setup_test_pipeline.
        ckpt_dir: checkpoint directory passed to ``self.restore``.

    setup_test_pipeline opens only the files whose name is not None.
    Whichever test files are open are closed before returning, including
    when one of the build/restore steps raises.
    """
    try:
        self.setup_test_pipeline(filename_l1b, filename_l2)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.restore(ckpt_dir)
    finally:
        # Only the handles that were actually opened need closing.
        if self.h5f_l1b_tst is not None:
            self.h5f_l1b_tst.close()
        if self.h5f_l2_tst is not None:
            self.h5f_l2_tst.close()
def run_evaluate(self, filename, ckpt_dir):
data_dct, ll, cc = make_for_full_domain_predict(filename, name_list=train_params)
......@@ -896,7 +934,7 @@ class IcingIntensityNN:
self.do_evaluate(ckpt_dir)
def run_restore_static(filename_tst, ckpt_dir_s_path):
def run_restore_static(filename_l1b, filename_l2, ckpt_dir_s_path):
ckpt_dir_s = os.listdir(ckpt_dir_s_path)
cm_s = []
for ckpt in ckpt_dir_s:
......@@ -904,7 +942,7 @@ def run_restore_static(filename_tst, ckpt_dir_s_path):
if not os.path.isdir(ckpt_dir):
continue
nn = IcingIntensityNN()
nn.run_restore(filename_tst, ckpt_dir)
nn.run_restore(filename_l1b, filename_l2, ckpt_dir)
cm_s.append(tf.math.confusion_matrix(nn.test_labels.flatten(), nn.test_preds.flatten()))
num = len(cm_s)
cm_avg = cm_s[0]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment