Commit d380bb20 authored by tomrink

snapshot...

parent 4ba3e35d
@@ -44,7 +44,7 @@ f = open(mean_std_file, 'rb')
 mean_std_dct_l2 = pickle.load(f)
 f.close()
 
-mean_std_file = homedir+'data/icing/mean_std_no_ice.pkl'
+mean_std_file = homedir+'data/icing/mean_std_l1b_no_ice.pkl'
 f = open(mean_std_file, 'rb')
 mean_std_dct_l1b = pickle.load(f)
 f.close()
@@ -70,6 +70,7 @@ train_params_l1b = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'te
 # train_params_l1b = ['cld_height_acha', 'cld_geo_thick', 'cld_temp_acha', 'cld_press_acha', 'supercooled_cloud_fraction',
 #                     'cld_emiss_acha', 'conv_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
 # ---------------------------------------------
 train_params = train_params_l1b
 # -- Zero out params (Experimentation Only) ------------
 zero_out_params = ['cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
@@ -138,11 +139,6 @@ class IcingIntensityNN:
         self.handle = None
         self.inner_handle = None
         self.in_mem_batch = None
-        self.filename_trn = None
-        self.h5f_trn = None
-        self.filename_tst = None
-        self.h5f_tst = None
-        self.h5f_l1b = None
         self.h5f_l1b_trn = None
         self.h5f_l1b_tst = None
@@ -234,9 +230,6 @@ class IcingIntensityNN:
             print(e)
 
     def get_in_mem_data_batch(self, idxs, is_training):
-        h5f = self.h5f_trn
-        if not is_training:
-            h5f = self.h5f_tst
 
         if CACHE_DATA_IN_MEM:
             key = frozenset(idxs)
@@ -250,7 +243,7 @@
         data = []
         for param in train_params:
-            nda = h5f[param][nd_idxs, ]
+            nda = self.get_parameter_data(param, nd_idxs, is_training)
             if NOISE_TRAINING and is_training:
                 nda = normalize(nda, param, mean_std_dct, add_noise=True, noise_scale=0.01, seed=42)
             else:
@@ -266,8 +259,7 @@
         data = data.astype(np.float32)
         data = np.transpose(data, axes=(1, 2, 3, 0))
 
-        label = h5f['icing_intensity'][nd_idxs]
-        label = label.astype(np.int32)
+        label = self.get_label_data(nd_idxs, is_training)
         label = np.where(label == -1, 0, label)
 
         # binary, two class
@@ -299,6 +291,23 @@
         nda = h5f[param][nd_idxs,]
         return nda
 
+    def get_label_data(self, nd_idxs, is_training):
+        # Note: labels will be same for nd_idxs across both L1B and L2
+        if is_training:
+            if self.h5f_l1b_trn is not None:
+                h5f = self.h5f_l1b_trn
+            else:
+                h5f = self.h5f_l2_trn
+        else:
+            if self.h5f_l1b_tst is not None:
+                h5f = self.h5f_l1b_tst
+            else:
+                h5f = self.h5f_l2_tst
+
+        label = h5f['icing_intensity'][nd_idxs]
+        label = label.astype(np.int32)
+        return label
+
     def get_in_mem_data_batch_train(self, idxs):
         return self.get_in_mem_data_batch(idxs, True)
@@ -365,21 +374,33 @@ class IcingIntensityNN:
             dataset = dataset.cache()
         self.eval_dataset = dataset
 
-    def setup_pipeline(self, filename_trn, filename_tst, trn_idxs=None, tst_idxs=None, seed=None):
-        self.filename_trn = filename_trn
-        self.h5f_trn = h5py.File(filename_trn, 'r')
-        self.filename_tst = filename_tst
-        self.h5f_tst = h5py.File(filename_tst, 'r')
+    def setup_pipeline(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst, trn_idxs=None, tst_idxs=None, seed=None):
+        if filename_l1b_trn is not None:
+            self.h5f_l1b_trn = h5py.File(filename_l1b_trn, 'r')
+        if filename_l1b_tst is not None:
+            self.h5f_l1b_tst = h5py.File(filename_l1b_tst, 'r')
+        if filename_l2_trn is not None:
+            self.h5f_l2_trn = h5py.File(filename_l2_trn, 'r')
+        if filename_l2_tst is not None:
+            self.h5f_l2_tst = h5py.File(filename_l2_tst, 'r')
 
         if trn_idxs is None:
-            time = self.h5f_trn['time']
+            # Note: time is same across both L1B and L2 for idxs
+            if self.h5f_l1b_trn is not None:
+                h5f = self.h5f_l1b_trn
+            else:
+                h5f = self.h5f_l2_trn
+            time = h5f['time']
             trn_idxs = np.arange(time.shape[0])
             if seed is not None:
                 np.random.seed(seed)
             np.random.shuffle(trn_idxs)
 
-            time = self.h5f_tst['time']
+            if self.h5f_l1b_tst is not None:
+                h5f = self.h5f_l1b_tst
+            else:
+                h5f = self.h5f_l2_tst
+            time = h5f['time']
             tst_idxs = np.arange(time.shape[0])
             if seed is not None:
                 np.random.seed(seed)
@@ -395,11 +416,18 @@ class IcingIntensityNN:
         print('num test samples: ', tst_idxs.shape[0])
         print('setup_pipeline: Done')
 
-    def setup_test_pipeline(self, filename, seed=None, shuffle=False):
-        self.filename_tst = filename
-        self.h5f_tst = h5py.File(filename, 'r')
-        time = self.h5f_tst['time']
+    def setup_test_pipeline(self, filename_l1b, filename_l2, seed=None, shuffle=False):
+        if filename_l1b is not None:
+            self.h5f_l1b_tst = h5py.File(filename_l1b, 'r')
+        if filename_l2 is not None:
+            self.h5f_l2_tst = h5py.File(filename_l2, 'r')
+
+        if self.h5f_l1b_tst is not None:
+            h5f = self.h5f_l1b_tst
+        else:
+            h5f = self.h5f_l2_tst
+        time = h5f['time']
         tst_idxs = np.arange(time.shape[0])
         self.num_data_samples = len(tst_idxs)
         if seed is not None:
@@ -800,8 +828,14 @@ class IcingIntensityNN:
         self.writer_train.close()
         self.writer_valid.close()
 
-        self.h5f_trn.close()
-        self.h5f_tst.close()
+        if self.h5f_l1b_trn is not None:
+            self.h5f_l1b_trn.close()
+        if self.h5f_l1b_tst is not None:
+            self.h5f_l1b_tst.close()
+        if self.h5f_l2_trn is not None:
+            self.h5f_l2_trn.close()
+        if self.h5f_l2_tst is not None:
+            self.h5f_l2_tst.close()
 
         f = open('/home/rink/best_stats_'+now+'.pkl', 'wb')
         pickle.dump((best_test_loss, best_test_acc, best_test_recall, best_test_precision, best_test_auc, best_test_f1, best_test_mcc), f)
@@ -871,21 +905,25 @@ class IcingIntensityNN:
         preds = np.argmax(preds, axis=1)
         self.test_preds = preds
 
-    def run(self, filename_trn, filename_tst):
+    def run(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst):
         with tf.device('/device:GPU:'+str(self.gpu_device)):
-            self.setup_pipeline(filename_trn, filename_tst)
+            self.setup_pipeline(filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst)
             self.build_model()
             self.build_training()
             self.build_evaluation()
             self.do_training()
 
-    def run_restore(self, filename_tst, ckpt_dir):
-        self.setup_test_pipeline(filename_tst)
+    def run_restore(self, filename_l1b, filename_l2, ckpt_dir):
+        self.setup_test_pipeline(filename_l1b, filename_l2)
         self.build_model()
         self.build_training()
         self.build_evaluation()
         self.restore(ckpt_dir)
-        self.h5f_tst.close()
+
+        if self.h5f_l1b_tst is not None:
+            self.h5f_l1b_tst.close()
+        if self.h5f_l2_tst is not None:
+            self.h5f_l2_tst.close()
 
     def run_evaluate(self, filename, ckpt_dir):
         data_dct, ll, cc = make_for_full_domain_predict(filename, name_list=train_params)
@@ -896,7 +934,7 @@ class IcingIntensityNN:
         self.do_evaluate(ckpt_dir)
 
-def run_restore_static(filename_tst, ckpt_dir_s_path):
+def run_restore_static(filename_l1b, filename_l2, ckpt_dir_s_path):
     ckpt_dir_s = os.listdir(ckpt_dir_s_path)
     cm_s = []
 
    for ckpt in ckpt_dir_s:
@@ -904,7 +942,7 @@ def run_restore_static(filename_tst, ckpt_dir_s_path):
         if not os.path.isdir(ckpt_dir):
             continue
         nn = IcingIntensityNN()
-        nn.run_restore(filename_tst, ckpt_dir)
+        nn.run_restore(filename_l1b, filename_l2, ckpt_dir)
         cm_s.append(tf.math.confusion_matrix(nn.test_labels.flatten(), nn.test_preds.flatten()))
     num = len(cm_s)
     cm_avg = cm_s[0]
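
For orientation, a minimal usage sketch of the entry points as they look after this commit. The HDF5 paths and checkpoint directory below are placeholders (not files referenced anywhere in the change), and the sketch assumes the rest of the class behaves as before; per the new setup logic, passing None for either the L1B or the L2 file(s) makes the pipeline fall back to whichever dataset was supplied.

# Hypothetical example; file paths are placeholders.
nn = IcingIntensityNN()
nn.run('/data/icing/l1b_train.h5', '/data/icing/l1b_test.h5',
       '/data/icing/l2_train.h5', '/data/icing/l2_test.h5')

# Restore-and-test with L1B only: the L2 argument is None, so
# setup_test_pipeline opens just the L1B test file and get_label_data
# reads 'icing_intensity' from it.
nn_eval = IcingIntensityNN()
nn_eval.run_restore('/data/icing/l1b_test.h5', None, '/path/to/ckpt_dir')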