diff --git a/modules/deeplearning/srcnn_cld_frac.py b/modules/deeplearning/srcnn_cld_frac.py
index 3cf7d5bd494169bd20be89646d34a32f1b897a40..5d111203a80254b40dee81afd0626415e378af30 100644
--- a/modules/deeplearning/srcnn_cld_frac.py
+++ b/modules/deeplearning/srcnn_cld_frac.py
@@ -331,8 +331,6 @@ class SRCNN:
             else:
                 # Half res upsampled to full res:
                 tmp = upsample(tmp)
                 tmp = normalize(tmp, param, mean_std_dct)
-            if DO_ADD_NOISE:
-                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
             data_norm.append(tmp)
         for param in data_params_full:
@@ -343,8 +341,6 @@ class SRCNN:
             # Full res:
             tmp = tmp[:, slc_y, slc_x]
             tmp = normalize(tmp, param, mean_std_dct)
-            if DO_ADD_NOISE:
-                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
             data_norm.append(tmp)
         # ---------------------------------------------------
         tmp = input_data[:, label_idx, :, :]
@@ -359,13 +355,6 @@ class SRCNN:
         tmp = tmp[:, slc_y, slc_x]
         if label_param != 'cloud_probability':
             tmp = normalize(tmp, label_param, mean_std_dct)
-            if DO_ADD_NOISE:
-                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
-        else:
-            if DO_ADD_NOISE:
-                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
-                tmp = np.where(tmp < 0.0, 0.0, tmp)
-                tmp = np.where(tmp > 1.0, 1.0, tmp)
         data_norm.append(tmp)
         # ---------
         data = np.stack(data_norm, axis=3)
@@ -562,12 +551,11 @@ class SRCNN:
         self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
         self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
 
-    @tf.function
-    def train_step(self, mini_batch):
-        inputs = [mini_batch[0]]
-        labels = mini_batch[1]
+    @tf.function(input_signature=[tf.TensorSpec(None, tf.float32), tf.TensorSpec(None, tf.float32)])
+    def train_step(self, inputs, labels):
+        labels = tf.squeeze(labels, axis=[3])
         with tf.GradientTape() as tape:
-            pred = self.model(inputs, training=True)
+            pred = self.model([inputs], training=True)
             loss = self.loss(labels, pred)
             total_loss = loss
             if len(self.model.losses) > 0:
@@ -583,20 +571,20 @@ class SRCNN:
         return loss
 
-    @tf.function
-    def test_step(self, mini_batch):
-        inputs = [mini_batch[0]]
-        labels = mini_batch[1]
-        pred = self.model(inputs, training=False)
+    @tf.function(input_signature=[tf.TensorSpec(None, tf.float32), tf.TensorSpec(None, tf.float32)])
+    def test_step(self, inputs, labels):
+        labels = tf.squeeze(labels, axis=[3])
+        pred = self.model([inputs], training=False)
         t_loss = self.loss(labels, pred)
 
         self.test_loss(t_loss)
         self.test_accuracy(labels, pred)
 
-    def predict(self, mini_batch):
-        inputs = [mini_batch[0]]
-        labels = mini_batch[1]
-        pred = self.model(inputs, training=False)
+    # @tf.function(input_signature=[tf.TensorSpec(None, tf.float32), tf.TensorSpec(None, tf.float32)])
+    # Decorator commented out: under tf.function, pred would be a symbolic tensor, so pred.numpy() could not be evaluated.
+    def predict(self, inputs, labels):
+        pred = self.model([inputs], training=False)
+        # t_loss = self.loss(tf.squeeze(labels), pred)
         t_loss = self.loss(labels, pred)
 
         self.test_labels.append(labels)
 
@@ -659,7 +647,7 @@ class SRCNN:
             trn_ds = trn_ds.batch(BATCH_SIZE)
             for mini_batch in trn_ds:
                 if self.learningRateSchedule is not None:
-                    loss = self.train_step(mini_batch)
+                    loss = self.train_step(mini_batch[0], mini_batch[1])
 
                 if (step % 100) == 0:
@@ -674,7 +662,7 @@ class SRCNN:
                     tst_ds = tf.data.Dataset.from_tensor_slices((data_tst, label_tst))
                     tst_ds = tst_ds.batch(BATCH_SIZE)
                     for mini_batch_test in tst_ds:
-                        self.test_step(mini_batch_test)
+                        self.test_step(mini_batch_test[0], mini_batch_test[1])
 
                     with self.writer_valid.as_default():
                         tf.summary.scalar('loss_val', self.test_loss.result(), step=step)
@@ -703,7 +691,7 @@ class SRCNN:
         ds = tf.data.Dataset.from_tensor_slices((data, label))
         ds = ds.batch(BATCH_SIZE)
         for mini_batch in ds:
-            self.test_step(mini_batch)
+            self.test_step(mini_batch[0], mini_batch[1])
         print('loss, acc: ', self.test_loss.result().numpy(), self.test_accuracy.result().numpy())
         print('------------------------------------------------------')
@@ -980,6 +968,132 @@ def analyze(file='/Users/tomrink/cld_opd_frac.npy'):
         [precision_0, precision_1, precision_2], [mcc_0, mcc_1, mcc_2]
 
 
+def analyze_5cat(file='/Users/tomrink/cld_opd_frac.npy'):
+
+    tup = np.load(file, allow_pickle=True)
+    lbls = tup[0]
+    pred = tup[1]
+    # prob_0 = tup[2]
+    # prob_1 = tup[3]
+    # prob_2 = tup[4]
+
+    lbls = lbls.flatten()
+    pred = pred.flatten()
+    np.histogram(lbls, bins=5)
+    np.histogram(pred, bins=5)
+
+    # Collapse the five categories to three: 0 -> 0, {1, 2, 3} -> 1, 4 -> 2
+    new_lbls = np.zeros(lbls.size, dtype=np.int32)
+    new_pred = np.zeros(pred.size, dtype=np.int32)
+
+    new_lbls[lbls == 0] = 0
+    new_lbls[lbls == 1] = 1
+    new_lbls[lbls == 2] = 1
+    new_lbls[lbls == 3] = 1
+    new_lbls[lbls == 4] = 2
+
+    new_pred[pred == 0] = 0
+    new_pred[pred == 1] = 1
+    new_pred[pred == 2] = 1
+    new_pred[pred == 3] = 1
+    new_pred[pred == 4] = 2
+
+    np.histogram(new_lbls, bins=3)
+    np.histogram(new_pred, bins=3)
+
+    lbls = new_lbls
+    pred = new_pred
+
+    print(np.sum(lbls == 0), np.sum(lbls == 1), np.sum(lbls == 2))
+
+    # One-vs-one masks for the three collapsed class pairs
+    msk_0_1 = lbls != 2
+    msk_1_2 = lbls != 0
+    msk_0_2 = lbls != 1
+
+    lbls_0_1 = lbls[msk_0_1]
+
+    pred_0_1 = pred[msk_0_1]
+    pred_0_1 = np.where(pred_0_1 == 2, 1, pred_0_1)
+
+    # ----
+    lbls_1_2 = lbls[msk_1_2]
+    lbls_1_2 = np.where(lbls_1_2 == 1, 0, lbls_1_2)
+    lbls_1_2 = np.where(lbls_1_2 == 2, 1, lbls_1_2)
+
+    # -9 is a sentinel so original class-0 predictions end up in the positive class
+    pred_1_2 = pred[msk_1_2]
+    pred_1_2 = np.where(pred_1_2 == 0, -9, pred_1_2)
+    pred_1_2 = np.where(pred_1_2 == 1, 0, pred_1_2)
+    pred_1_2 = np.where(pred_1_2 == 2, 1, pred_1_2)
+    pred_1_2 = np.where(pred_1_2 == -9, 1, pred_1_2)
+
+    # ----
+    lbls_0_2 = lbls[msk_0_2]
+    lbls_0_2 = np.where(lbls_0_2 == 2, 1, lbls_0_2)
+
+    pred_0_2 = pred[msk_0_2]
+    pred_0_2 = np.where(pred_0_2 == 2, 1, pred_0_2)
+
+    cm_0_1 = confusion_matrix_values(lbls_0_1, pred_0_1)
+    cm_1_2 = confusion_matrix_values(lbls_1_2, pred_1_2)
+    cm_0_2 = confusion_matrix_values(lbls_0_2, pred_0_2)
+
+    true_0_1 = (lbls_0_1 == 0) & (pred_0_1 == 0)
+    false_0_1 = (lbls_0_1 == 1) & (pred_0_1 == 0)
+
+    true_no_0_1 = (lbls_0_1 == 1) & (pred_0_1 == 1)
+    false_no_0_1 = (lbls_0_1 == 0) & (pred_0_1 == 1)
+
+    true_0_2 = (lbls_0_2 == 0) & (pred_0_2 == 0)
+    false_0_2 = (lbls_0_2 == 1) & (pred_0_2 == 0)
+
+    true_no_0_2 = (lbls_0_2 == 1) & (pred_0_2 == 1)
+    false_no_0_2 = (lbls_0_2 == 0) & (pred_0_2 == 1)
+
+    true_1_2 = (lbls_1_2 == 0) & (pred_1_2 == 0)
+    false_1_2 = (lbls_1_2 == 1) & (pred_1_2 == 0)
+
+    true_no_1_2 = (lbls_1_2 == 1) & (pred_1_2 == 1)
+    false_no_1_2 = (lbls_1_2 == 0) & (pred_1_2 == 1)
+
+    tp_0 = np.sum(true_0_1).astype(np.float64)
+    tp_1 = np.sum(true_1_2).astype(np.float64)
+    tp_2 = np.sum(true_0_2).astype(np.float64)
+
+    tn_0 = np.sum(true_no_0_1).astype(np.float64)
+    tn_1 = np.sum(true_no_1_2).astype(np.float64)
+    tn_2 = np.sum(true_no_0_2).astype(np.float64)
+
+    fp_0 = np.sum(false_0_1).astype(np.float64)
+    fp_1 = np.sum(false_1_2).astype(np.float64)
+    fp_2 = np.sum(false_0_2).astype(np.float64)
+
+    fn_0 = np.sum(false_no_0_1).astype(np.float64)
+    fn_1 = np.sum(false_no_1_2).astype(np.float64)
+    fn_2 = np.sum(false_no_0_2).astype(np.float64)
+
+    recall_0 = tp_0 / (tp_0 + fn_0)
+    recall_1 = tp_1 / (tp_1 + fn_1)
+    recall_2 = tp_2 / (tp_2 + fn_2)
+
+    precision_0 = tp_0 / (tp_0 + fp_0)
+    precision_1 = tp_1 / (tp_1 + fp_1)
+    precision_2 = tp_2 / (tp_2 + fp_2)
+
+    mcc_0 = ((tp_0 * tn_0) - (fp_0 * fn_0)) / np.sqrt((tp_0 + fp_0) * (tp_0 + fn_0) * (tn_0 + fp_0) * (tn_0 + fn_0))
+    mcc_1 = ((tp_1 * tn_1) - (fp_1 * fn_1)) / np.sqrt((tp_1 + fp_1) * (tp_1 + fn_1) * (tn_1 + fp_1) * (tn_1 + fn_1))
+    mcc_2 = ((tp_2 * tn_2) - (fp_2 * fn_2)) / np.sqrt((tp_2 + fp_2) * (tp_2 + fn_2) * (tn_2 + fp_2) * (tn_2 + fn_2))
+
+    acc_0 = np.sum(lbls_0_1 == pred_0_1) / pred_0_1.size
+    acc_1 = np.sum(lbls_1_2 == pred_1_2) / pred_1_2.size
+    acc_2 = np.sum(lbls_0_2 == pred_0_2) / pred_0_2.size
+
+    print(acc_0, recall_0, precision_0, mcc_0)
+    print(acc_1, recall_1, precision_1, mcc_1)
+    print(acc_2, recall_2, precision_2, mcc_2)
+
+    return cm_0_1, cm_1_2, cm_0_2, [acc_0, acc_1, acc_2], [recall_0, recall_1, recall_2],\
+        [precision_0, precision_1, precision_2], [mcc_0, mcc_1, mcc_2], lbls, pred
+
+
 if __name__ == "__main__":
     nn = SRCNN()
     nn.run('matchup_filename')
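
Note on the train_step/test_step signature change: giving @tf.function an
input_signature of tf.TensorSpec(None, tf.float32) (shape None accepts any
rank and shape) makes TensorFlow trace each step function once and reuse that
single concrete function for every batch, instead of retracing whenever the
batch shape changes (for example, the short final batch of an epoch). Below is
a minimal sketch of that behavior, separate from the patch; TinyModel and
scale are illustrative names only:

import tensorflow as tf

class TinyModel(tf.Module):
    def __init__(self):
        super().__init__()
        self.w = tf.Variable(2.0)

    # shape=None in the TensorSpec matches any float32 input, so this
    # method is traced exactly once rather than once per input shape.
    @tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])
    def scale(self, x):
        return self.w * x

m = TinyModel()
m.scale(tf.zeros([4, 8]))     # first call builds the graph
m.scale(tf.zeros([2, 3, 5]))  # different shape, same trace is reused

The same mechanism explains why predict is left undecorated in the patch:
inside a tf.function the model output is a symbolic tensor, so the
pred.numpy() conversion that predict relies on only works in eager mode.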
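
On analyze_5cat: each class pair is scored with the standard binary Matthews
correlation coefficient, MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN)),
and the five-to-three relabeling is a fixed mapping (0 -> 0, 1..3 -> 1, 4 -> 2).
A sketch of both under those assumptions; collapse_5_to_3, mcc_binary, and the
example numbers are hypothetical, not code from the patch:

import numpy as np

# Lookup table equivalent to the ten boolean-mask assignments in analyze_5cat.
COLLAPSE = np.array([0, 1, 1, 1, 2], dtype=np.int32)

def collapse_5_to_3(categories):
    # Fancy indexing maps every element through the table in one step.
    return COLLAPSE[np.asarray(categories, dtype=np.int32)]

def mcc_binary(tp, tn, fp, fn):
    # Same formula as mcc_0/mcc_1/mcc_2 in the patch; the guard avoids
    # 0/0 when a confusion-matrix margin is empty.
    denom = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return ((tp * tn) - (fp * fn)) / denom if denom > 0 else 0.0

print(collapse_5_to_3([0, 1, 2, 3, 4, 4]))            # -> [0 1 1 1 2 2]
print(mcc_binary(tp=40.0, tn=45.0, fp=5.0, fn=10.0))  # ~0.70

One possible follow-up: the unguarded divisions in analyze_5cat can produce
NaN when a class pair has no positives; the guarded denominator above is one
way to handle that case.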