From f4b5cc59e591dec431788d8194d63f5f779a02ec Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Mon, 6 Feb 2023 21:21:37 -0600 Subject: [PATCH] snapshot... --- modules/deeplearning/cnn_cld_frac.py | 631 ++++++++++++++++----------- 1 file changed, 378 insertions(+), 253 deletions(-) diff --git a/modules/deeplearning/cnn_cld_frac.py b/modules/deeplearning/cnn_cld_frac.py index 68be6bd4..45275100 100644 --- a/modules/deeplearning/cnn_cld_frac.py +++ b/modules/deeplearning/cnn_cld_frac.py @@ -1,11 +1,16 @@ import glob import tensorflow as tf + +import util.util from util.setup import logdir, modeldir, cachepath, now, ancillary_path -from util.util import EarlyStop, normalize, denormalize, resample, resample_2d_linear, resample_one, resample_2d_linear_one, get_grid_values_all +from util.util import EarlyStop, normalize, denormalize, resample, resample_2d_linear, resample_one,\ + resample_2d_linear_one, get_grid_values_all, add_noise, smooth_2d, smooth_2d_single, median_filter_2d,\ + median_filter_2d_single, downscale_2x import os, datetime import numpy as np import pickle import h5py +from scipy.ndimage import gaussian_filter # L1B M/I-bands: /apollo/cloud/scratch/cwhite/VIIRS_HRES/2019/2019_01_01/ # CLAVRx: /apollo/cloud/scratch/Satellite_Output/VIIRS_HRES/2019/2019_01_01/ @@ -13,26 +18,29 @@ import h5py LOG_DEVICE_PLACEMENT = False -PROC_BATCH_SIZE = 2 +PROC_BATCH_SIZE = 4 PROC_BATCH_BUFFER_SIZE = 50000 -NumClasses = 5 +NumClasses = 3 if NumClasses == 2: NumLogits = 1 else: NumLogits = NumClasses BATCH_SIZE = 128 -NUM_EPOCHS = 60 +NUM_EPOCHS = 80 TRACK_MOVING_AVERAGE = False EARLY_STOP = True -NOISE_TRAINING = True +NOISE_TRAINING = False NOISE_STDDEV = 0.01 DO_AUGMENT = True +DO_SMOOTH = True +SIGMA = 1.0 DO_ZERO_OUT = False +DO_ESPCN = False # Note: If True, cannot do mixed resolution input fields (Adjust accordingly below) # setup scaling parameters dictionary mean_std_dct = {} @@ -49,38 +57,64 @@ f.close() mean_std_dct.update(mean_std_dct_l1b) mean_std_dct.update(mean_std_dct_l2) +IMG_DEPTH = 1 # label_param = 'cloud_fraction' # label_param = 'cld_opd_dcomp' label_param = 'cloud_probability' params = ['temp_11_0um_nom', 'temp_12_0um_nom', 'refl_0_65um_nom', label_param] -data_params = ['temp_11_0um_nom'] +data_params_half = ['temp_11_0um_nom'] +data_params_full = ['refl_0_65um_nom'] label_idx = params.index(label_param) -print('data_params: ', data_params) +print('data_params_half: ', data_params_half) +print('data_params_full: ', data_params_full) print('label_param: ', label_param) - -x_138 = np.arange(138) -y_138 = np.arange(138) - -slc_x_2 = slice(0, 138, 2) -slc_y_2 = slice(0, 138, 2) -slc_x = slice(1, 67) -slc_y = slice(1, 67) - -lbl_slc_x = slice(4, 132) -lbl_slc_y = slice(4, 132) +KERNEL_SIZE = 3 # target size: (128, 128) +N = 1 + +if KERNEL_SIZE == 3: + slc_x = slice(2, N*128 + 4) + slc_y = slice(2, N*128 + 4) + slc_x_2 = slice(1, N*128 + 6, 2) + slc_y_2 = slice(1, N*128 + 6, 2) + x_2 = np.arange(int((N*128)/2) + 3) + y_2 = np.arange(int((N*128)/2) + 3) + t = np.arange(0, int((N*128)/2) + 3, 0.5) + s = np.arange(0, int((N*128)/2) + 3, 0.5) + x_k = slice(1, N*128 + 3) + y_k = slice(1, N*128 + 3) + x_128 = slice(3, N*128 + 3) + y_128 = slice(3, N*128 + 3) +elif KERNEL_SIZE == 5: + slc_x = slice(3, 135) + slc_y = slice(3, 135) + slc_x_2 = slice(2, 137, 2) + slc_y_2 = slice(2, 137, 2) + x_128 = slice(5, 133) + y_128 = slice(5, 133) + t = np.arange(1, 67, 0.5) + s = np.arange(1, 67, 0.5) + x_2 = np.arange(68) + y_2 = np.arange(68) +# 
---------------------------------------- +# Exp for ESPCN version +if DO_ESPCN: + slc_x_2 = slice(0, 132, 2) + slc_y_2 = slice(0, 132, 2) + x_128 = slice(2, 130) + y_128 = slice(2, 130) def build_residual_conv2d_block(conv, num_filters, block_name, activation=tf.nn.relu, padding='SAME', - kernel_initializer='he_uniform', scale=None, - do_drop_out=True, drop_rate=0.5, do_batch_norm=False): + kernel_initializer='he_uniform', scale=None, kernel_size=3, + do_drop_out=True, drop_rate=0.5, do_batch_norm=True): with tf.name_scope(block_name): - skip = tf.keras.layers.Conv2D(num_filters, kernel_size=3, padding=padding, kernel_initializer=kernel_initializer, activation=activation)(conv) - skip = tf.keras.layers.Conv2D(num_filters, kernel_size=3, padding=padding, activation=None)(skip) + skip = tf.keras.layers.Conv2D(num_filters, kernel_size=kernel_size, padding=padding, kernel_initializer=kernel_initializer, activation=activation)(conv) + skip = tf.keras.layers.Conv2D(num_filters, kernel_size=kernel_size, padding=padding, activation=None)(skip) if scale is not None: skip = tf.keras.layers.Lambda(lambda x: x * scale)(skip) @@ -97,33 +131,93 @@ def build_residual_conv2d_block(conv, num_filters, block_name, activation=tf.nn. return conv -def build_residual_block_1x1(input_layer, num_filters, activation, block_name, padding='SAME', drop_rate=0.5, - do_drop_out=True, do_batch_norm=True): - - with tf.name_scope(block_name): - skip = input_layer - - if do_drop_out: - input_layer = tf.keras.layers.Dropout(drop_rate)(input_layer) - if do_batch_norm: - input_layer = tf.keras.layers.BatchNormalization()(input_layer) - conv = tf.keras.layers.Conv2D(num_filters, kernel_size=1, strides=1, padding=padding, activation=activation)(input_layer) - print(conv.shape) - - if do_drop_out: - conv = tf.keras.layers.Dropout(drop_rate)(conv) - if do_batch_norm: - conv = tf.keras.layers.BatchNormalization()(conv) - conv = tf.keras.layers.Conv2D(num_filters, kernel_size=1, strides=1, padding=padding, activation=None)(conv) - - conv = conv + skip - conv = tf.keras.layers.LeakyReLU()(conv) - print(conv.shape) +def build_residual_block_conv2d_down2x(x_in, num_filters, activation, padding='SAME', drop_rate=0.5, + do_drop_out=True, do_batch_norm=True): + skip = x_in + + conv = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=activation)(x_in) + conv = tf.keras.layers.MaxPool2D(padding=padding)(conv) + if do_drop_out: + conv = tf.keras.layers.Dropout(drop_rate)(conv) + if do_batch_norm: + conv = tf.keras.layers.BatchNormalization()(conv) + + conv = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=activation)(conv) + if do_drop_out: + conv = tf.keras.layers.Dropout(drop_rate)(conv) + if do_batch_norm: + conv = tf.keras.layers.BatchNormalization()(conv) + + conv = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=activation)(conv) + if do_drop_out: + conv = tf.keras.layers.Dropout(drop_rate)(conv) + if do_batch_norm: + conv = tf.keras.layers.BatchNormalization()(conv) + + skip = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=None)(skip) + skip = tf.keras.layers.MaxPool2D(padding=padding)(skip) + if do_drop_out: + skip = tf.keras.layers.Dropout(drop_rate)(skip) + if do_batch_norm: + skip = tf.keras.layers.BatchNormalization()(skip) + + conv = conv + skip + conv = tf.keras.layers.LeakyReLU()(conv) + print(conv.shape) return conv -class CNN: +def upsample(tmp): + tmp = tmp[:, 
slc_y_2, slc_x_2] + tmp = resample_2d_linear(x_2, y_2, tmp, t, s) + tmp = tmp[:, y_k, x_k] + return tmp + + +def upsample_nearest(tmp): + bsize = tmp.shape[0] + tmp_2 = tmp[:, slc_y_2, slc_x_2] + up = np.zeros(bsize, t.size, s.size) + for k in range(bsize): + for j in range(t.size/2): + for i in range(s.size/2): + up[k, j, i] = tmp_2[k, j, i] + up[k, j, i+1] = tmp_2[k, j, i] + up[k, j+1, i] = tmp_2[k, j, i] + up[k, j+1, i+1] = tmp_2[k, j, i] + return up + + +def get_label_data(grd_k): + grd_k = np.where(np.isnan(grd_k), 0, grd_k) + grd_k = np.where(grd_k < 0.5, 0, 1) + + a = grd_k[:, 0::2, 0::2] + b = grd_k[:, 1::2, 0::2] + c = grd_k[:, 0::2, 1::2] + d = grd_k[:, 1::2, 1::2] + s_t = a + b + c + d + #s_t = np.where(s_t == 0, 0, s_t) + #s_t = np.where(s_t == 1, 1, s_t) + #s_t = np.where(s_t == 2, 1, s_t) + #s_t = np.where(s_t == 3, 1, s_t) + #s_t = np.where(s_t == 4, 2, s_t) + s_t /= 4.0 + + return s_t + + +# def get_label_data(grd_k): +# grd_k = np.where(np.isnan(grd_k), 0, grd_k) +# grd_k = np.where((grd_k >= 0.0) & (grd_k < 0.3), 0, grd_k) +# grd_k = np.where((grd_k >= 0.3) & (grd_k < 0.7), 1, grd_k) +# grd_k = np.where((grd_k >= 0.7) & (grd_k <= 1.0), 2, grd_k) +# +# return grd_k + + +class SRCNN: def __init__(self): @@ -219,12 +313,9 @@ class CNN: self.test_data_nda = None self.test_label_nda = None - self.n_chans = len(data_params) + 1 + self.n_chans = len(data_params_half) + len(data_params_full) + 1 self.X_img = tf.keras.Input(shape=(None, None, self.n_chans)) - # self.X_img = tf.keras.Input(shape=(36, 36, self.n_chans)) - # self.X_img = tf.keras.Input(shape=(34, 34, self.n_chans)) - # self.X_img = tf.keras.Input(shape=(66, 66, self.n_chans)) self.inputs.append(self.X_img) @@ -239,48 +330,87 @@ class CNN: data_s = [] for k in idxs: f = files[k] - nda = np.load(f) + try: + nda = np.load(f) + except Exception: + print(f) + continue data_s.append(nda) input_data = np.concatenate(data_s) - add_noise = None - noise_scale = None + DO_ADD_NOISE = False if is_training and NOISE_TRAINING: - add_noise = True - noise_scale = NOISE_STDDEV + DO_ADD_NOISE = True data_norm = [] - for param in data_params: + for param in data_params_half: idx = params.index(param) - tmp = input_data[:, idx, slc_y_2, slc_x_2] + tmp = input_data[:, idx, :, :] + tmp = tmp.copy() + tmp = np.where(np.isnan(tmp), 0, tmp) + if DO_ESPCN: + tmp = tmp[:, slc_y_2, slc_x_2] + else: # Half res upsampled to full res: + tmp = upsample(tmp) + tmp = normalize(tmp, param, mean_std_dct) + if DO_ADD_NOISE: + tmp = add_noise(tmp, noise_scale=NOISE_STDDEV) + data_norm.append(tmp) + + for param in data_params_full: + idx = params.index(param) + tmp = input_data[:, idx, :, :] + tmp = tmp.copy() + tmp = np.where(np.isnan(tmp), 0, tmp) + # Full res: tmp = tmp[:, slc_y, slc_x] - tmp = normalize(tmp, param, mean_std_dct, add_noise=add_noise, noise_scale=noise_scale) - # tmp = resample_2d_linear(x_64, y_64, tmp, t, s) + tmp = normalize(tmp, param, mean_std_dct) + if DO_ADD_NOISE: + tmp = add_noise(tmp, noise_scale=NOISE_STDDEV) data_norm.append(tmp) - # -------------------------- - param = 'refl_0_65um_nom' - idx = params.index(param) - tmp = input_data[:, idx, slc_y_2, slc_x_2] - tmp = tmp[:, slc_y, slc_x] - tmp = normalize(tmp, param, mean_std_dct, add_noise=add_noise, noise_scale=noise_scale) - # tmp = resample_2d_linear(x_64, y_64, tmp, t, s) + # --------------------------------------------------- + tmp = input_data[:, label_idx, :, :] + tmp = tmp.copy() + tmp = np.where(np.isnan(tmp), 0, tmp) + if DO_SMOOTH: + tmp = smooth_2d(tmp, 
sigma=SIGMA) + # tmp = median_filter_2d(tmp) + if DO_ESPCN: + tmp = tmp[:, slc_y_2, slc_x_2] + else: # Half res upsampled to full res: + tmp = upsample(tmp) + if label_param != 'cloud_probability': + tmp = normalize(tmp, label_param, mean_std_dct) + if DO_ADD_NOISE: + tmp = add_noise(tmp, noise_scale=NOISE_STDDEV) + else: + if DO_ADD_NOISE: + tmp = add_noise(tmp, noise_scale=NOISE_STDDEV) + tmp = np.where(tmp < 0.0, 0.0, tmp) + tmp = np.where(tmp > 1.0, 1.0, tmp) data_norm.append(tmp) - # -------- - tmp = input_data[:, label_idx, slc_y_2, slc_x_2] - tmp = tmp[:, slc_y, slc_x] - tmp = np.where(np.isnan(tmp), 0, tmp) # shouldn't need this - ##data_norm.append(tmp) # --------- data = np.stack(data_norm, axis=3) data = data.astype(np.float32) # ----------------------------------------------------- # ----------------------------------------------------- - label = input_data[:, label_idx, lbl_slc_y, lbl_slc_x] - label = self.get_label_data(label) + label = input_data[:, label_idx, :, :] + label = label.copy() + # if DO_SMOOTH: + # label = np.where(np.isnan(label), 0, label) + # label = smooth_2d(label, sigma=SIGMA) + # # label = median_filter_2d(label) + label = label[:, y_128, x_128] + label = get_label_data(label) + + if label_param != 'cloud_probability': + label = normalize(label, label_param, mean_std_dct) + else: + label = np.where(np.isnan(label), 0, label) label = np.expand_dims(label, axis=3) data = data.astype(np.float32) - label = label.astype(np.int32) + label = label.astype(np.float32) if is_training and DO_AUGMENT: data_ud = np.flip(data, axis=1) @@ -294,66 +424,20 @@ class CNN: return data, label - def get_label_data(self, grd_k): - num, leny, lenx = grd_k.shape - leny_d2x = int(leny / 2) - lenx_d2x = int(lenx / 2) - grd_down_2x = np.zeros((num, leny_d2x, lenx_d2x)) - for t in range(num): - for j in range(leny_d2x): - for i in range(lenx_d2x): - cell = grd_k[t, j:j + 2, i:i + 2] - if np.sum(np.isnan(cell)) == 0: - cnt = np.sum(cell) - if cnt == 0: - grd_down_2x[t, j, i] = 0 - elif cnt == 1: - grd_down_2x[t, j, i] = 1 - elif cnt == 2: - grd_down_2x[t, j, i] = 2 - elif cnt == 3: - grd_down_2x[t, j, i] = 3 - elif cnt == 4: - grd_down_2x[t, j, i] = 4 - pass - else: - grd_down_2x[t, j, i] = 0 - - return grd_down_2x - def get_in_mem_data_batch_train(self, idxs): return self.get_in_mem_data_batch(idxs, True) def get_in_mem_data_batch_test(self, idxs): return self.get_in_mem_data_batch(idxs, False) - def get_in_mem_data_batch_eval(self, idxs): - data = [] - for param in self.train_params: - nda = self.data_dct[param] - nda = normalize(nda, param, mean_std_dct) - data.append(nda) - data = np.stack(data) - data = data.astype(np.float32) - data = np.transpose(data, axes=(1, 2, 0)) - data = np.expand_dims(data, axis=0) - - return data - @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)]) def data_function(self, indexes): - out = tf.numpy_function(self.get_in_mem_data_batch_train, [indexes], [tf.float32, tf.int32]) + out = tf.numpy_function(self.get_in_mem_data_batch_train, [indexes], [tf.float32, tf.float32]) return out @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)]) def data_function_test(self, indexes): - out = tf.numpy_function(self.get_in_mem_data_batch_test, [indexes], [tf.float32, tf.int32]) - return out - - @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)]) - def data_function_evaluate(self, indexes): - # TODO: modify for user specified altitude - out = tf.numpy_function(self.get_in_mem_data_batch_eval, [indexes], [tf.float32]) + out = 
tf.numpy_function(self.get_in_mem_data_batch_test, [indexes], [tf.float32, tf.float32]) return out def get_train_dataset(self, indexes): @@ -377,13 +461,6 @@ class CNN: dataset = dataset.cache() self.test_dataset = dataset - def get_evaluate_dataset(self, indexes): - indexes = list(indexes) - - dataset = tf.data.Dataset.from_tensor_slices(indexes) - dataset = dataset.map(self.data_function_evaluate, num_parallel_calls=8) - self.eval_dataset = dataset - def setup_pipeline(self, train_data_files, test_data_files, num_train_samples): self.train_data_files = train_data_files @@ -412,12 +489,6 @@ class CNN: self.get_test_dataset(tst_idxs) print('setup_test_pipeline: Done') - def setup_eval_pipeline(self, filename): - idxs = [0] - self.num_data_samples = idxs.shape[0] - - self.get_evaluate_dataset(idxs) - def build_srcnn(self, do_drop_out=False, do_batch_norm=False, drop_rate=0.5, factor=2): print('build_cnn') padding = "SAME" @@ -431,35 +502,49 @@ class CNN: input_2d = self.inputs[0] print('input: ', input_2d.shape) - conv = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, padding='VALID', activation=activation)(input_2d) - # conv = input_2d - print('input: ', conv.shape) - conv = conv_b = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, kernel_initializer='he_uniform', activation=activation, padding='SAME')(conv) + conv = conv_b = tf.keras.layers.Conv2D(num_filters, kernel_size=KERNEL_SIZE, kernel_initializer='he_uniform', activation=activation, padding='VALID')(input_2d) + print(conv.shape) - # conv = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, kernel_initializer='he_uniform', activation=activation, padding='SAME')(conv) + # if NOISE_TRAINING: + # conv = conv_b = tf.keras.layers.GaussianNoise(stddev=NOISE_STDDEV)(conv) - # conv = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=2, kernel_initializer='he_uniform', activation=activation, padding='SAME')(conv) - print(conv.shape) + scale = 0.2 - if NOISE_TRAINING: - conv = conv_b = tf.keras.layers.GaussianNoise(stddev=NOISE_STDDEV)(conv) + conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_1', kernel_size=KERNEL_SIZE, scale=scale) - conv = build_residual_block_1x1(conv, num_filters, activation, 'Residual_Block_1') + conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_2', kernel_size=KERNEL_SIZE, scale=scale) - conv = build_residual_block_1x1(conv, num_filters, activation, 'Residual_Block_2') + conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_3', kernel_size=KERNEL_SIZE, scale=scale) - conv = build_residual_block_1x1(conv, num_filters, activation, 'Residual_Block_3') + conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_4', kernel_size=KERNEL_SIZE, scale=scale) - conv = build_residual_block_1x1(conv, num_filters, activation, 'Residual_Block_4') + # conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_5', kernel_size=KERNEL_SIZE, scale=scale) - conv = build_residual_block_1x1(conv, num_filters, activation, 'Residual_Block_5') + # conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_6', kernel_size=KERNEL_SIZE, scale=scale) + + conv_b = build_residual_block_conv2d_down2x(conv_b, num_filters, activation) + + conv_b = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, activation=activation, kernel_initializer='he_uniform', padding=padding)(conv_b) # conv = conv + conv_b + conv = conv_b print(conv.shape) - self.logits = tf.keras.layers.Conv2D(NumLogits, 
kernel_size=1, strides=1, padding=padding, name='regression')(conv) + if NumClasses == 2: + final_activation = tf.nn.sigmoid # For binary + else: + final_activation = tf.nn.softmax # For multi-class + if not DO_ESPCN: + # This is effectively a Dense layer + self.logits = tf.keras.layers.Conv2D(NumLogits, kernel_size=1, strides=1, padding=padding, activation=final_activation)(conv) + else: + conv = tf.keras.layers.Conv2D(num_filters * (factor ** 2), 3, padding=padding, activation=activation)(conv) + print(conv.shape) + conv = tf.nn.depth_to_space(conv, factor) + print(conv.shape) + self.logits = tf.keras.layers.Conv2D(IMG_DEPTH, kernel_size=3, strides=1, padding=padding, activation=final_activation)(conv) print(self.logits.shape) def build_training(self): @@ -468,10 +553,9 @@ class CNN: else: self.loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False) # For multi-class # self.loss = tf.keras.losses.MeanAbsoluteError() # Regression - # self.loss = tf.keras.losses.MeanSquaredError() # Regression # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) - initial_learning_rate = 0.002 + initial_learning_rate = 0.005 decay_rate = 0.95 steps_per_epoch = int(self.num_data_samples/BATCH_SIZE) # one epoch decay_steps = int(steps_per_epoch) @@ -666,10 +750,6 @@ class CNN: self.writer_valid.close() self.writer_train_valid_loss.close() - # f = open(home_dir+'/best_stats_'+now+'.pkl', 'wb') - # pickle.dump((best_test_loss, best_test_acc, best_test_recall, best_test_precision, best_test_auc, best_test_f1, best_test_mcc), f) - # f.close() - def build_model(self): self.build_srcnn() self.model = tf.keras.Model(self.inputs, self.logits) @@ -690,11 +770,20 @@ class CNN: print('loss, acc: ', self.test_loss.result().numpy(), self.test_accuracy.result().numpy()) + labels = np.concatenate(self.test_labels) + preds = np.concatenate(self.test_preds) + print(labels.shape, preds.shape) + + if label_param != 'cloud_probability': + labels_denorm = denormalize(labels, label_param, mean_std_dct) + preds_denorm = denormalize(preds, label_param, mean_std_dct) + + return labels_denorm, preds_denorm + def do_evaluate(self, data, ckpt_dir): ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model) ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3) - ckpt.restore(ckpt_manager.latest_checkpoint) self.reset_test_metrics() @@ -702,14 +791,14 @@ class CNN: pred = self.model([data], training=False) self.test_probs = pred pred = pred.numpy() + if label_param != 'cloud_probability': + pred = denormalize(pred, label_param, mean_std_dct) return pred def run(self, directory, ckpt_dir=None, num_data_samples=50000): train_data_files = glob.glob(directory+'data_train_*.npy') valid_data_files = glob.glob(directory+'data_valid_*.npy') - train_data_files = train_data_files[::2] - valid_data_files = valid_data_files[::2] self.setup_pipeline(train_data_files, valid_data_files, num_data_samples) self.build_model() @@ -718,15 +807,16 @@ class CNN: self.do_training(ckpt_dir=ckpt_dir) def run_restore(self, directory, ckpt_dir): - valid_data_files = glob.glob(directory + 'data_*.npy') + valid_data_files = glob.glob(directory + 'data_valid*.npy') self.num_data_samples = 1000 self.setup_test_pipeline(valid_data_files) self.build_model() self.build_training() self.build_evaluation() - self.restore(ckpt_dir) + return self.restore(ckpt_dir) def run_evaluate(self, data, ckpt_dir): + data = tf.convert_to_tensor(data, dtype=tf.float32) self.num_data_samples = 80000 self.build_model() 
self.build_training() @@ -734,128 +824,163 @@ class CNN: return self.do_evaluate(data, ckpt_dir) -def run_restore_static(directory, ckpt_dir): - nn = CNN() - nn.run_restore(directory, ckpt_dir) +def run_restore_static(directory, ckpt_dir, out_file=None): + nn = SRCNN() + labels_denorm, preds_denorm = nn.run_restore(directory, ckpt_dir) + if out_file is not None: + np.save(out_file, [np.squeeze(labels_denorm), preds_denorm.argmax(axis=3)]) def run_evaluate_static(in_file, out_file, ckpt_dir): - h5f = h5py.File(in_file, 'r') - grd_a = get_grid_values_all(h5f, 'temp_11_0um_nom') - grd_a = grd_a[2432:4032, 2432:4032] - grd_a = grd_a[::2, ::2] - - grd_b = get_grid_values_all(h5f, 'refl_0_65um_nom') - grd_b = grd_b[2432:4032, 2432:4032] + N = 10 + + slc_x = slice(2, N*128 + 4) + slc_y = slice(2, N*128 + 4) + slc_x_2 = slice(1, N*128 + 6, 2) + slc_y_2 = slice(1, N*128 + 6, 2) + x_2 = np.arange(int((N*128)/2) + 3) + y_2 = np.arange(int((N*128)/2) + 3) + t = np.arange(0, int((N*128)/2) + 3, 0.5) + s = np.arange(0, int((N*128)/2) + 3, 0.5) + x_k = slice(1, N*128 + 3) + y_k = slice(1, N*128 + 3) + x_128 = slice(3, N*128 + 3) + y_128 = slice(3, N*128 + 3) + + sub_y, sub_x = (N * 128) + 10, (N * 128) + 10 + y_0, x_0, = 3232 - int(sub_y/2), 3200 - int(sub_x/2) - grd_c = get_grid_values_all(h5f, label_param) - grd_c = grd_c[2432:4032, 2432:4032] - grd_c = grd_c[::2, ::2] - - leny, lenx = grd_a.shape - x = np.arange(lenx) - y = np.arange(leny) - x_up = np.arange(0, lenx, 0.5) - y_up = np.arange(0, leny, 0.5) + h5f = h5py.File(in_file, 'r') + grd_a = get_grid_values_all(h5f, 'temp_11_0um_nom') + grd_a = grd_a[y_0:y_0+sub_y, x_0:x_0+sub_x] + grd_a = grd_a.copy() + grd_a = np.where(np.isnan(grd_a), 0, grd_a) + hr_grd_a = grd_a.copy() + hr_grd_a = hr_grd_a[y_128, x_128] + # Full res: + # grd_a = grd_a[slc_y, slc_x] + # Half res: + grd_a = grd_a[slc_y_2, slc_x_2] + grd_a = resample_2d_linear_one(x_2, y_2, grd_a, t, s) + grd_a = grd_a[y_k, x_k] grd_a = normalize(grd_a, 'temp_11_0um_nom', mean_std_dct) - grd_a = resample_2d_linear_one(x, y, grd_a, x_up, y_up) - + # ------------------------------------------------------ + grd_b = get_grid_values_all(h5f, 'refl_0_65um_nom') + grd_b = grd_b[y_0:y_0+sub_y, x_0:x_0+sub_x] + grd_b = grd_b.copy() + grd_b = np.where(np.isnan(grd_b), 0, grd_b) + hr_grd_b = grd_b.copy() + hr_grd_b = hr_grd_b[y_128, x_128] + grd_b = grd_b[slc_y, slc_x] grd_b = normalize(grd_b, 'refl_0_65um_nom', mean_std_dct) - if label_param == 'cloud_fraction': - grd_c = np.where(np.isnan(grd_c), 0, grd_c) - else: + grd_c = get_grid_values_all(h5f, label_param) + grd_c = grd_c[y_0:y_0+sub_y, x_0:x_0+sub_x] + hr_grd_c = grd_c.copy() + hr_grd_c = np.where(np.isnan(hr_grd_c), 0, grd_c) + hr_grd_c = hr_grd_c[y_128, x_128] + # hr_grd_c = smooth_2d_single(hr_grd_c, sigma=1.0) + grd_c = np.where(np.isnan(grd_c), 0, grd_c) + grd_c = grd_c.copy() + # grd_c = smooth_2d_single(grd_c, sigma=1.0) + grd_c = grd_c[slc_y_2, slc_x_2] + grd_c = resample_2d_linear_one(x_2, y_2, grd_c, t, s) + grd_c = grd_c[y_k, x_k] + if label_param != 'cloud_probability': grd_c = normalize(grd_c, label_param, mean_std_dct) - grd_c = resample_2d_linear_one(x, y, grd_c, x_up, y_up) data = np.stack([grd_a, grd_b, grd_c], axis=2) data = np.expand_dims(data, axis=0) - nn = CNN() + h5f.close() + + nn = SRCNN() out_sr = nn.run_evaluate(data, ckpt_dir) - if label_param != 'cloud_fraction': - out_sr = denormalize(out_sr, label_param, mean_std_dct) if out_file is not None: - np.save(out_file, out_sr) + np.save(out_file, (out_sr[0, :, :, 0], 
hr_grd_a, hr_grd_b, hr_grd_c)) else: - return out_sr + return out_sr, hr_grd_a, hr_grd_b, hr_grd_c -def run_evaluate_static_2(in_file, out_file, ckpt_dir): - nda = np.load(in_file) - grd_a = nda[:, 0, :, :] - grd_a = grd_a[:, 3:131:2, 3:131:2] +def analyze(file='/Users/tomrink/cld_opd_out.npy'): + # Save this: + # nn.test_data_files = glob.glob('/Users/tomrink/data/clavrx_opd_valid_DAY/data_valid*.npy') + # idxs = np.arange(50) + # dat, lbl = nn.get_in_mem_data_batch(idxs, False) + # tmp = dat[:, 1:128, 1:128, 1] + # tmp = dat[:, 1:129, 1:129, 1] - grd_b = nda[:, 2, 3:131, 3:131] + tup = np.load(file, allow_pickle=True) + lbls = tup[0] + pred = tup[1] - grd_c = nda[:, 3, :, :] - grd_c = grd_c[:, 3:131:2, 3:131:2] + lbls = lbls[:, :, :, 0] + pred = pred[:, :, :, 0] + print('Total num pixels: ', lbls.size) - num, leny, lenx = grd_a.shape - x = np.arange(lenx) - y = np.arange(leny) - x_up = np.arange(0, lenx, 0.5) - y_up = np.arange(0, leny, 0.5) + pred = pred.flatten() + pred = np.where(pred < 0.0, 0.0, pred) + lbls = lbls.flatten() + diff = pred - lbls - grd_a = normalize(grd_a, 'temp_11_0um_nom', mean_std_dct) - grd_a = resample_2d_linear(x, y, grd_a, x_up, y_up) + mae = (np.sum(np.abs(diff))) / diff.size + print('MAE: ', mae) - grd_b = normalize(grd_b, 'refl_0_65um_nom', mean_std_dct) + bin_edges = [] + bin_ranges = [] - if label_param == 'cloud_fraction': - grd_c = np.where(np.isnan(grd_c), 0, grd_c) - else: - grd_c = normalize(grd_c, label_param, mean_std_dct) - grd_c = resample_2d_linear(x, y, grd_c, x_up, y_up) + bin_ranges.append([0.0, 5.0]) + bin_edges.append(0.0) - data = np.stack([grd_a, grd_b, grd_c], axis=3) - print(data.shape) + bin_ranges.append([5.0, 10.0]) + bin_edges.append(5.0) - nn = CNN() - out_sr = nn.run_evaluate(data, ckpt_dir) - if label_param != 'cloud_fraction': - # out_sr = denormalize(out_sr, label_param, mean_std_dct) - pass - if out_file is not None: - np.save(out_file, out_sr) - else: - return out_sr - - -def analyze(fpath='/Users/tomrink/clavrx_snpp_viirs.A2019080.0100.001.2019080064252.uwssec_B00038315.level2.h5', param='cloud_fraction'): - h5f = h5py.File(fpath, 'r') - grd = get_grid_values_all(h5f, param) - grd = np.where(np.isnan(grd), 0, grd) - bt = get_grid_values_all(h5f, 'temp_11_0um_nom') - refl = get_grid_values_all(h5f, 'refl_0_65um_nom') - grd = grd[2432:4032, 2432:4032] - bt = bt[2432:4032, 2432:4032] - refl = refl[2432:4032, 2432:4032] - print(grd.shape) - - grd_lr = grd[::2, ::2] - print(grd_lr.shape) - leny, lenx = grd_lr.shape - rnd = np.random.normal(loc=0, scale=0.001, size=grd_lr.size) - grd_lr = grd_lr + rnd.reshape(grd_lr.shape) - if param == 'cloud_fraction': - grd_lr = np.where(grd_lr < 0, 0, grd_lr) - grd_lr = np.where(grd_lr > 1, 1, grd_lr) - - x = np.arange(lenx) - y = np.arange(leny) - x_up = np.arange(0, lenx, 0.5) - y_up = np.arange(0, leny, 0.5) - - grd_hr = resample_2d_linear_one(x, y, grd_lr, x_up, y_up) - print(grd_hr.shape) + bin_ranges.append([10.0, 15.0]) + bin_edges.append(10.0) - h5f.close() + bin_ranges.append([15.0, 20.0]) + bin_edges.append(15.0) + + bin_ranges.append([20.0, 30.0]) + bin_edges.append(20.0) + + bin_ranges.append([30.0, 40.0]) + bin_edges.append(30.0) + + bin_ranges.append([40.0, 60.0]) + bin_edges.append(40.0) + + bin_ranges.append([60.0, 80.0]) + bin_edges.append(60.0) + + bin_ranges.append([80.0, 100.0]) + bin_edges.append(80.0) + + bin_ranges.append([100.0, 120.0]) + bin_edges.append(100.0) + + bin_ranges.append([120.0, 140.0]) + bin_edges.append(120.0) + + bin_ranges.append([140.0, 160.0]) + 
bin_edges.append(140.0) + + bin_edges.append(160.0) + + diff_by_value_bins = util.util.bin_data_by(diff, lbls, bin_ranges) + + values = [] + for k in range(len(bin_ranges)): + diff_k = diff_by_value_bins[k] + mae_k = (np.sum(np.abs(diff_k)) / diff_k.size) + values.append(int(mae_k/bin_ranges[k][1] * 100.0)) + + print('MAE: ', diff_k.size, bin_ranges[k], mae_k) - return grd, grd_lr, grd_hr, bt, refl + return np.array(values), bin_edges if __name__ == "__main__": - nn = CNN() + nn = SRCNN() nn.run('matchup_filename') -- GitLab
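
Appended note (not part of the patch above): a minimal, standalone Python sketch of the 2x label construction that the patched get_label_data() performs — NaNs treated as clear, cloud_probability thresholded at 0.5 into a binary mask, then each 2x2 block reduced to its cloudy fraction. The names downscale_label_2x and probs are illustrative only; the division here is done out-of-place to keep the result floating point.

    import numpy as np

    def downscale_label_2x(grd):
        """Cloud probability (batch, ny, nx) -> 2x-coarser cloud fraction per 2x2 block."""
        grd = np.where(np.isnan(grd), 0, grd)   # treat missing values as clear
        grd = np.where(grd < 0.5, 0, 1)         # threshold probability to a 0/1 mask
        a = grd[:, 0::2, 0::2]                  # the four samples of each 2x2 block
        b = grd[:, 1::2, 0::2]
        c = grd[:, 0::2, 1::2]
        d = grd[:, 1::2, 1::2]
        return (a + b + c + d) / 4.0            # cloudy fraction in {0, 0.25, 0.5, 0.75, 1}

    if __name__ == '__main__':
        rng = np.random.default_rng(0)
        probs = rng.random((2, 8, 8))           # fake cloud_probability patch (even dims)
        frac = downscale_label_2x(probs)
        print(frac.shape)                       # -> (2, 4, 4)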