Skip to content
Snippets Groups Projects
Commit c7476f5c authored by tomrink's avatar tomrink
Browse files

initial commit..

parent 41d5729b
No related branches found
No related tags found
No related merge requests found
import glob
import tensorflow as tf
import util.util
from util.setup import logdir, modeldir, cachepath, now, ancillary_path
from util.util import EarlyStop, normalize, denormalize, resample, resample_2d_linear, resample_one,\
resample_2d_linear_one, get_grid_values_all, add_noise, smooth_2d, smooth_2d_single, median_filter_2d,\
median_filter_2d_single, downscale_2x
import os, datetime
import numpy as np
import pickle
import h5py
from scipy.ndimage import gaussian_filter
# Data locations, kept for reference:
# L1B M/I-bands: /apollo/cloud/scratch/cwhite/VIIRS_HRES/2019/2019_01_01/
# CLAVRx: /apollo/cloud/scratch/Satellite_Output/VIIRS_HRES/2019/2019_01_01/
# /apollo/cloud/scratch/Satellite_Output/andi/NEW/VIIRS_HRES/2019

# --- Device logging / input pipeline ---
LOG_DEVICE_PLACEMENT = False
PROC_BATCH_SIZE = 4             # file indexes grouped per tf.data element
PROC_BATCH_BUFFER_SIZE = 50000  # shuffle buffer size of the training dataset

# --- Output classes ---
NumClasses = 3
# A two-class problem needs a single output channel; otherwise one per class.
NumLogits = 1 if NumClasses == 2 else NumClasses

# --- Training loop ---
BATCH_SIZE = 128
NUM_EPOCHS = 80

TRACK_MOVING_AVERAGE = False
EARLY_STOP = True

# --- Input conditioning ---
NOISE_TRAINING = False
NOISE_STDDEV = 0.01
DO_AUGMENT = True

DO_SMOOTH = True
SIGMA = 1.0
DO_ZERO_OUT = False
DO_ESPCN = False  # Note: If True, cannot do mixed resolution input fields (Adjust accordingly below)
# Setup scaling parameters dictionary: merge the L1B and L2 mean/std tables
# into the single lookup that normalize()/denormalize() are given.
# NOTE: pickle.load can execute arbitrary code; these files must only ever be
# trusted project ancillary data found under ancillary_path.
mean_std_dct = {}

mean_std_file = ancillary_path+'mean_std_lo_hi_l2.pkl'
# Context managers guarantee the handle is closed even if unpickling fails.
with open(mean_std_file, 'rb') as f:
    mean_std_dct_l2 = pickle.load(f)

mean_std_file = ancillary_path+'mean_std_lo_hi_l1b.pkl'
with open(mean_std_file, 'rb') as f:
    mean_std_dct_l1b = pickle.load(f)

# L2 is applied last, so it wins for any key present in both tables.
mean_std_dct.update(mean_std_dct_l1b)
mean_std_dct.update(mean_std_dct_l2)
IMG_DEPTH = 1

# Alternative targets:
# label_param = 'cloud_fraction'
# label_param = 'cld_opd_dcomp'
label_param = 'cloud_probability'

# Channel order of the stored tiles; the label field is the last channel.
params = ['temp_11_0um_nom', 'temp_12_0um_nom', 'refl_0_65um_nom', label_param]
data_params_half = ['temp_11_0um_nom']
data_params_full = ['refl_0_65um_nom']

label_idx = params.index(label_param)

print('data_params_half: ', data_params_half)
print('data_params_full: ', data_params_full)
print('label_param: ', label_param)

KERNEL_SIZE = 3  # target size: (128, 128)
N = 1

if KERNEL_SIZE == 3:
    # Full-resolution window.
    slc_x = slice(2, N*128 + 4)
    slc_y = slice(2, N*128 + 4)
    # Every-other-pixel (half-resolution) sampling of the tile.
    slc_x_2 = slice(1, N*128 + 6, 2)
    slc_y_2 = slice(1, N*128 + 6, 2)
    # Half-resolution grid coordinates ...
    x_2 = np.arange(int((N*128)/2) + 3)
    y_2 = np.arange(int((N*128)/2) + 3)
    # ... and the 0.5-step coordinates they are resampled onto.
    t = np.arange(0, int((N*128)/2) + 3, 0.5)
    s = np.arange(0, int((N*128)/2) + 3, 0.5)
    # Crop applied to the resampled field.
    x_k = slice(1, N*128 + 3)
    y_k = slice(1, N*128 + 3)
    # Window from which the label is taken.
    x_128 = slice(3, N*128 + 3)
    y_128 = slice(3, N*128 + 3)
elif KERNEL_SIZE == 5:
    # NOTE(review): x_k / y_k are not defined in this branch although
    # upsample() reads them - confirm the intended crop before using
    # KERNEL_SIZE = 5.
    slc_x = slice(3, 135)
    slc_y = slice(3, 135)
    slc_x_2 = slice(2, 137, 2)
    slc_y_2 = slice(2, 137, 2)
    x_128 = slice(5, 133)
    y_128 = slice(5, 133)
    t = np.arange(1, 67, 0.5)
    s = np.arange(1, 67, 0.5)
    x_2 = np.arange(68)
    y_2 = np.arange(68)
# ----------------------------------------
# Exp for ESPCN version: replace the half-resolution sampling and the label
# window chosen above.
if DO_ESPCN:
    slc_x_2 = slc_y_2 = slice(0, 132, 2)
    x_128 = y_128 = slice(2, 130)
def build_residual_conv2d_block(conv, num_filters, block_name, activation=tf.nn.relu, padding='SAME',
                                kernel_initializer='he_uniform', scale=None, kernel_size=3,
                                do_drop_out=True, drop_rate=0.5, do_batch_norm=True):
    """Add a two-convolution residual branch to ``conv`` and return the sum.

    The branch is Conv2D (with ``activation``) followed by Conv2D (linear),
    then optionally multiplied by ``scale``, passed through Dropout and
    BatchNormalization, and finally added to the incoming tensor.

    Args:
        conv: input tensor; it is added to the branch output, so its shape
            must be compatible with ``num_filters`` output channels.
        num_filters: number of filters of both convolutions.
        block_name: name scope for the block, also used in the shape print.
        activation: activation of the first convolution.
        padding: padding mode of both convolutions.
        kernel_initializer: initializer of the first convolution only.
        scale: optional multiplier applied to the branch output.
        kernel_size: kernel size of both convolutions.
        do_drop_out: apply ``Dropout(drop_rate)`` to the branch.
        drop_rate: dropout rate.
        do_batch_norm: apply ``BatchNormalization`` to the branch.

    Returns:
        The tensor ``conv + branch``.
    """
    layers = tf.keras.layers

    with tf.name_scope(block_name):
        residual = layers.Conv2D(num_filters, kernel_size=kernel_size, padding=padding,
                                 kernel_initializer=kernel_initializer, activation=activation)(conv)
        residual = layers.Conv2D(num_filters, kernel_size=kernel_size, padding=padding,
                                 activation=None)(residual)

        if scale is not None:
            residual = layers.Lambda(lambda x: x * scale)(residual)
        if do_drop_out:
            residual = layers.Dropout(drop_rate)(residual)
        if do_batch_norm:
            residual = layers.BatchNormalization()(residual)

        conv = conv + residual
        print(block_name+':', conv.shape)

    return conv
def build_residual_block_conv2d_down2x(x_in, num_filters, activation, padding='SAME', drop_rate=0.5,
                                       do_drop_out=True, do_batch_norm=True):
    """Residual block whose main and shortcut paths are both max-pooled.

    Main path: Conv2D -> MaxPool2D -> [Dropout] -> [BatchNorm] -> Conv2D ->
    [Dropout] -> [BatchNorm]. Shortcut path: linear Conv2D -> MaxPool2D ->
    [Dropout] -> [BatchNorm]. The two paths are summed and passed through
    LeakyReLU.

    Args:
        x_in: input tensor feeding both paths.
        num_filters: number of filters of every convolution.
        activation: activation of the two main-path convolutions.
        padding: padding mode of the convolutions and pooling layers.
        drop_rate: dropout rate.
        do_drop_out: include the Dropout layers.
        do_batch_norm: include the BatchNormalization layers.

    Returns:
        The activated sum of the main and shortcut paths.
    """
    layers = tf.keras.layers

    def regularize(tensor):
        # Optional Dropout followed by optional BatchNormalization.
        if do_drop_out:
            tensor = layers.Dropout(drop_rate)(tensor)
        if do_batch_norm:
            tensor = layers.BatchNormalization()(tensor)
        return tensor

    # Main path (layers are created in the same order as before).
    main = layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=activation)(x_in)
    main = layers.MaxPool2D(padding=padding)(main)
    main = regularize(main)
    main = layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=activation)(main)
    main = regularize(main)

    # Shortcut path: pooled the same way so the shapes match for the sum.
    shortcut = layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=None)(x_in)
    shortcut = layers.MaxPool2D(padding=padding)(shortcut)
    shortcut = regularize(shortcut)

    out = layers.LeakyReLU()(main + shortcut)
    print(out.shape)

    return out
def upsample(tmp):
    """Sub-sample ``tmp`` with the half-res slices, then resample and crop it.

    Uses the module-level geometry: ``slc_y_2``/``slc_x_2`` select the
    samples, ``resample_2d_linear`` maps them from the ``(x_2, y_2)`` grid
    onto the ``(t, s)`` coordinates, and ``y_k``/``x_k`` crop the result.

    Args:
        tmp: array indexed as (batch, y, x).

    Returns:
        The resampled, cropped array.
    """
    half_res = tmp[:, slc_y_2, slc_x_2]
    resampled = resample_2d_linear(x_2, y_2, half_res, t, s)
    return resampled[:, y_k, x_k]
def upsample_nearest(tmp):
    """Sub-sample ``tmp`` with the half-res slices and replicate each pixel 2x2.

    Fixes relative to the previous implementation, which could not run:
    the shape is passed to ``np.zeros`` as a tuple, the loop bounds are
    integers, and source pixel ``(j, i)`` is written to the 2x2 output cell
    starting at ``(2j, 2i)`` rather than at ``(j, i)``.

    Args:
        tmp: array indexed as (batch, y, x).

    Returns:
        Float array of shape ``(batch, t.size, s.size)`` (module-level ``t``
        and ``s``); any part not covered by replicated pixels stays zero.
    """
    bsize = tmp.shape[0]
    tmp_2 = tmp[:, slc_y_2, slc_x_2]

    up = np.zeros((bsize, t.size, s.size))

    # Number of source rows/columns that fit into the output when doubled.
    ny = min(t.size // 2, tmp_2.shape[1])
    nx = min(s.size // 2, tmp_2.shape[2])

    src = tmp_2[:, :ny, :nx]
    up[:, :2 * ny, :2 * nx] = np.repeat(np.repeat(src, 2, axis=1), 2, axis=2)

    return up
def get_label_data(grd_k):
    """Downscale a (num, y, x) grid 2x into three coverage classes.

    Each disjoint 2x2 cell is thresholded at 0.5 and mapped to:
    0 when no pixel is >= 0.5, 2 when all four are, and 1 otherwise.
    A cell containing any NaN is assigned 0.

    The previous implementation indexed the window as ``[j:j+2, i:i+2]``,
    i.e. overlapping windows sliding by one pixel over the top-left quadrant
    only; the cells are now taken at ``[2j:2j+2, 2i:2i+2]`` so the output
    covers the whole input.

    Args:
        grd_k: array of shape (num, leny, lenx). A trailing odd row or
            column is ignored.

    Returns:
        Float array of shape (num, leny // 2, lenx // 2) holding 0, 1 or 2.
    """
    num, leny, lenx = grd_k.shape
    leny_d2x = int(leny / 2)
    lenx_d2x = int(lenx / 2)

    # Drop a trailing odd row/column, then give every 2x2 cell its own axes.
    cells = np.asarray(grd_k)[:, :2 * leny_d2x, :2 * lenx_d2x]
    cells = cells.reshape(num, leny_d2x, 2, lenx_d2x, 2)

    has_nan = np.isnan(cells).any(axis=(2, 4))
    with np.errstate(invalid='ignore'):
        # NaN compares False here; those cells are overridden below anyway.
        cnt = (cells >= 0.5).sum(axis=(2, 4))

    grd_down_2x = np.ones((num, leny_d2x, lenx_d2x))
    grd_down_2x[cnt == 0] = 0
    grd_down_2x[cnt == 4] = 2
    grd_down_2x[has_nan] = 0

    return grd_down_2x
class SRCNN:
    """Residual CNN that maps multi-channel image tiles to a per-pixel class map.

    The model (``build_srcnn``) takes ``n_chans`` input channels. Unless
    ``DO_ESPCN`` is set, it passes them through one 'VALID' convolution,
    three residual blocks and one max-pooled residual block, and emits
    ``NumLogits`` class probabilities per output pixel.

    Typical entry points are ``run`` (train), ``run_restore`` (score the
    validation files with a saved checkpoint) and ``run_evaluate`` (predict
    for an already prepared input tensor).
    """

    def __init__(self):
        # --- data arrays and tf.data handles ---
        self.train_data = None
        self.train_label = None
        self.test_data = None
        self.test_label = None
        self.test_data_denorm = None

        self.train_dataset = None
        self.inner_train_dataset = None
        self.test_dataset = None
        self.eval_dataset = None

        # --- model inputs ---
        self.X_img = None
        self.X_prof = None
        self.X_u = None
        self.X_v = None
        self.X_sfc = None
        self.inputs = []
        self.y = None
        self.handle = None
        self.inner_handle = None
        self.in_mem_batch = None

        self.h5f_l1b_trn = None
        self.h5f_l1b_tst = None
        self.h5f_l2_trn = None
        self.h5f_l2_tst = None

        # --- model outputs / training state ---
        self.logits = None

        self.predict_data = None
        self.predict_dataset = None
        self.mean_list = None
        self.std_list = None

        self.training_op = None
        self.correct = None
        self.accuracy = None
        self.loss = None
        self.pred_class = None
        self.variable_averages = None

        self.global_step = None

        self.writer_train = None
        self.writer_valid = None
        self.writer_train_valid_loss = None

        self.OUT_OF_RANGE = False

        self.abi = None
        self.temp = None
        self.wv = None
        self.lbfp = None
        self.sfc = None

        self.in_mem_data_cache = {}
        self.in_mem_data_cache_test = {}

        self.model = None
        self.optimizer = None
        self.ema = None

        # --- metrics ---
        self.train_loss = None
        self.train_accuracy = None
        self.test_loss = None
        self.test_accuracy = None
        self.test_auc = None
        self.test_recall = None
        self.test_precision = None
        self.test_confusion_matrix = None
        self.test_true_pos = None
        self.test_true_neg = None
        self.test_false_pos = None
        self.test_false_neg = None

        # Filled by predict(): one entry per mini-batch.
        self.test_labels = []
        self.test_preds = []
        self.test_probs = None

        self.learningRateSchedule = None
        self.num_data_samples = None
        self.initial_learning_rate = None

        # --- file lists ---
        self.data_dct = None
        self.train_data_files = None
        self.train_label_files = None
        self.test_data_files = None
        self.test_label_files = None

        self.train_data_nda = None
        self.train_label_nda = None
        self.test_data_nda = None
        self.test_label_nda = None

        # One channel per half-res and full-res parameter, plus one for the
        # (low resolution) label field that is also fed in as an input.
        self.n_chans = len(data_params_half) + len(data_params_full) + 1

        self.X_img = tf.keras.Input(shape=(None, None, self.n_chans))

        self.inputs.append(self.X_img)

        tf.debugging.set_log_device_placement(LOG_DEVICE_PLACEMENT)

    def get_in_mem_data_batch(self, idxs, is_training):
        """Load the files selected by ``idxs`` and build (data, label) arrays.

        Args:
            idxs: indexes into ``train_data_files`` (training) or
                ``test_data_files`` (otherwise).
            is_training: selects the file list and enables noise and
                flip augmentation when the corresponding flags are set.

        Returns:
            ``(data, label)`` as float32 arrays. ``data`` stacks the input
            channels on the last axis; ``label`` has a trailing axis of 1.
            With augmentation the batch is tripled (original, up-down flip,
            left-right flip).

        Raises:
            ValueError: if none of the selected files could be loaded.
        """
        if is_training:
            files = self.train_data_files
        else:
            files = self.test_data_files

        data_s = []
        for k in idxs:
            f = files[k]
            try:
                nda = np.load(f)
            except Exception:
                # Best effort: report the unreadable file and carry on.
                print(f)
                continue
            data_s.append(nda)
        if not data_s:
            # np.concatenate([]) would fail with a far less helpful message.
            raise ValueError('get_in_mem_data_batch: none of the requested files could be loaded')
        input_data = np.concatenate(data_s)

        DO_ADD_NOISE = False
        if is_training and NOISE_TRAINING:
            DO_ADD_NOISE = True

        data_norm = []
        for param in data_params_half:
            idx = params.index(param)
            tmp = input_data[:, idx, :, :]
            tmp = tmp.copy()
            tmp = np.where(np.isnan(tmp), 0, tmp)
            if DO_ESPCN:
                tmp = tmp[:, slc_y_2, slc_x_2]
            else:  # Half res upsampled to full res:
                tmp = upsample(tmp)
            tmp = normalize(tmp, param, mean_std_dct)
            if DO_ADD_NOISE:
                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
            data_norm.append(tmp)

        for param in data_params_full:
            idx = params.index(param)
            tmp = input_data[:, idx, :, :]
            tmp = tmp.copy()
            tmp = np.where(np.isnan(tmp), 0, tmp)
            # Full res:
            tmp = tmp[:, slc_y, slc_x]
            tmp = normalize(tmp, param, mean_std_dct)
            if DO_ADD_NOISE:
                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
            data_norm.append(tmp)
        # ---------------------------------------------------
        # The label field, degraded like the half-res inputs, is the last
        # input channel.
        tmp = input_data[:, label_idx, :, :]
        tmp = tmp.copy()
        tmp = np.where(np.isnan(tmp), 0, tmp)
        if DO_SMOOTH:
            tmp = smooth_2d(tmp, sigma=SIGMA)
            # tmp = median_filter_2d(tmp)
        if DO_ESPCN:
            tmp = tmp[:, slc_y_2, slc_x_2]
        else:  # Half res upsampled to full res:
            tmp = upsample(tmp)
        if label_param != 'cloud_probability':
            tmp = normalize(tmp, label_param, mean_std_dct)
            if DO_ADD_NOISE:
                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
        else:
            if DO_ADD_NOISE:
                tmp = add_noise(tmp, noise_scale=NOISE_STDDEV)
                # Keep the noisy probability inside [0, 1].
                tmp = np.where(tmp < 0.0, 0.0, tmp)
                tmp = np.where(tmp > 1.0, 1.0, tmp)
        data_norm.append(tmp)
        # ---------
        data = np.stack(data_norm, axis=3)
        data = data.astype(np.float32)
        # -----------------------------------------------------
        # -----------------------------------------------------
        # Training target: the unsmoothed label window reduced to classes.
        label = input_data[:, label_idx, :, :]
        label = label.copy()
        label = label[:, y_128, x_128]
        label = get_label_data(label)

        if label_param != 'cloud_probability':
            label = normalize(label, label_param, mean_std_dct)
        else:
            label = np.where(np.isnan(label), 0, label)
        label = np.expand_dims(label, axis=3)

        label = label.astype(np.float32)

        if is_training and DO_AUGMENT:
            data_ud = np.flip(data, axis=1)
            label_ud = np.flip(label, axis=1)

            data_lr = np.flip(data, axis=2)
            label_lr = np.flip(label, axis=2)

            data = np.concatenate([data, data_ud, data_lr])
            label = np.concatenate([label, label_ud, label_lr])

        return data, label

    def get_in_mem_data_batch_train(self, idxs):
        """Return the (data, label) batch for training file indexes."""
        return self.get_in_mem_data_batch(idxs, True)

    def get_in_mem_data_batch_test(self, idxs):
        """Return the (data, label) batch for test file indexes."""
        return self.get_in_mem_data_batch(idxs, False)

    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function(self, indexes):
        """Graph wrapper that loads a training batch via ``tf.numpy_function``."""
        out = tf.numpy_function(self.get_in_mem_data_batch_train, [indexes], [tf.float32, tf.float32])
        return out

    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function_test(self, indexes):
        """Graph wrapper that loads a test batch via ``tf.numpy_function``."""
        out = tf.numpy_function(self.get_in_mem_data_batch_test, [indexes], [tf.float32, tf.float32])
        return out

    def get_train_dataset(self, indexes):
        """Build ``self.train_dataset`` from training file indexes.

        Indexes are grouped ``PROC_BATCH_SIZE`` at a time, loaded in
        parallel, cached, shuffled (only when ``DO_AUGMENT``) and prefetched.
        """
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.batch(PROC_BATCH_SIZE)
        dataset = dataset.map(self.data_function, num_parallel_calls=8)
        dataset = dataset.cache()
        if DO_AUGMENT:
            dataset = dataset.shuffle(PROC_BATCH_BUFFER_SIZE)
        dataset = dataset.prefetch(buffer_size=1)
        self.train_dataset = dataset

    def get_test_dataset(self, indexes):
        """Build ``self.test_dataset`` (batched, loaded in parallel, cached)."""
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.batch(PROC_BATCH_SIZE)
        dataset = dataset.map(self.data_function_test, num_parallel_calls=8)
        dataset = dataset.cache()
        self.test_dataset = dataset

    def setup_pipeline(self, train_data_files, test_data_files, num_train_samples):
        """Register the file lists and create the train and test datasets.

        Args:
            train_data_files: training ``.npy`` file paths.
            test_data_files: test/validation ``.npy`` file paths.
            num_train_samples: approximate number of training samples; used
                by ``build_training`` to derive the steps per epoch.
        """
        self.train_data_files = train_data_files
        self.test_data_files = test_data_files

        trn_idxs = np.arange(len(train_data_files))
        np.random.shuffle(trn_idxs)

        tst_idxs = np.arange(len(test_data_files))

        self.get_train_dataset(trn_idxs)
        self.get_test_dataset(tst_idxs)

        self.num_data_samples = num_train_samples  # approximately

        print('datetime: ', now)
        print('training and test data: ')
        print('---------------------------')
        print('num train samples: ', self.num_data_samples)
        print('BATCH SIZE: ', BATCH_SIZE)
        print('num test samples: ', tst_idxs.shape[0])
        print('setup_pipeline: Done')

    def setup_test_pipeline(self, test_data_files):
        """Register the test file list and create the test dataset only."""
        self.test_data_files = test_data_files
        tst_idxs = np.arange(len(test_data_files))
        self.get_test_dataset(tst_idxs)
        print('setup_test_pipeline: Done')

    def build_srcnn(self, do_drop_out=False, do_batch_norm=False, drop_rate=0.5, factor=2):
        """Assemble the network graph and store its output in ``self.logits``.

        The output layer now applies sigmoid (two classes) or softmax
        (otherwise), because the losses in ``build_training`` are configured
        with ``from_logits=False`` and therefore expect probabilities; the
        layer previously emitted raw scores.

        Args:
            do_drop_out, do_batch_norm, drop_rate: accepted for interface
                compatibility; not used by this architecture.
            factor: depth-to-space factor of the ``DO_ESPCN`` branch.
        """
        print('build_cnn')
        padding = "SAME"

        # activation = tf.nn.elu
        activation = tf.nn.relu

        num_filters = 64

        input_2d = self.inputs[0]
        print('input: ', input_2d.shape)

        # 'VALID' padding trims (KERNEL_SIZE - 1) pixels from each dimension.
        conv = conv_b = tf.keras.layers.Conv2D(num_filters, kernel_size=KERNEL_SIZE, kernel_initializer='he_uniform', activation=activation, padding='VALID')(input_2d)
        print(conv.shape)

        # if NOISE_TRAINING:
        #     conv = conv_b = tf.keras.layers.GaussianNoise(stddev=NOISE_STDDEV)(conv)

        scale = 0.2

        conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_1', kernel_size=KERNEL_SIZE, scale=scale)

        conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_2', kernel_size=KERNEL_SIZE, scale=scale)

        conv_b = build_residual_conv2d_block(conv_b, num_filters, 'Residual_Block_3', kernel_size=KERNEL_SIZE, scale=scale)

        # Further identical blocks ('Residual_Block_4' .. '_6') can be added here.

        conv_b = build_residual_block_conv2d_down2x(conv_b, num_filters, activation)

        conv_b = tf.keras.layers.Conv2D(num_filters, kernel_size=3, strides=1, activation=activation, kernel_initializer='he_uniform', padding=padding)(conv_b)

        # conv = conv + conv_b
        conv = conv_b
        print(conv.shape)

        if not DO_ESPCN:
            if NumClasses == 2:
                final_activation = tf.nn.sigmoid  # single probability channel
            else:
                final_activation = tf.nn.softmax  # one probability per class
            # This is effectively a Dense layer
            self.logits = tf.keras.layers.Conv2D(NumLogits, kernel_size=1, strides=1, padding=padding, name='regression',
                                                 activation=final_activation)(conv)
        else:
            conv = tf.keras.layers.Conv2D(num_filters * (factor ** 2), 3, padding=padding, activation=activation)(conv)
            print(conv.shape)
            conv = tf.nn.depth_to_space(conv, factor)
            print(conv.shape)
            self.logits = tf.keras.layers.Conv2D(IMG_DEPTH, kernel_size=3, strides=1, padding=padding, name='regression')(conv)

        print(self.logits.shape)

    def build_training(self):
        """Create the loss, the learning-rate schedule and the optimizer.

        Requires ``self.num_data_samples`` to be set; it determines the
        number of steps after which the learning rate decays once.
        """
        if NumClasses == 2:
            self.loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)  # for two-class only
        else:
            self.loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)  # For multi-class
        # self.loss = tf.keras.losses.MeanAbsoluteError()  # Regression

        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        initial_learning_rate = 0.005
        decay_rate = 0.95
        steps_per_epoch = int(self.num_data_samples/BATCH_SIZE)  # one epoch
        decay_steps = int(steps_per_epoch)
        print('initial rate, decay rate, steps/epoch, decay steps: ', initial_learning_rate, decay_rate, steps_per_epoch, decay_steps)

        self.learningRateSchedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate, decay_steps, decay_rate)

        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learningRateSchedule)

        if TRACK_MOVING_AVERAGE:
            # Not really sure this works properly (from tfa)
            # optimizer = tfa.optimizers.MovingAverage(optimizer)
            self.ema = tf.train.ExponentialMovingAverage(decay=0.9999)

        self.optimizer = optimizer
        self.initial_learning_rate = initial_learning_rate

    def build_evaluation(self):
        """Create the running train/test loss and accuracy metrics.

        NOTE(review): the 'accuracy' metrics are MeanAbsoluteError, which
        looks like a leftover from a regression setup - confirm it is the
        intended measure for class labels.
        """
        self.train_accuracy = tf.keras.metrics.MeanAbsoluteError(name='train_accuracy')
        self.test_accuracy = tf.keras.metrics.MeanAbsoluteError(name='test_accuracy')
        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.test_loss = tf.keras.metrics.Mean(name='test_loss')

    def _current_lr(self):
        """Return the scheduled learning rate at the optimizer's current step.

        Uses the public schedule and ``optimizer.iterations`` instead of the
        private ``optimizer._decayed_lr``.
        """
        return self.learningRateSchedule(self.optimizer.iterations).numpy()

    @tf.function
    def train_step(self, mini_batch):
        """Run one optimisation step on ``(inputs, labels)`` and return the loss.

        Any regularisation losses registered on the model are added to the
        loss used for the gradients; the returned/recorded loss excludes them.
        """
        inputs = [mini_batch[0]]
        labels = mini_batch[1]
        with tf.GradientTape() as tape:
            pred = self.model(inputs, training=True)
            loss = self.loss(labels, pred)
            total_loss = loss
            if len(self.model.losses) > 0:
                reg_loss = tf.math.add_n(self.model.losses)
                total_loss = loss + reg_loss
        gradients = tape.gradient(total_loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        if TRACK_MOVING_AVERAGE:
            self.ema.apply(self.model.trainable_variables)

        self.train_loss(loss)
        self.train_accuracy(labels, pred)

        return loss

    @tf.function
    def test_step(self, mini_batch):
        """Update the test loss/accuracy metrics with one mini-batch."""
        inputs = [mini_batch[0]]
        labels = mini_batch[1]
        pred = self.model(inputs, training=False)
        t_loss = self.loss(labels, pred)

        self.test_loss(t_loss)
        self.test_accuracy(labels, pred)

    def predict(self, mini_batch):
        """Like ``test_step`` (eager), but also keep the labels and predictions."""
        inputs = [mini_batch[0]]
        labels = mini_batch[1]
        pred = self.model(inputs, training=False)
        t_loss = self.loss(labels, pred)

        self.test_labels.append(labels)
        self.test_preds.append(pred.numpy())

        self.test_loss(t_loss)
        self.test_accuracy(labels, pred)

    def reset_test_metrics(self):
        """Reset the running test loss and accuracy."""
        self.test_loss.reset_states()
        self.test_accuracy.reset_states()

    def get_metrics(self):
        """Return ``(f1, mcc)`` from the recall/precision/confusion metrics.

        NOTE(review): ``test_recall``, ``test_precision`` and the
        true/false positive/negative metrics are initialised to None and are
        not created anywhere in this class; they must be assigned before
        calling this method.
        """
        recall = self.test_recall.result()
        precsn = self.test_precision.result()
        f1 = 2 * (precsn * recall) / (precsn + recall)

        tn = self.test_true_neg.result()
        tp = self.test_true_pos.result()
        fn = self.test_false_neg.result()
        fp = self.test_false_pos.result()

        mcc = ((tp * tn) - (fp * fn)) / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
        return f1, mcc

    def do_training(self, ckpt_dir=None):
        """Train for up to ``NUM_EPOCHS`` epochs with periodic validation.

        Every 100 steps the full test dataset is evaluated and summaries are
        written. After each epoch the test loss is computed; a checkpoint is
        saved whenever it improves, and training stops early when
        ``EARLY_STOP`` is set and ``EarlyStop.check_stop`` says so.

        Args:
            ckpt_dir: when given, the latest checkpoint there is restored
                first and new checkpoints are written there; otherwise
                ``modeldir`` is used (created if missing).
        """
        if ckpt_dir is None:
            if not os.path.exists(modeldir):
                os.mkdir(modeldir)
            ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
            ckpt_manager = tf.train.CheckpointManager(ckpt, modeldir, max_to_keep=3)
        else:
            ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
            ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)
            ckpt.restore(ckpt_manager.latest_checkpoint)

        self.writer_train = tf.summary.create_file_writer(os.path.join(logdir, 'plot_train'))
        self.writer_valid = tf.summary.create_file_writer(os.path.join(logdir, 'plot_valid'))
        self.writer_train_valid_loss = tf.summary.create_file_writer(os.path.join(logdir, 'plot_train_valid_loss'))

        step = 0
        total_time = 0
        # np.float was removed in NumPy 1.24; np.float64 is the same type.
        best_test_loss = np.finfo(np.float64).max

        if EARLY_STOP:
            es = EarlyStop()

        for epoch in range(NUM_EPOCHS):
            self.train_loss.reset_states()
            self.train_accuracy.reset_states()

            t0 = datetime.datetime.now().timestamp()

            proc_batch_cnt = 0
            n_samples = 0

            for data, label in self.train_dataset:
                # Each dataset element holds several files' worth of samples;
                # re-batch it into training mini-batches.
                trn_ds = tf.data.Dataset.from_tensor_slices((data, label))
                trn_ds = trn_ds.batch(BATCH_SIZE)
                for mini_batch in trn_ds:
                    if self.learningRateSchedule is not None:
                        loss = self.train_step(mini_batch)

                    if (step % 100) == 0:

                        with self.writer_train.as_default():
                            tf.summary.scalar('loss_trn', loss.numpy(), step=step)
                            tf.summary.scalar('learning_rate', self._current_lr(), step=step)
                            tf.summary.scalar('num_train_steps', step, step=step)
                            tf.summary.scalar('num_epochs', epoch, step=step)

                        self.reset_test_metrics()
                        for data_tst, label_tst in self.test_dataset:
                            tst_ds = tf.data.Dataset.from_tensor_slices((data_tst, label_tst))
                            tst_ds = tst_ds.batch(BATCH_SIZE)
                            for mini_batch_test in tst_ds:
                                self.test_step(mini_batch_test)

                        with self.writer_valid.as_default():
                            tf.summary.scalar('loss_val', self.test_loss.result(), step=step)
                            tf.summary.scalar('acc_val', self.test_accuracy.result(), step=step)

                        with self.writer_train_valid_loss.as_default():
                            tf.summary.scalar('loss_trn', loss.numpy(), step=step)
                            tf.summary.scalar('loss_val', self.test_loss.result(), step=step)

                        print('****** test loss, acc, lr: ', self.test_loss.result().numpy(), self.test_accuracy.result().numpy(),
                              self._current_lr())

                    step += 1
                    print('train loss: ', loss.numpy())

                proc_batch_cnt += 1
                n_samples += data.shape[0]
                print('proc_batch_cnt: ', proc_batch_cnt, n_samples)

            t1 = datetime.datetime.now().timestamp()
            print('End of Epoch: ', epoch+1, 'elapsed time: ', (t1-t0))
            total_time += (t1-t0)

            # End-of-epoch validation.
            self.reset_test_metrics()
            for data, label in self.test_dataset:
                ds = tf.data.Dataset.from_tensor_slices((data, label))
                ds = ds.batch(BATCH_SIZE)
                for mini_batch in ds:
                    self.test_step(mini_batch)

            print('loss, acc: ', self.test_loss.result().numpy(), self.test_accuracy.result().numpy())
            print('------------------------------------------------------')

            tst_loss = self.test_loss.result().numpy()
            if tst_loss < best_test_loss:
                best_test_loss = tst_loss
                ckpt_manager.save()

            if EARLY_STOP and es.check_stop(tst_loss):
                break

        print('total time: ', total_time)
        self.writer_train.close()
        self.writer_valid.close()
        self.writer_train_valid_loss.close()

    def build_model(self):
        """Build the network graph and wrap it in ``self.model``."""
        self.build_srcnn()
        self.model = tf.keras.Model(self.inputs, self.logits)

    def restore(self, ckpt_dir):
        """Restore the latest checkpoint and score the whole test dataset.

        Args:
            ckpt_dir: directory holding the checkpoints.

        Returns:
            ``(labels, preds)`` concatenated over all test mini-batches.
            They are denormalised only when ``label_param`` is not
            'cloud_probability', matching ``do_evaluate`` and the input
            pipeline, which never normalise that parameter.
        """
        ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)

        ckpt.restore(ckpt_manager.latest_checkpoint)

        self.reset_test_metrics()
        # Start from empty lists so repeated calls do not accumulate results.
        self.test_labels = []
        self.test_preds = []

        for data, label in self.test_dataset:
            ds = tf.data.Dataset.from_tensor_slices((data, label))
            ds = ds.batch(BATCH_SIZE)
            for mini_batch_test in ds:
                self.predict(mini_batch_test)

        print('loss, acc: ', self.test_loss.result().numpy(), self.test_accuracy.result().numpy())

        labels = np.concatenate(self.test_labels)
        preds = np.concatenate(self.test_preds)
        print(labels.shape, preds.shape)

        if label_param != 'cloud_probability':
            labels_denorm = denormalize(labels, label_param, mean_std_dct)
            preds_denorm = denormalize(preds, label_param, mean_std_dct)
        else:
            labels_denorm = labels
            preds_denorm = preds

        return labels_denorm, preds_denorm

    def do_evaluate(self, data, ckpt_dir):
        """Restore the latest checkpoint and predict for ``data``.

        Args:
            data: input tensor accepted by the model.
            ckpt_dir: directory holding the checkpoints.

        Returns:
            The prediction as a numpy array, denormalised unless
            ``label_param`` is 'cloud_probability'. The raw model output is
            also kept in ``self.test_probs``.
        """
        ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)

        ckpt.restore(ckpt_manager.latest_checkpoint)

        self.reset_test_metrics()

        pred = self.model([data], training=False)
        self.test_probs = pred
        pred = pred.numpy()
        if label_param != 'cloud_probability':
            pred = denormalize(pred, label_param, mean_std_dct)

        return pred

    def run(self, directory, ckpt_dir=None, num_data_samples=50000):
        """Train on the ``data_train_*.npy`` / ``data_valid_*.npy`` files.

        Args:
            directory: path prefix the file patterns are appended to (a
                directory needs its trailing separator).
            ckpt_dir: optional checkpoint directory to resume from.
            num_data_samples: approximate number of training samples.
        """
        train_data_files = glob.glob(directory+'data_train_*.npy')
        valid_data_files = glob.glob(directory+'data_valid_*.npy')

        self.setup_pipeline(train_data_files, valid_data_files, num_data_samples)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.do_training(ckpt_dir=ckpt_dir)

    def run_restore(self, directory, ckpt_dir):
        """Score the ``data_valid*.npy`` files with a saved checkpoint.

        Returns:
            The ``(labels, preds)`` pair produced by ``restore``.
        """
        valid_data_files = glob.glob(directory + 'data_valid*.npy')
        # Only needed so build_training can derive a decay step count.
        self.num_data_samples = 1000

        self.setup_test_pipeline(valid_data_files)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        return self.restore(ckpt_dir)

    def run_evaluate(self, data, ckpt_dir):
        """Build the model and return its prediction for ``data``."""
        data = tf.convert_to_tensor(data, dtype=tf.float32)
        # Only needed so build_training can derive a decay step count.
        self.num_data_samples = 80000

        self.build_model()
        self.build_training()
        self.build_evaluation()
        return self.do_evaluate(data, ckpt_dir)
def run_restore_static(directory, ckpt_dir, out_file=None):
    """Score the validation files with a checkpoint and optionally save the result.

    Args:
        directory: path prefix containing the ``data_valid*.npy`` files.
        ckpt_dir: directory holding the checkpoints.
        out_file: when not None, the labels and predictions are written
            there with ``np.save`` as a two-element array.
    """
    nn = SRCNN()
    labels_denorm, preds_denorm = nn.run_restore(directory, ckpt_dir)
    if out_file is not None:
        if np.shape(labels_denorm) == np.shape(preds_denorm):
            np.save(out_file, [labels_denorm, preds_denorm])
        else:
            # Labels and predictions can differ in their channel count;
            # NumPy >= 1.24 refuses to build an array from such a ragged
            # list, so pack them into an explicit object array (load it with
            # allow_pickle=True).
            packed = np.empty(2, dtype=object)
            packed[0] = labels_denorm
            packed[1] = preds_denorm
            np.save(out_file, packed)
def run_evaluate_static(in_file, out_file, ckpt_dir):
    """Run the model on a fixed sub-region of an HDF5 granule.

    A ``(N*128 + 10)``-pixel square centred on row 3232 / column 3200 is
    read for 'temp_11_0um_nom', 'refl_0_65um_nom' and ``label_param``,
    prepared like the training inputs, and passed to the model.

    Args:
        in_file: path of the HDF5 input file.
        out_file: when not None, the result is saved there with ``np.save``
            and nothing is returned.
        ckpt_dir: directory holding the checkpoints.

    Returns:
        ``(out_sr, hr_grd_a, hr_grd_b, hr_grd_c)`` when ``out_file`` is
        None: the model output and the three unmodified high-resolution
        windows. Otherwise None.
    """
    N = 10

    # Same geometry as the module-level KERNEL_SIZE == 3 setup, for a larger N.
    slc_x = slice(2, N*128 + 4)
    slc_y = slice(2, N*128 + 4)
    slc_x_2 = slice(1, N*128 + 6, 2)
    slc_y_2 = slice(1, N*128 + 6, 2)
    x_2 = np.arange(int((N*128)/2) + 3)
    y_2 = np.arange(int((N*128)/2) + 3)
    t = np.arange(0, int((N*128)/2) + 3, 0.5)
    s = np.arange(0, int((N*128)/2) + 3, 0.5)
    x_k = slice(1, N*128 + 3)
    y_k = slice(1, N*128 + 3)
    x_128 = slice(3, N*128 + 3)
    y_128 = slice(3, N*128 + 3)

    sub_y, sub_x = (N * 128) + 10, (N * 128) + 10
    y_0, x_0, = 3232 - int(sub_y/2), 3200 - int(sub_x/2)

    def read_window(h5f, name):
        # Sub-region of one parameter, with NaN replaced by 0.
        grd = get_grid_values_all(h5f, name)
        grd = grd[y_0:y_0+sub_y, x_0:x_0+sub_x]
        grd = grd.copy()
        return np.where(np.isnan(grd), 0, grd)

    def half_res_to_full(grd):
        # Sub-sample every other pixel, resample back and crop.
        grd = grd[slc_y_2, slc_x_2]
        grd = resample_2d_linear_one(x_2, y_2, grd, t, s)
        return grd[y_k, x_k]

    # The context manager closes the file even if reading/resampling fails.
    with h5py.File(in_file, 'r') as h5f:
        grd_a = read_window(h5f, 'temp_11_0um_nom')
        hr_grd_a = grd_a[y_128, x_128].copy()
        # Half res:
        grd_a = half_res_to_full(grd_a)
        grd_a = normalize(grd_a, 'temp_11_0um_nom', mean_std_dct)
        # ------------------------------------------------------
        grd_b = read_window(h5f, 'refl_0_65um_nom')
        hr_grd_b = grd_b[y_128, x_128].copy()
        # Full res:
        grd_b = grd_b[slc_y, slc_x]
        grd_b = normalize(grd_b, 'refl_0_65um_nom', mean_std_dct)
        # ------------------------------------------------------
        grd_c = read_window(h5f, label_param)
        hr_grd_c = grd_c[y_128, x_128].copy()
        # hr_grd_c = smooth_2d_single(hr_grd_c, sigma=1.0)
        # grd_c = smooth_2d_single(grd_c, sigma=1.0)
        grd_c = half_res_to_full(grd_c)
        if label_param != 'cloud_probability':
            grd_c = normalize(grd_c, label_param, mean_std_dct)

        data = np.stack([grd_a, grd_b, grd_c], axis=2)
        data = np.expand_dims(data, axis=0)

    nn = SRCNN()
    out_sr = nn.run_evaluate(data, ckpt_dir)
    if out_file is not None:
        arrays = (out_sr[0, :, :, 0], hr_grd_a, hr_grd_b, hr_grd_c)
        if len({np.shape(arr) for arr in arrays}) == 1:
            np.save(out_file, arrays)
        else:
            # The model output and the hi-res windows differ in size;
            # NumPy >= 1.24 refuses to build an array from a ragged tuple, so
            # pack them into an explicit object array (load it with
            # allow_pickle=True).
            packed = np.empty(len(arrays), dtype=object)
            for k, arr in enumerate(arrays):
                packed[k] = arr
            np.save(out_file, packed)
    else:
        return out_sr, hr_grd_a, hr_grd_b, hr_grd_c
def analyze(file='/Users/tomrink/cld_opd_out.npy'):
    """Report overall and per-bin mean absolute error of saved predictions.

    The file is expected to hold a pair ``(labels, predictions)`` whose
    first channel is analysed; negative predictions are clamped to 0.

    To produce inputs for inspection:
        nn.test_data_files = glob.glob('/Users/tomrink/data/clavrx_opd_valid_DAY/data_valid*.npy')
        idxs = np.arange(50)
        dat, lbl = nn.get_in_mem_data_batch(idxs, False)
        tmp = dat[:, 1:128, 1:128, 1]
        tmp = dat[:, 1:129, 1:129, 1]

    Args:
        file: path of the ``.npy`` file to load (with ``allow_pickle=True``).

    Returns:
        ``(values, bin_edges)``: per bin, the MAE as an integer percentage
        of the bin's upper edge, and the list of bin edges.
    """
    saved = np.load(file, allow_pickle=True)
    lbls = saved[0][:, :, :, 0]
    pred = saved[1][:, :, :, 0]
    print('Total num pixels: ', lbls.size)

    pred = pred.flatten()
    pred = np.where(pred < 0.0, 0.0, pred)
    lbls = lbls.flatten()
    diff = pred - lbls

    mae = (np.sum(np.abs(diff))) / diff.size
    print('MAE: ', mae)

    # Bin boundaries on the label value; consecutive edges form one range.
    bin_edges = [0.0, 5.0, 10.0, 15.0, 20.0, 30.0, 40.0,
                 60.0, 80.0, 100.0, 120.0, 140.0, 160.0]
    bin_ranges = [[lo, hi] for lo, hi in zip(bin_edges[:-1], bin_edges[1:])]

    diff_by_value_bins = util.util.bin_data_by(diff, lbls, bin_ranges)

    values = []
    for k, rng in enumerate(bin_ranges):
        diff_k = diff_by_value_bins[k]
        # NOTE(review): an empty bin makes this a division by zero and the
        # int() conversion below would then fail - confirm bins are never empty.
        mae_k = (np.sum(np.abs(diff_k)) / diff_k.size)
        values.append(int(mae_k/rng[1] * 100.0))
        print('MAE: ', diff_k.size, rng, mae_k)

    return np.array(values), bin_edges
if __name__ == "__main__":
    # Script entry point: train with the default settings. The argument is
    # the path prefix that run() appends its file patterns to.
    SRCNN().run('matchup_filename')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment