Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
python
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Tom Rink
python
Commits
67815ec7
Commit
67815ec7
authored
2 years ago
by
tomrink
Browse files
Options
Downloads
Patches
Plain Diff
initial commit...
parent
39caf54c
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
modules/deeplearning/cnn_l1b_l2_16.py
+913
-0
913 additions, 0 deletions
modules/deeplearning/cnn_l1b_l2_16.py
with
913 additions
and
0 deletions
modules/deeplearning/cnn_l1b_l2_16.py
0 → 100644
+
913
−
0
View file @
67815ec7
import
glob
import
tensorflow
as
tf
from
util.setup
import
logdir
,
modeldir
,
cachepath
,
now
,
ancillary_path
,
home_dir
from
util.util
import
EarlyStop
,
normalize
,
scale
,
make_for_full_domain_predict
import
os
,
datetime
import
numpy
as
np
import
pickle
import
h5py
# L1B M/I-bands: /apollo/cloud/scratch/cwhite/VIIRS_HRES/2019/2019_01_01/
# CLAVRx: /apollo/cloud/scratch/Satellite_Output/VIIRS_HRES/2019/2019_01_01/
# /apollo/cloud/scratch/Satellite_Output/andi/NEW/VIIRS_HRES/2019
# --- Run-time configuration ---------------------------------------------------
# Forwarded to tf.debugging.set_log_device_placement() when a UNET is created.
LOG_DEVICE_PLACEMENT = False

# Number of file indexes grouped into one "processing batch" by the tf.data
# pipeline, and the shuffle-buffer size applied to those processing batches.
PROC_BATCH_SIZE = 10
PROC_BATCH_BUFFER_SIZE = 50000

NumClasses = 2
# Two classes need a single output unit; otherwise one unit per class.
NumLogits = 1 if NumClasses == 2 else NumClasses

# Mini-batch size and number of passes over the training set.
BATCH_SIZE = 128
NUM_EPOCHS = 60

TRACK_MOVING_AVERAGE = False
EARLY_STOP = False

TRIPLET = False
CONV3D = False

# Gaussian noise injected into the first convolution's output during training.
NOISE_TRAINING = True
NOISE_STDDEV = 0.10

# Add up/down and left/right flipped copies of every training tile.
DO_AUGMENT = True

img_width = 16
# setup scaling parameters dictionary
# Both pickles are merged into one lookup used for normalizing inputs and
# scaling labels; L2 entries are applied last, so they win on duplicate keys.
# NOTE: pickle.load can execute arbitrary code - only point ancillary_path at
# trusted files.
mean_std_dct = {}

mean_std_file = ancillary_path + 'mean_std_lo_hi_l2.pkl'
# Context managers guarantee the handles are released even if loading fails.
with open(mean_std_file, 'rb') as f:
    mean_std_dct_l2 = pickle.load(f)

mean_std_file = ancillary_path + 'mean_std_lo_hi_l1b.pkl'
with open(mean_std_file, 'rb') as f:
    mean_std_dct_l1b = pickle.load(f)

mean_std_dct.update(mean_std_dct_l1b)
mean_std_dct.update(mean_std_dct_l2)
# Names of the input channels, in the order they are stored along axis 1 of
# the data arrays (see UNET.get_in_mem_data_batch).
emis_params = [
    'temp_10_4um_nom',
    'temp_11_0um_nom',
    'temp_12_0um_nom',
    'temp_13_3um_nom',
    'temp_3_75um_nom',
    'temp_6_7um_nom',
    'temp_6_2um_nom',
    'temp_7_3um_nom',
    'temp_8_5um_nom',
    'temp_9_7um_nom',
]

# Names of the candidate label fields, in the order they are stored along
# axis 1 of the label arrays.
l2_params = [
    'cloud_fraction',
    'cld_temp_acha',
    'cld_press_acha',
    'cld_opd_acha',
    'cld_reff_acha',
]

# -- Zero out params (Experimentation Only) ------------
zero_out_params = [
    'cld_reff_dcomp',
    'cld_opd_dcomp',
    'iwc_dcomp',
    'lwc_dcomp',
]
DO_ZERO_OUT = False

# Index into l2_params selecting the field the network is trained to predict.
label_idx = 1
label_param = l2_params[label_idx]
def build_conv2d_block(conv, num_filters, activation, block_name, padding='SAME'):
    """Build a pooled residual convolution block and return its output tensor.

    The main path is a 5x5 convolution (with ``activation``), max pooling and
    batch normalization. The shortcut path is a linear 3x3 convolution, max
    pooling and batch normalization. Both paths are summed and passed through
    a LeakyReLU. Intermediate and final shapes are printed.

    Args:
        conv: input tensor.
        num_filters: number of filters for both convolutions.
        activation: activation used by the main-path convolution.
        block_name: name passed to ``tf.name_scope``.
        padding: padding mode used by every convolution and pooling layer.
    """
    layers = tf.keras.layers

    with tf.name_scope(block_name):
        shortcut = conv

        # Main path.
        main = layers.Conv2D(num_filters, kernel_size=5, strides=1, padding=padding,
                             activation=activation)(conv)
        main = layers.MaxPool2D(padding=padding)(main)
        main = layers.BatchNormalization()(main)
        print(main.shape)

        # Shortcut path: projected and pooled so it can be added to the main path.
        shortcut = layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding,
                                 activation=None)(shortcut)
        shortcut = layers.MaxPool2D(padding=padding)(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

        merged = main + shortcut
        merged = layers.LeakyReLU()(merged)
        print(merged.shape)

    return merged
def build_residual_block_1x1(input_layer, num_filters, activation, block_name, padding='SAME', drop_rate=0.5,
                             do_drop_out=True, do_batch_norm=True):
    """Build a residual block of two 1x1 convolutions and return its output.

    Each convolution is preceded by optional dropout and optional batch
    normalization. The first convolution uses ``activation``, the second is
    linear. The untouched input is added to the result before a LeakyReLU, so
    ``num_filters`` has to be compatible with the input's channel count for
    the addition to succeed. Intermediate and final shapes are printed.

    Args:
        input_layer: input tensor (also used as the identity shortcut).
        num_filters: number of filters for both convolutions.
        activation: activation of the first convolution.
        block_name: name passed to ``tf.name_scope``.
        padding: padding mode of both convolutions.
        drop_rate: dropout rate used when ``do_drop_out`` is true.
        do_drop_out: apply dropout before each convolution.
        do_batch_norm: apply batch normalization before each convolution.
    """
    layers = tf.keras.layers

    def regularize(tensor):
        # Optional dropout followed by optional batch normalization.
        if do_drop_out:
            tensor = layers.Dropout(drop_rate)(tensor)
        if do_batch_norm:
            tensor = layers.BatchNormalization()(tensor)
        return tensor

    with tf.name_scope(block_name):
        shortcut = input_layer

        hidden = regularize(input_layer)
        hidden = layers.Conv2D(num_filters, kernel_size=1, strides=1, padding=padding,
                               activation=activation)(hidden)
        print(hidden.shape)

        hidden = regularize(hidden)
        hidden = layers.Conv2D(num_filters, kernel_size=1, strides=1, padding=padding,
                               activation=None)(hidden)

        merged = hidden + shortcut
        merged = layers.LeakyReLU()(merged)
        print(merged.shape)

    return merged
class UNET:
    """Residual CNN regressor trained from ``.npy`` tile files.

    The Keras model takes two ``(30, 30, n_chans)`` inputs (only the first one
    is wired into the graph), applies a residual contracting branch
    (``build_unet``) and a 1x1 sigmoid convolution (``build_fcl``), and is
    trained with mean squared error against ``l2_params[label_idx]``.
    Despite the class name, no expanding branch is built.

    Typical use: ``UNET().run(directory)``.
    """

    def __init__(self):
        # Raw arrays (not populated by the code in this class).
        self.train_data = None
        self.train_label = None
        self.test_data = None
        self.test_label = None
        self.test_data_denorm = None

        # tf.data pipelines built by get_*_dataset().
        self.train_dataset = None
        self.inner_train_dataset = None
        self.test_dataset = None
        self.eval_dataset = None

        self.X_img = None
        self.X_prof = None
        self.X_u = None
        self.X_v = None
        self.X_sfc = None

        self.inputs = []

        self.y = None
        self.handle = None
        self.inner_handle = None
        self.in_mem_batch = None

        # HDF5 handles; opened by setup_test_pipeline(), closed after use.
        self.h5f_l1b_trn = None
        self.h5f_l1b_tst = None
        self.h5f_l2_trn = None
        self.h5f_l2_tst = None

        self.logits = None

        self.predict_data = None
        self.predict_dataset = None
        self.mean_list = None
        self.std_list = None

        self.training_op = None
        self.correct = None
        self.accuracy = None
        self.loss = None
        self.pred_class = None
        self.variable_averages = None

        self.global_step = None

        # TensorBoard writers, created in do_training().
        self.writer_train = None
        self.writer_valid = None
        self.writer_train_valid_loss = None

        self.OUT_OF_RANGE = False

        self.abi = None
        self.temp = None
        self.wv = None
        self.lbfp = None
        self.sfc = None

        self.in_mem_data_cache = {}
        self.in_mem_data_cache_test = {}

        self.model = None
        self.optimizer = None
        self.ema = None

        # Metrics. Only the loss/accuracy pairs are created by
        # build_evaluation(); the classification metrics stay None.
        self.train_loss = None
        self.train_accuracy = None
        self.test_loss = None
        self.test_accuracy = None
        self.test_auc = None
        self.test_recall = None
        self.test_precision = None
        self.test_confusion_matrix = None
        self.test_true_pos = None
        self.test_true_neg = None
        self.test_false_pos = None
        self.test_false_neg = None

        self.test_labels = []
        self.test_preds = []
        self.test_probs = None

        self.learningRateSchedule = None
        self.num_data_samples = None
        self.initial_learning_rate = None

        self.data_dct = None
        self.train_data_files = None
        self.train_label_files = None
        self.test_data_files = None
        self.test_label_files = None

        self.train_data_nda = None
        self.train_label_nda = None
        self.test_data_nda = None
        self.test_label_nda = None

        # Read by the evaluation path but never assigned in the original code.
        # NOTE(review): emis_params is assumed to be the intended channel list
        # (it matches n_chans == 10) - confirm.
        self.train_params = list(emis_params)
        # NOTE(review): must be set by the caller before evaluation; its
        # meaning is not visible in this file.
        self.flight_level = None

        self.n_chans = 10
        if TRIPLET:
            self.n_chans *= 3

        self.X_img = tf.keras.Input(shape=(30, 30, self.n_chans))
        self.inputs.append(self.X_img)
        # Second input: fed by the pipeline, but not connected to any layer.
        self.inputs.append(tf.keras.Input(shape=(30, 30, self.n_chans)))

        self.DISK_CACHE = False

        tf.debugging.set_log_device_placement(LOG_DEVICE_PLACEMENT)

    # ------------------------------------------------------------------
    # Data loading
    # ------------------------------------------------------------------
    def get_in_mem_data_batch(self, idxs, is_training):
        """Load, normalize and optionally augment the files selected by ``idxs``.

        Args:
            idxs: iterable of indexes into the train (or test) file lists.
            is_training: select the training file lists and enable flipping
                augmentation (when ``DO_AUGMENT`` is set).

        Returns:
            ``(data, data, label)`` - the data array is returned twice because
            the model declares two inputs.
        """
        if is_training:
            data_files = self.train_data_files
            label_files = self.train_label_files
        else:
            data_files = self.test_data_files
            label_files = self.test_label_files

        data_s = []
        label_s = []
        for k in idxs:
            data_s.append(np.load(data_files[k]))
            label_s.append(np.load(label_files[k]))

        data = np.concatenate(data_s)
        label = np.concatenate(label_s)

        # Keep only the selected label field and append a trailing axis.
        label = label[:, label_idx, :, :]
        label = np.expand_dims(label, axis=3)

        data = data.astype(np.float32)
        label = label.astype(np.float32)

        # Normalize each channel (axis 1) separately, then stack channels last.
        data_norm = [normalize(data[:, k, :, :], param, mean_std_dct)
                     for k, param in enumerate(emis_params)]
        data = np.stack(data_norm, axis=3)

        if label_param != 'cloud_fraction':
            label = scale(label, label_param, mean_std_dct)

        if is_training and DO_AUGMENT:
            data_ud = np.flip(data, axis=1)
            label_ud = np.flip(label, axis=1)

            data_lr = np.flip(data, axis=2)
            label_lr = np.flip(label, axis=2)

            data = np.concatenate([data, data_ud, data_lr])
            label = np.concatenate([label, label_ud, label_lr])

        return data, data, label

    def get_in_mem_data_batch_train(self, idxs):
        """Training variant of :meth:`get_in_mem_data_batch`."""
        return self.get_in_mem_data_batch(idxs, True)

    def get_in_mem_data_batch_test(self, idxs):
        """Test/validation variant of :meth:`get_in_mem_data_batch`."""
        return self.get_in_mem_data_batch(idxs, False)

    def get_in_mem_data_batch_eval(self, idxs):
        """Build one evaluation sample from ``self.data_dct``.

        ``idxs`` is not used. Returns the normalized, channels-last data with a
        leading batch axis, plus a one-hot (depth 5) encoding of
        ``self.flight_level`` with two leading singleton axes.

        Raises:
            ValueError: if ``self.flight_level`` has not been set.
        """
        data = []
        for param in self.train_params:
            nda = self.data_dct[param]
            nda = normalize(nda, param, mean_std_dct)
            data.append(nda)
        data = np.stack(data)
        data = data.astype(np.float32)
        data = np.transpose(data, axes=(1, 2, 0))
        data = np.expand_dims(data, axis=0)

        if self.flight_level is None:
            raise ValueError('flight_level must be set before evaluation')

        # tf.one_hot only accepts integer indices; the float64 array used
        # previously is rejected by the op.
        nda = np.array([self.flight_level], dtype=np.int32)
        nda = tf.one_hot(nda, 5).numpy()
        nda = np.expand_dims(nda, axis=0)
        nda = np.expand_dims(nda, axis=0)

        return data, nda

    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function(self, indexes):
        """Graph wrapper around :meth:`get_in_mem_data_batch_train`."""
        out = tf.numpy_function(self.get_in_mem_data_batch_train, [indexes],
                                [tf.float32, tf.float32, tf.float32])
        return out

    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function_test(self, indexes):
        """Graph wrapper around :meth:`get_in_mem_data_batch_test`."""
        out = tf.numpy_function(self.get_in_mem_data_batch_test, [indexes],
                                [tf.float32, tf.float32, tf.float32])
        return out

    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function_evaluate(self, indexes):
        """Graph wrapper around :meth:`get_in_mem_data_batch_eval`."""
        # TODO: modify for user specified altitude
        out = tf.numpy_function(self.get_in_mem_data_batch_eval, [indexes], [tf.float32, tf.float32])
        return out

    def get_train_dataset(self, indexes):
        """Create ``self.train_dataset`` from file indexes.

        Indexes are grouped ``PROC_BATCH_SIZE`` at a time, loaded through
        :meth:`data_function`, cached, shuffled (when ``DO_AUGMENT``) and
        prefetched.
        """
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.batch(PROC_BATCH_SIZE)
        dataset = dataset.map(self.data_function, num_parallel_calls=8)
        dataset = dataset.cache()
        if DO_AUGMENT:
            dataset = dataset.shuffle(PROC_BATCH_BUFFER_SIZE)
        dataset = dataset.prefetch(buffer_size=1)
        self.train_dataset = dataset

    def get_test_dataset(self, indexes):
        """Create ``self.test_dataset`` (batched, mapped and cached) from file indexes."""
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.batch(PROC_BATCH_SIZE)
        dataset = dataset.map(self.data_function_test, num_parallel_calls=8)
        dataset = dataset.cache()
        self.test_dataset = dataset

    def get_evaluate_dataset(self, indexes):
        """Create ``self.eval_dataset`` by mapping :meth:`data_function_evaluate` over indexes."""
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.map(self.data_function_evaluate, num_parallel_calls=8)
        self.eval_dataset = dataset

    # ------------------------------------------------------------------
    # Pipeline setup
    # ------------------------------------------------------------------
    def setup_pipeline(self, train_data_files, train_label_files, test_data_files, test_label_files,
                       num_train_samples):
        """Register the file lists and build the train and test datasets.

        Args:
            train_data_files, train_label_files: parallel lists of ``.npy`` paths.
            test_data_files, test_label_files: parallel lists of ``.npy`` paths.
            num_train_samples: approximate number of training samples; stored
                in ``self.num_data_samples`` and used by build_training() to
                derive the learning-rate decay period.
        """
        self.train_data_files = train_data_files
        self.train_label_files = train_label_files
        self.test_data_files = test_data_files
        self.test_label_files = test_label_files

        trn_idxs = np.arange(len(train_data_files))
        np.random.shuffle(trn_idxs)

        # Test indexes must address the *test* file lists. Sizing them from
        # the training list either overruns or truncates the test set.
        tst_idxs = np.arange(len(test_data_files))

        self.get_train_dataset(trn_idxs)
        self.get_test_dataset(tst_idxs)

        self.num_data_samples = num_train_samples  # approximately

        print('datetime:', now)
        print('training and test data:')
        print('---------------------------')
        print('num train samples:', self.num_data_samples)
        print('BATCH SIZE:', BATCH_SIZE)
        print('num test samples:', tst_idxs.shape[0])
        print('setup_pipeline: Done')

    def setup_test_pipeline(self, filename_l1b, filename_l2, seed=None, shuffle=False):
        """Open the given HDF5 files and build a test dataset over their ``time`` axis.

        The L1B file is preferred for sizing; the L2 file is used when no L1B
        file is open. At least one of the two file names must be given.
        NOTE(review): the test dataset reads ``self.test_data_files``, which
        this method does not set - confirm how callers provide it.
        """
        if filename_l1b is not None:
            self.h5f_l1b_tst = h5py.File(filename_l1b, 'r')
        if filename_l2 is not None:
            self.h5f_l2_tst = h5py.File(filename_l2, 'r')

        h5f = self.h5f_l1b_tst if self.h5f_l1b_tst is not None else self.h5f_l2_tst
        time = h5f['time']
        tst_idxs = np.arange(time.shape[0])
        self.num_data_samples = len(tst_idxs)
        if seed is not None:
            np.random.seed(seed)
        if shuffle:
            np.random.shuffle(tst_idxs)

        self.get_test_dataset(tst_idxs)

        print('num test samples:', tst_idxs.shape[0])
        print('setup_test_pipeline: Done')

    def setup_eval_pipeline(self, data_dct, num_tiles=1):
        """Store ``data_dct`` and build an evaluation dataset of ``num_tiles`` elements."""
        self.data_dct = data_dct
        idxs = np.arange(num_tiles)
        self.num_data_samples = idxs.shape[0]

        self.get_evaluate_dataset(idxs)

    # ------------------------------------------------------------------
    # Model construction
    # ------------------------------------------------------------------
    def build_fcl(self, input_layer):
        """Attach the output head and store it in ``self.logits``.

        The head is a single-filter 1x1 convolution with a sigmoid activation,
        so the stored "logits" are values in (0, 1), not raw logits.
        """
        print('build fully connected layer')

        # The sigmoid was already applied unconditionally: the earlier
        # NumClasses-dependent choice was always overwritten, so it is dropped.
        activation = tf.nn.sigmoid

        # Called logits, but these are actually probabilities, see activation
        logits = tf.keras.layers.Conv2D(1, kernel_size=1, strides=1, padding="SAME", name='probability',
                                        activation=activation)(input_layer)
        print(logits.shape)

        self.logits = logits

    @staticmethod
    def _down_block(conv, num_filters, activation, padding):
        """Residual 3x3 convolution stage followed by 2x max pooling on both paths."""
        layers = tf.keras.layers
        skip = conv

        conv = layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=activation)(conv)
        conv = layers.MaxPool2D(padding=padding)(conv)
        conv = layers.BatchNormalization()(conv)

        skip = layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=None)(skip)
        skip = layers.MaxPool2D(padding=padding)(skip)
        skip = layers.BatchNormalization()(skip)

        conv = conv + skip
        return layers.LeakyReLU()(conv)

    def build_unet(self):
        """Build the contracting branch on ``self.inputs[0]`` and return its output.

        The 30x30 input passes through a VALID 5x5 convolution (26x26 output),
        is cropped to 16x16, then goes through one un-pooled residual stage
        and three pooled residual stages that double the filter count each.
        """
        print('build_cnn')
        layers = tf.keras.layers
        padding = "SAME"
        activation = tf.nn.leaky_relu

        num_filters = self.n_chans * 8

        input_2d = self.inputs[0]
        print('input:', input_2d.shape)

        conv = layers.Conv2D(num_filters, kernel_size=5, strides=1, padding='VALID', activation=None)(input_2d)
        # Keep a 16x16 window of the 26x26 feature map.
        # NOTE(review): rows/cols 4..19 are not centred (centre would be 5:21) - confirm.
        conv = conv[:, 4:20, 4:20, :]

        print('Contracting Branch')
        print('input:', conv.shape)
        skip = conv

        if NOISE_TRAINING:
            conv = layers.GaussianNoise(stddev=NOISE_STDDEV)(conv)

        # Stage 1: 5x5 main path, 3x3 shortcut, no pooling.
        conv = layers.Conv2D(num_filters, kernel_size=5, strides=1, padding=padding, activation=activation)(conv)
        conv = layers.BatchNormalization()(conv)
        print(conv.shape)

        skip = layers.Conv2D(num_filters, kernel_size=3, strides=1, padding=padding, activation=None)(skip)
        skip = layers.BatchNormalization()(skip)

        conv = conv + skip
        conv = layers.LeakyReLU()(conv)
        print('1d:', conv.shape)

        # Stages 2-4: identical pooled residual stages with doubling width.
        for tag in ('2d:', '3d:', '4d:'):
            num_filters *= 2
            conv = self._down_block(conv, num_filters, activation, padding)
            print(tag, conv.shape)

        return conv

    def build_training(self):
        """Create the loss, the exponentially decaying learning rate and the Adam optimizer."""
        self.loss = tf.keras.losses.MeanSquaredError()  # Regression

        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        initial_learning_rate = 0.002
        decay_rate = 0.95
        steps_per_epoch = int(self.num_data_samples / BATCH_SIZE)  # one epoch
        # Decay twice per epoch. Clamp to 1: small sample counts (e.g. the
        # evaluation path) would otherwise give a zero decay period.
        decay_steps = max(1, int(steps_per_epoch / 2))
        print('initial rate, decay rate, steps/epoch, decay steps:', initial_learning_rate, decay_rate,
              steps_per_epoch, decay_steps)

        self.learningRateSchedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate,
                                                                                   decay_steps, decay_rate)

        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learningRateSchedule)

        if TRACK_MOVING_AVERAGE:
            # Not really sure this works properly (from tfa)
            self.ema = tf.train.ExponentialMovingAverage(decay=0.9999)

        self.optimizer = optimizer
        self.initial_learning_rate = initial_learning_rate

    def build_evaluation(self):
        """Create the train/test metrics.

        "accuracy" is tracked as mean absolute error because the model is a
        regressor. Classification metrics (AUC, recall, ...) are not created.
        """
        self.train_accuracy = tf.keras.metrics.MeanAbsoluteError(name='train_accuracy')
        self.test_accuracy = tf.keras.metrics.MeanAbsoluteError(name='test_accuracy')
        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.test_loss = tf.keras.metrics.Mean(name='test_loss')

    def build_model(self):
        """Assemble ``self.model`` from the contracting branch and the output head."""
        cnn = self.build_unet()
        self.build_fcl(cnn)
        self.model = tf.keras.Model(self.inputs, self.logits)

    # ------------------------------------------------------------------
    # Train / test steps
    # ------------------------------------------------------------------
    @tf.function
    def train_step(self, mini_batch):
        """Run one optimizer step on ``(input0, input1, labels)`` and return the data loss."""
        inputs = [mini_batch[0], mini_batch[1]]
        labels = mini_batch[2]
        with tf.GradientTape() as tape:
            pred = self.model(inputs, training=True)
            loss = self.loss(labels, pred)
            total_loss = loss
            # Add layer regularization losses, if the model defines any.
            if len(self.model.losses) > 0:
                reg_loss = tf.math.add_n(self.model.losses)
                total_loss = loss + reg_loss
        gradients = tape.gradient(total_loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        if TRACK_MOVING_AVERAGE:
            self.ema.apply(self.model.trainable_variables)

        self.train_loss(loss)
        self.train_accuracy(labels, pred)

        return loss

    @tf.function
    def test_step(self, mini_batch):
        """Update the test loss/accuracy metrics with one mini-batch."""
        inputs = [mini_batch[0], mini_batch[1]]
        labels = mini_batch[2]
        pred = self.model(inputs, training=False)
        t_loss = self.loss(labels, pred)

        self.test_loss(t_loss)
        self.test_accuracy(labels, pred)

    def predict(self, mini_batch):
        """Like :meth:`test_step`, but also records labels and predictions."""
        inputs = [mini_batch[0], mini_batch[1]]
        labels = mini_batch[2]
        pred = self.model(inputs, training=False)
        t_loss = self.loss(labels, pred)

        self.test_labels.append(labels)
        self.test_preds.append(pred.numpy())

        self.test_loss(t_loss)
        self.test_accuracy(labels, pred)

    def reset_test_metrics(self):
        """Reset the test loss and accuracy accumulators."""
        self.test_loss.reset_states()
        self.test_accuracy.reset_states()

    def get_metrics(self):
        """Return ``(f1, mcc)`` computed from the classification metrics.

        Requires ``test_recall``, ``test_precision`` and the four confusion
        counters to be metric objects; build_evaluation() does not create them.
        """
        recall = self.test_recall.result()
        precsn = self.test_precision.result()
        f1 = 2 * (precsn * recall) / (precsn + recall)

        tn = self.test_true_neg.result()
        tp = self.test_true_pos.result()
        fn = self.test_false_neg.result()
        fp = self.test_false_pos.result()

        # Matthews correlation coefficient.
        mcc = ((tp * tn) - (fp * fn)) / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
        return f1, mcc

    def _current_lr(self):
        """Return the learning rate at the optimizer's current iteration.

        Evaluates the schedule directly instead of the private
        ``optimizer._decayed_lr`` helper, which newer Keras optimizers lack.
        """
        return self.learningRateSchedule(self.optimizer.iterations).numpy()

    def _for_each_test_mini_batch(self, step_fn):
        """Call ``step_fn`` on every ``BATCH_SIZE`` mini-batch of the test dataset."""
        for data0, data1, label in self.test_dataset:
            ds = tf.data.Dataset.from_tensor_slices((data0, data1, label))
            ds = ds.batch(BATCH_SIZE)
            for mini_batch in ds:
                step_fn(mini_batch)

    # ------------------------------------------------------------------
    # Training
    # ------------------------------------------------------------------
    def do_training(self, ckpt_dir=None):
        """Train for ``NUM_EPOCHS`` epochs, logging summaries and checkpointing.

        Every 100 training steps the full test set is evaluated and written to
        the summary writers. After each epoch the test set is evaluated again
        and a checkpoint is saved whenever the test loss improves.

        Args:
            ckpt_dir: checkpoint directory; defaults to ``modeldir`` (created
                if missing).
        """
        if ckpt_dir is None:
            # makedirs copes with an existing directory and missing parents.
            os.makedirs(modeldir, exist_ok=True)
            ckpt_dir = modeldir
        ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)

        self.writer_train = tf.summary.create_file_writer(os.path.join(logdir, 'plot_train'))
        self.writer_valid = tf.summary.create_file_writer(os.path.join(logdir, 'plot_valid'))
        self.writer_train_valid_loss = tf.summary.create_file_writer(os.path.join(logdir, 'plot_train_valid_loss'))

        step = 0
        total_time = 0
        # np.float was removed from NumPy; np.float64 is the type it aliased.
        best_test_loss = np.finfo(dtype=np.float64).max

        es = EarlyStop() if EARLY_STOP else None

        for epoch in range(NUM_EPOCHS):
            self.train_loss.reset_states()
            self.train_accuracy.reset_states()

            t0 = datetime.datetime.now().timestamp()

            proc_batch_cnt = 0
            n_samples = 0

            for data0, data1, label in self.train_dataset:
                trn_ds = tf.data.Dataset.from_tensor_slices((data0, data1, label))
                trn_ds = trn_ds.batch(BATCH_SIZE)
                for mini_batch in trn_ds:
                    if self.learningRateSchedule is not None:
                        loss = self.train_step(mini_batch)

                    if (step % 100) == 0:
                        with self.writer_train.as_default():
                            tf.summary.scalar('loss_trn', loss.numpy(), step=step)
                            tf.summary.scalar('learning_rate', self._current_lr(), step=step)
                            tf.summary.scalar('num_train_steps', step, step=step)
                            tf.summary.scalar('num_epochs', epoch, step=step)

                        self.reset_test_metrics()
                        self._for_each_test_mini_batch(self.test_step)

                        with self.writer_valid.as_default():
                            tf.summary.scalar('loss_val', self.test_loss.result(), step=step)
                            tf.summary.scalar('acc_val', self.test_accuracy.result(), step=step)

                        with self.writer_train_valid_loss.as_default():
                            tf.summary.scalar('loss_trn', loss.numpy(), step=step)
                            tf.summary.scalar('loss_val', self.test_loss.result(), step=step)

                        print('****** test loss, acc, lr:', self.test_loss.result().numpy(),
                              self.test_accuracy.result().numpy(), self._current_lr())

                    step += 1
                    print('train loss:', loss.numpy())

                proc_batch_cnt += 1
                n_samples += data0.shape[0]
                print('proc_batch_cnt:', proc_batch_cnt, n_samples)

            t1 = datetime.datetime.now().timestamp()
            print('End of Epoch:', epoch + 1, 'elapsed time:', (t1 - t0))
            total_time += (t1 - t0)

            # End-of-epoch evaluation drives checkpointing and early stopping.
            self.reset_test_metrics()
            self._for_each_test_mini_batch(self.test_step)

            print('loss, acc:', self.test_loss.result().numpy(), self.test_accuracy.result().numpy())
            print('------------------------------------------------------')

            tst_loss = self.test_loss.result().numpy()
            if tst_loss < best_test_loss:
                best_test_loss = tst_loss
                ckpt_manager.save()

            if self.DISK_CACHE and epoch == 0:
                with open(cachepath, 'wb') as f:
                    pickle.dump(self.in_mem_data_cache, f)

            if EARLY_STOP and es.check_stop(tst_loss):
                break

        print('total time:', total_time)
        self.writer_train.close()
        self.writer_valid.close()
        self.writer_train_valid_loss.close()

        for h5f in (self.h5f_l1b_trn, self.h5f_l1b_tst, self.h5f_l2_trn, self.h5f_l2_tst):
            if h5f is not None:
                h5f.close()

    # ------------------------------------------------------------------
    # Restore / evaluate
    # ------------------------------------------------------------------
    def restore(self, ckpt_dir):
        """Restore the latest checkpoint from ``ckpt_dir`` and run the test set.

        Stores the concatenated labels in ``self.test_labels``, the raw model
        outputs in ``self.test_probs`` and the thresholded (or arg-max)
        outputs in ``self.test_preds``.
        """
        ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)

        ckpt.restore(ckpt_manager.latest_checkpoint)

        self.reset_test_metrics()
        self._for_each_test_mini_batch(self.predict)

        if self.test_recall is not None:
            f1, mcc = self.get_metrics()
            print('loss, acc:', self.test_loss.result().numpy(), self.test_accuracy.result().numpy(),
                  self.test_recall.result().numpy(), self.test_precision.result().numpy(),
                  self.test_auc.result().numpy(), f1.numpy(), mcc.numpy())
        else:
            # The classification metrics are not built for the regression
            # setup; asking for them would fail on None.
            print('loss, acc:', self.test_loss.result().numpy(), self.test_accuracy.result().numpy())

        labels = np.concatenate(self.test_labels)
        self.test_labels = labels

        preds = np.concatenate(self.test_preds)
        self.test_probs = preds

        if NumClasses == 2:
            preds = np.where(preds > 0.5, 1, 0)
        else:
            preds = np.argmax(preds, axis=1)

        self.test_preds = preds

    def do_evaluate(self, prob_thresh=0.5):
        """Run the model over ``self.eval_dataset`` and store the outputs.

        Args:
            prob_thresh: threshold applied to the outputs when ``NumClasses == 2``.
        """
        self.reset_test_metrics()

        pred_s = []

        for data in self.eval_dataset:
            print(data[0].shape, data[1].shape)
            # NOTE(review): ``data`` is a (data, one_hot) pair wrapped in a
            # list - confirm this matches the model's two declared inputs.
            pred = self.model([data])
            print(pred.shape, np.histogram(pred.numpy()))
            # Collect every prediction; the list was previously left empty,
            # which made the concatenate below fail.
            pred_s.append(pred.numpy())

        preds = np.concatenate(pred_s)
        preds = preds[:, 0]
        self.test_probs = preds

        if NumClasses == 2:
            preds = np.where(preds > prob_thresh, 1, 0)
        else:
            preds = np.argmax(preds, axis=1)
        self.test_preds = preds

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------
    def run(self, directory):
        """Train on the ``*_train*.npy`` / ``*_valid*.npy`` files found under ``directory``.

        ``directory`` is used as a plain string prefix for the glob patterns,
        so it should end with a path separator.
        """
        train_data_files = sorted(glob.glob(directory + 'data_train*.npy'))
        valid_data_files = sorted(glob.glob(directory + 'data_valid*.npy'))
        train_label_files = sorted(glob.glob(directory + 'label_train*.npy'))
        valid_label_files = sorted(glob.glob(directory + 'label_valid*.npy'))

        self.setup_pipeline(train_data_files, train_label_files, valid_data_files, valid_label_files, 200000)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.do_training()

    def run_restore(self, filename_l1b, filename_l2, ckpt_dir):
        """Build the model, restore it from ``ckpt_dir`` and evaluate the given test files."""
        self.setup_test_pipeline(filename_l1b, filename_l2)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.restore(ckpt_dir)

        if self.h5f_l1b_tst is not None:
            self.h5f_l1b_tst.close()
        if self.h5f_l2_tst is not None:
            self.h5f_l2_tst.close()

    def run_evaluate(self, filename, ckpt_dir):
        """Build the model, restore it from ``ckpt_dir`` and evaluate ``filename``."""
        data_dct, ll, cc = make_for_full_domain_predict(filename, name_list=self.train_params)
        self.setup_eval_pipeline(data_dct, len(ll))
        self.build_model()
        self.build_training()
        self.build_evaluation()

        # Load the trained weights. ckpt_dir used to be passed to
        # do_evaluate() as the probability threshold and was never restored.
        ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)
        ckpt.restore(ckpt_manager.latest_checkpoint)

        self.do_evaluate()
# Script entry point: build a model and train it on the files whose paths
# start with the given prefix.
if __name__ == "__main__":
    unet = UNET()
    unet.run('matchup_filename')
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment