Tom Rink / python · Commit dde06dd6 — authored 3 years ago by tomrink
Parent: a60dcac4

    initial commit of FCN for icing prediction.

Showing 1 changed file: modules/deeplearning/icing_fcn.py (new file, mode 100644, +1147, −0)
import tensorflow as tf
from util.setup import logdir, modeldir, cachepath, now, ancillary_path
from util.util import EarlyStop, normalize, make_for_full_domain_predict
from util.geos_nav import get_navigation, get_lon_lat_2d_mesh
import os, datetime
import numpy as np
import pickle
import h5py
LOG_DEVICE_PLACEMENT = False

# Manual (data, label) caching, but has been replaced with tf.data.dataset.cache()
CACHE_DATA_IN_MEM = False

PROC_BATCH_SIZE = 4096
PROC_BATCH_BUFFER_SIZE = 50000

NumClasses = 2
if NumClasses == 2:
    NumLogits = 1
else:
    NumLogits = NumClasses

BATCH_SIZE = 128
NUM_EPOCHS = 100

TRACK_MOVING_AVERAGE = False
EARLY_STOP = False

TRIPLET = False
CONV3D = False

NOISE_TRAINING = False
NOISE_STDDEV = 0.01

img_width = 16

mean_std_dct = {}
mean_std_file = ancillary_path + 'mean_std_lo_hi_l2.pkl'
f = open(mean_std_file, 'rb')
mean_std_dct_l2 = pickle.load(f)
f.close()

mean_std_file = ancillary_path + 'mean_std_lo_hi_l1b.pkl'
f = open(mean_std_file, 'rb')
mean_std_dct_l1b = pickle.load(f)
f.close()

mean_std_dct.update(mean_std_dct_l1b)
mean_std_dct.update(mean_std_dct_l2)
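# Note: an equivalent, slightly safer idiom for the pickle loads above is a
# context manager, which closes the file even if load() raises; behavior is
# otherwise identical (sketch only):
#     with open(ancillary_path + 'mean_std_lo_hi_l2.pkl', 'rb') as f:
#         mean_std_dct_l2 = pickle.load(f)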
# -- NIGHT L2 -----------------------------
train_params_l2_night = ['cld_height_acha', 'cld_geo_thick', 'cld_temp_acha', 'cld_press_acha',
                         'supercooled_cloud_fraction', 'cld_emiss_acha', 'conv_cloud_fraction',
                         'cld_reff_acha', 'cld_opd_acha']
# -- DAY L2 --------------------------------
train_params_l2_day = ['cld_height_acha', 'cld_geo_thick', 'cld_temp_acha', 'cld_press_acha',
                       'supercooled_cloud_fraction', 'cld_emiss_acha', 'conv_cloud_fraction',
                       'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
# -- DAY L1B --------------------------------
train_params_l1b_day = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom',
                        'temp_3_75um_nom', 'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom',
                        'temp_8_5um_nom', 'temp_9_7um_nom', 'refl_0_47um_nom', 'refl_0_65um_nom',
                        'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
# -- NIGHT L1B -------------------------------
train_params_l1b_night = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom',
                          'temp_3_75um_nom', 'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom',
                          'temp_8_5um_nom', 'temp_9_7um_nom']
# -- DAY LUNAR ---------------------------------
# train_params_l1b = ['cld_height_acha', 'cld_geo_thick', 'cld_temp_acha', 'cld_press_acha', 'supercooled_cloud_fraction',
#                     'cld_emiss_acha', 'conv_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
# ---------------------------------------------

train_params = train_params_l1b_day + train_params_l2_day

# -- Zero out params (Experimentation Only) ------------
zero_out_params = ['cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp', 'lwc_dcomp']
DO_ZERO_OUT = False
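# Sketch of how the zero-out experiment is applied (see get_in_mem_data_batch
# below): during training with DO_ZERO_OUT = True, any channel named in
# zero_out_params has its normalized values replaced wholesale, i.e.
#     if param in zero_out_params:
#         nda[:,] = 0.0
# presumably to gauge how much the DCOMP microphysics channels contribute.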
def build_residual_block(input, drop_rate, num_neurons, activation, block_name, doDropout=True, doBatchNorm=True):
    with tf.name_scope(block_name):
        if doDropout:
            fc = tf.keras.layers.Dropout(drop_rate)(input)
            fc = tf.keras.layers.Dense(num_neurons, activation=activation)(fc)
        else:
            fc = tf.keras.layers.Dense(num_neurons, activation=activation)(input)
        if doBatchNorm:
            fc = tf.keras.layers.BatchNormalization()(fc)
        print(fc.shape)
        fc_skip = fc

        if doDropout:
            fc = tf.keras.layers.Dropout(drop_rate)(fc)
        fc = tf.keras.layers.Dense(num_neurons, activation=activation)(fc)
        if doBatchNorm:
            fc = tf.keras.layers.BatchNormalization()(fc)
        print(fc.shape)

        if doDropout:
            fc = tf.keras.layers.Dropout(drop_rate)(fc)
        fc = tf.keras.layers.Dense(num_neurons, activation=activation)(fc)
        if doBatchNorm:
            fc = tf.keras.layers.BatchNormalization()(fc)
        print(fc.shape)

        if doDropout:
            fc = tf.keras.layers.Dropout(drop_rate)(fc)
        fc = tf.keras.layers.Dense(num_neurons, activation=None)(fc)
        if doBatchNorm:
            fc = tf.keras.layers.BatchNormalization()(fc)

        fc = fc + fc_skip
        fc = tf.keras.layers.LeakyReLU()(fc)
        print(fc.shape)

    return fc
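# Usage sketch (hypothetical shapes): the block keeps its width constant so the
# identity skip can be added directly, and LeakyReLU follows the addition:
#     x = tf.keras.Input(shape=(64,))
#     y = build_residual_block(x, 0.5, 128, tf.nn.leaky_relu, 'RB_demo')
#     # y: (None, 128) -- four Dense(128) layers, skip taken after the first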
class IcingIntensityFCN:

    def __init__(self, day_night='DAY', l1b_or_l2='both', use_flight_altitude=False, gpu_device=0, datapath=None):

        if day_night == 'DAY':
            self.train_params_l1b = train_params_l1b_day
            self.train_params_l2 = train_params_l2_day
            if l1b_or_l2 == 'both':
                self.train_params = train_params_l1b_day + train_params_l2_day
            elif l1b_or_l2 == 'l1b':
                self.train_params = train_params_l1b_day
            elif l1b_or_l2 == 'l2':
                self.train_params = train_params_l2_day
        else:
            self.train_params_l1b = train_params_l1b_night
            self.train_params_l2 = train_params_l2_night
            if l1b_or_l2 == 'both':
                self.train_params = train_params_l1b_night + train_params_l2_night
            elif l1b_or_l2 == 'l1b':
                self.train_params = train_params_l1b_night
            elif l1b_or_l2 == 'l2':
                self.train_params = train_params_l2_night

        self.train_data = None
        self.train_label = None
        self.test_data = None
        self.test_label = None
        self.test_data_denorm = None

        self.train_dataset = None
        self.inner_train_dataset = None
        self.test_dataset = None
        self.eval_dataset = None
        self.X_img = None
        self.X_prof = None
        self.X_u = None
        self.X_v = None
        self.X_sfc = None
        self.inputs = []
        self.y = None
        self.handle = None
        self.inner_handle = None
        self.in_mem_batch = None

        self.h5f_l1b_trn = None
        self.h5f_l1b_tst = None
        self.h5f_l2_trn = None
        self.h5f_l2_tst = None

        self.logits = None

        self.predict_data = None
        self.predict_dataset = None
        self.mean_list = None
        self.std_list = None

        self.training_op = None
        self.correct = None
        self.accuracy = None
        self.loss = None
        self.pred_class = None
        self.gpu_device = gpu_device
        self.variable_averages = None

        self.global_step = None

        self.writer_train = None
        self.writer_valid = None

        self.OUT_OF_RANGE = False

        self.abi = None
        self.temp = None
        self.wv = None
        self.lbfp = None
        self.sfc = None

        self.in_mem_data_cache = {}
        self.in_mem_data_cache_test = {}

        self.model = None
        self.optimizer = None
        self.ema = None
        self.train_loss = None
        self.train_accuracy = None
        self.test_loss = None
        self.test_accuracy = None
        self.test_auc = None
        self.test_recall = None
        self.test_precision = None
        self.test_confusion_matrix = None
        self.test_true_pos = None
        self.test_true_neg = None
        self.test_false_pos = None
        self.test_false_neg = None

        self.test_labels = []
        self.test_preds = []
        self.test_probs = None

        self.learningRateSchedule = None
        self.num_data_samples = None
        self.initial_learning_rate = None

        self.data_dct = None

        n_chans = len(self.train_params)
        if TRIPLET:
            n_chans *= 3
        self.X_img = tf.keras.Input(shape=(None, None, n_chans))

        self.inputs.append(self.X_img)
        self.inputs.append(tf.keras.Input(5))

        self.flight_level = 0

        self.DISK_CACHE = False
        self.USE_FLIGHT_ALTITUDE = use_flight_altitude

        if datapath is not None:
            self.DISK_CACHE = False
            f = open(datapath, 'rb')
            self.in_mem_data_cache = pickle.load(f)
            f.close()

        tf.debugging.set_log_device_placement(LOG_DEVICE_PLACEMENT)

        # Doesn't seem to play well with SLURM
        # gpus = tf.config.experimental.list_physical_devices('GPU')
        # if gpus:
        #     try:
        #         # Currently, memory growth needs to be the same across GPUs
        #         for gpu in gpus:
        #             tf.config.experimental.set_memory_growth(gpu, True)
        #         logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        #         print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
        #     except RuntimeError as e:
        #         # Memory growth must be set before GPUs have been initialized
        #         print(e)
    def get_in_mem_data_batch(self, idxs, is_training):
        # Pretty much dead, but left in here for reference (See note above)
        if CACHE_DATA_IN_MEM:
            key = frozenset(idxs)
            if is_training:
                tup = self.in_mem_data_cache.get(key)
            else:
                tup = self.in_mem_data_cache_test.get(key)  # fixed: dict was being called as a function
            if tup is not None:
                return tup[0], tup[1], tup[2]

        # sort these to use as numpy indexing arrays
        nd_idxs = np.array(idxs)
        nd_idxs = np.sort(nd_idxs)

        data = []
        for param in self.train_params:
            nda = self.get_parameter_data(param, nd_idxs, is_training)
            # Manual Corruption Process. Better: see use of tf.keras.layers.GaussianNoise
            # if NOISE_TRAINING and is_training:
            #     nda = normalize(nda, param, mean_std_dct, add_noise=True, noise_scale=0.01, seed=42)
            # else:
            #     nda = normalize(nda, param, mean_std_dct)
            nda = normalize(nda, param, mean_std_dct)
            if DO_ZERO_OUT and is_training:
                try:
                    zero_out_params.index(param)
                    nda[:,] = 0.0
                except ValueError:
                    pass
            data.append(nda)
        data = np.stack(data)
        data = data.astype(np.float32)
        data = np.transpose(data, axes=(1, 2, 3, 0))

        data_alt = self.get_scalar_data(nd_idxs, is_training)

        label = self.get_label_data(nd_idxs, is_training)
        label = np.where(label == -1, 0, label)

        # binary, two class
        if NumClasses == 2:
            label = np.where(label != 0, 1, label)
            label = label.reshape((label.shape[0], 1))
        elif NumClasses == 3:
            label = np.where(np.logical_or(label == 1, label == 2), 1, label)
            label = np.where(np.invert(np.logical_or(label == 0, label == 1)), 2, label)
            label = label.reshape((label.shape[0], 1))

        if CACHE_DATA_IN_MEM:
            if is_training:
                self.in_mem_data_cache[key] = (data, data_alt, label)
            else:
                self.in_mem_data_cache_test[key] = (data, data_alt, label)

        return data, data_alt, label
    def get_parameter_data(self, param, nd_idxs, is_training):
        if is_training:
            if param in self.train_params_l1b:
                h5f = self.h5f_l1b_trn
            else:
                h5f = self.h5f_l2_trn
        else:
            if param in self.train_params_l1b:
                h5f = self.h5f_l1b_tst
            else:
                h5f = self.h5f_l2_tst

        nda = h5f[param][nd_idxs,]
        return nda
    def get_scalar_data(self, nd_idxs, is_training):
        param = 'flight_altitude'

        if is_training:
            if self.h5f_l1b_trn is not None:
                h5f = self.h5f_l1b_trn
            else:
                h5f = self.h5f_l2_trn
        else:
            if self.h5f_l1b_tst is not None:
                h5f = self.h5f_l1b_tst
            else:
                h5f = self.h5f_l2_tst

        nda = h5f[param][nd_idxs,]

        nda[np.logical_and(nda >= 0, nda < 2000)] = 0
        nda[np.logical_and(nda >= 2000, nda < 4000)] = 1
        nda[np.logical_and(nda >= 4000, nda < 6000)] = 2
        nda[np.logical_and(nda >= 6000, nda < 8000)] = 3
        nda[np.logical_and(nda >= 8000, nda < 15000)] = 4

        nda = tf.one_hot(nda, 5).numpy()

        return nda
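    # Example of the binning above: a flight_altitude of 5000 (assumed meters)
    # falls in [4000, 6000) -> category 2, and tf.one_hot maps 2 to
    # [0., 0., 1., 0., 0.]. Values outside [0, 15000) are left un-binned and
    # one-hot encode to all zeros.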
    def get_label_data(self, nd_idxs, is_training):
        # Note: labels will be same for nd_idxs across both L1B and L2
        if is_training:
            if self.h5f_l1b_trn is not None:
                h5f = self.h5f_l1b_trn
            else:
                h5f = self.h5f_l2_trn
        else:
            if self.h5f_l1b_tst is not None:
                h5f = self.h5f_l1b_tst
            else:
                h5f = self.h5f_l2_tst

        label = h5f['icing_intensity'][nd_idxs]
        label = label.astype(np.int32)
        return label
    def get_in_mem_data_batch_train(self, idxs):
        return self.get_in_mem_data_batch(idxs, True)

    def get_in_mem_data_batch_test(self, idxs):
        return self.get_in_mem_data_batch(idxs, False)
    def get_in_mem_data_batch_eval(self, idxs):
        # sort these to use as numpy indexing arrays
        nd_idxs = np.array(idxs)
        nd_idxs = np.sort(nd_idxs)

        data = []
        for param in self.train_params:
            nda = self.data_dct[param][nd_idxs, ]
            nda = normalize(nda, param, mean_std_dct)
            data.append(nda)
        data = np.stack(data)
        data = data.astype(np.float32)
        data = np.transpose(data, axes=(1, 2, 3, 0))

        # TODO: altitude data will be specified by user at run-time
        nda = np.zeros([nd_idxs.size])
        nda[:] = self.flight_level
        nda = tf.one_hot(nda, 5).numpy()

        return data, nda
    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function(self, indexes):
        out = tf.numpy_function(self.get_in_mem_data_batch_train, [indexes], [tf.float32, tf.float32, tf.int32])
        return out

    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function_test(self, indexes):
        out = tf.numpy_function(self.get_in_mem_data_batch_test, [indexes], [tf.float32, tf.float32, tf.int32])
        return out

    @tf.function(input_signature=[tf.TensorSpec(None, tf.int32)])
    def data_function_evaluate(self, indexes):
        # TODO: modify for user specified altitude
        out = tf.numpy_function(self.get_in_mem_data_batch_eval, [indexes], [tf.float32, tf.float32])
        return out
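    # The three wrappers above let the NumPy/h5py batch builders run inside a
    # tf.data pipeline: tf.numpy_function executes the Python callable eagerly,
    # and the dtype lists declare its outputs (data, altitude one-hot, label).
    # Equivalent direct call, assuming an instance `nn` with open HDF5 files:
    #     data, data_alt, label = nn.get_in_mem_data_batch_train(np.arange(4096))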
    def get_train_dataset(self, indexes):
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.batch(PROC_BATCH_SIZE)
        dataset = dataset.map(self.data_function, num_parallel_calls=8)
        dataset = dataset.cache()
        # dataset = dataset.shuffle(PROC_BATCH_BUFFER_SIZE)
        dataset = dataset.prefetch(buffer_size=1)
        self.train_dataset = dataset

    def get_test_dataset(self, indexes):
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.batch(PROC_BATCH_SIZE)
        dataset = dataset.map(self.data_function_test, num_parallel_calls=8)
        dataset = dataset.cache()
        self.test_dataset = dataset

    def get_evaluate_dataset(self, indexes):
        indexes = list(indexes)

        dataset = tf.data.Dataset.from_tensor_slices(indexes)
        dataset = dataset.batch(PROC_BATCH_SIZE)
        dataset = dataset.map(self.data_function_evaluate, num_parallel_calls=8)
        dataset = dataset.cache()
        self.eval_dataset = dataset
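    # All three builders share one pattern: slice the index array, batch indices
    # into PROC_BATCH_SIZE chunks, map each chunk through a numpy_function
    # loader, then cache() so epochs after the first skip the HDF5 reads.
    # Standalone sketch of the same pattern (load_fn stands in for the
    # data_function wrappers above):
    #     ds = tf.data.Dataset.from_tensor_slices(list(range(100000)))
    #     ds = ds.batch(4096).map(load_fn, num_parallel_calls=8).cache().prefetch(1)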
    def setup_pipeline(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst,
                       trn_idxs=None, tst_idxs=None, seed=None):

        if filename_l1b_trn is not None:
            self.h5f_l1b_trn = h5py.File(filename_l1b_trn, 'r')
        if filename_l1b_tst is not None:
            self.h5f_l1b_tst = h5py.File(filename_l1b_tst, 'r')
        if filename_l2_trn is not None:
            self.h5f_l2_trn = h5py.File(filename_l2_trn, 'r')
        if filename_l2_tst is not None:
            self.h5f_l2_tst = h5py.File(filename_l2_tst, 'r')

        if trn_idxs is None:
            # Note: time is same across both L1B and L2 for idxs
            if self.h5f_l1b_trn is not None:
                h5f = self.h5f_l1b_trn
            else:
                h5f = self.h5f_l2_trn
            time = h5f['time']
            trn_idxs = np.arange(time.shape[0])
            if seed is not None:
                np.random.seed(seed)
            np.random.shuffle(trn_idxs)

            if self.h5f_l1b_tst is not None:
                h5f = self.h5f_l1b_tst
            else:
                h5f = self.h5f_l2_tst
            time = h5f['time']
            tst_idxs = np.arange(time.shape[0])
            if seed is not None:
                np.random.seed(seed)
            np.random.shuffle(tst_idxs)

        self.num_data_samples = trn_idxs.shape[0]

        self.get_train_dataset(trn_idxs)
        self.get_test_dataset(tst_idxs)

        print('datetime: ', now)
        print('training and test data: ')
        print(filename_l1b_trn)
        print(filename_l1b_tst)
        print(filename_l2_trn)
        print(filename_l2_tst)
        print('---------------------------')
        print('num train samples: ', self.num_data_samples)
        print('BATCH SIZE: ', BATCH_SIZE)
        print('num test samples: ', tst_idxs.shape[0])
        print('setup_pipeline: Done')
    def setup_test_pipeline(self, filename_l1b, filename_l2, seed=None, shuffle=False):

        if filename_l1b is not None:
            self.h5f_l1b_tst = h5py.File(filename_l1b, 'r')
        if filename_l2 is not None:
            self.h5f_l2_tst = h5py.File(filename_l2, 'r')

        if self.h5f_l1b_tst is not None:
            h5f = self.h5f_l1b_tst
        else:
            h5f = self.h5f_l2_tst
        time = h5f['time']
        tst_idxs = np.arange(time.shape[0])
        self.num_data_samples = len(tst_idxs)
        if seed is not None:
            np.random.seed(seed)
        if shuffle:
            np.random.shuffle(tst_idxs)

        self.get_test_dataset(tst_idxs)

        print('num test samples: ', tst_idxs.shape[0])
        print('setup_test_pipeline: Done')
    def setup_eval_pipeline(self, data_dct, num_tiles):
        self.data_dct = data_dct
        idxs = np.arange(num_tiles)
        self.num_data_samples = idxs.shape[0]

        self.get_evaluate_dataset(idxs)
    def build_1d_cnn(self):
        print('build_1d_cnn')
        # padding = 'VALID'
        padding = 'SAME'

        # activation = tf.nn.relu
        # activation = tf.nn.elu
        activation = tf.nn.leaky_relu

        num_filters = 6

        conv = tf.keras.layers.Conv1D(num_filters, 5, strides=1, padding=padding)(self.inputs[1])
        conv = tf.keras.layers.MaxPool1D(padding=padding)(conv)
        print(conv)

        num_filters *= 2
        conv = tf.keras.layers.Conv1D(num_filters, 3, strides=1, padding=padding)(conv)
        conv = tf.keras.layers.MaxPool1D(padding=padding)(conv)
        print(conv)

        num_filters *= 2
        conv = tf.keras.layers.Conv1D(num_filters, 3, strides=1, padding=padding)(conv)
        conv = tf.keras.layers.MaxPool1D(padding=padding)(conv)
        print(conv)

        num_filters *= 2
        conv = tf.keras.layers.Conv1D(num_filters, 3, strides=1, padding=padding)(conv)
        conv = tf.keras.layers.MaxPool1D(padding=padding)(conv)
        print(conv)

        flat = tf.keras.layers.Flatten()(conv)
        print(flat)

        return flat
    def build_cnn(self):
        print('build_cnn')
        # padding = "VALID"
        padding = "SAME"

        # activation = tf.nn.relu
        # activation = tf.nn.elu
        activation = tf.nn.leaky_relu
        momentum = 0.99

        num_filters = len(self.train_params) * 4

        if NOISE_TRAINING:
            input_2d = tf.keras.layers.GaussianNoise(stddev=NOISE_STDDEV)(self.inputs[0])
        else:
            input_2d = self.inputs[0]

        conv = tf.keras.layers.Conv2D(num_filters, 5, strides=[1, 1], padding=padding, activation=activation)(input_2d)
        conv = tf.keras.layers.MaxPool2D(padding=padding)(conv)
        conv = tf.keras.layers.BatchNormalization()(conv)
        print(conv.shape)

        num_filters *= 2
        conv = tf.keras.layers.Conv2D(num_filters, 3, strides=[1, 1], padding=padding, activation=activation)(conv)
        conv = tf.keras.layers.MaxPool2D(padding=padding)(conv)
        conv = tf.keras.layers.BatchNormalization()(conv)
        print(conv.shape)

        num_filters *= 2
        conv = tf.keras.layers.Conv2D(num_filters, 3, strides=[1, 1], padding=padding, activation=activation)(conv)
        conv = tf.keras.layers.MaxPool2D(padding=padding)(conv)
        conv = tf.keras.layers.BatchNormalization()(conv)
        print(conv.shape)

        num_filters *= 2
        conv = tf.keras.layers.Conv2D(num_filters, 3, strides=[1, 1], padding=padding, activation=activation)(conv)
        conv = tf.keras.layers.MaxPool2D(padding=padding)(conv)
        conv = tf.keras.layers.BatchNormalization()(conv)
        print(conv.shape)

        # num_filters *= 2
        # conv = tf.keras.layers.Conv2D(num_filters, 3, strides=[1, 1], padding=padding, activation=activation)(conv)
        # conv = tf.keras.layers.MaxPool2D(padding=padding)(conv)
        # conv = tf.keras.layers.BatchNormalization()(conv)
        # print(conv.shape)

        flat = tf.keras.layers.Flatten()(conv)

        return flat
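    # Shape sketch (assuming 16x16 training tiles per img_width above, SAME
    # padding): each MaxPool2D halves the spatial dims, 16 -> 8 -> 4 -> 2 -> 1,
    # while filters grow 4n -> 8n -> 16n -> 32n for n = len(train_params);
    # with the default day L1B+L2 set (26 params) Flatten yields 1*1*832 = 832.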
    def build_dnn(self, input_layer=None):
        print('build fully connected layer')
        drop_rate = 0.5

        # activation = tf.nn.relu
        # activation = tf.nn.elu
        activation = tf.nn.leaky_relu
        momentum = 0.99

        if input_layer is not None:
            flat = input_layer
            n_hidden = input_layer.shape[1]
        else:
            flat = self.X_img
            n_hidden = self.X_img.shape[1]

        fac = 2

        fc = build_residual_block(flat, drop_rate, fac*n_hidden, activation, 'Residual_Block_1', doDropout=True, doBatchNorm=True)

        fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_2', doDropout=True, doBatchNorm=True)

        fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_3', doDropout=True, doBatchNorm=True)

        fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_4', doDropout=True, doBatchNorm=True)

        fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_5', doDropout=True, doBatchNorm=True)

        # fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_6', doDropout=True, doBatchNorm=True)
        #
        # fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_7', doDropout=True, doBatchNorm=True)
        #
        # fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_8', doDropout=True, doBatchNorm=True)

        fc = tf.keras.layers.Dense(n_hidden, activation=activation)(fc)
        fc = tf.keras.layers.BatchNormalization()(fc)

        if NumClasses == 2:
            activation = tf.nn.sigmoid  # For binary
        else:
            activation = tf.nn.softmax  # For multi-class

        # Called logits, but these are actually probabilities, see activation
        logits = tf.keras.layers.Dense(NumLogits, activation=activation)(fc)
        print(logits.shape)

        self.logits = logits
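    # Naming caveat restated: the final Dense already applies sigmoid/softmax,
    # so self.logits holds probabilities, which is why build_training constructs
    # its losses with from_logits=False. Minimal sketch:
    #     p = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(fc)   # p in [0, 1]
    #     loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)(y_true, p)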
    def build_training(self):
        if NumClasses == 2:
            self.loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)  # for two-class only
        else:
            self.loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)  # For multi-class

        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        initial_learning_rate = 0.006
        decay_rate = 0.95
        steps_per_epoch = int(self.num_data_samples / BATCH_SIZE)  # one epoch
        decay_steps = int(steps_per_epoch / 2)
        print('initial rate, decay rate, steps/epoch, decay steps: ',
              initial_learning_rate, decay_rate, steps_per_epoch, decay_steps)

        self.learningRateSchedule = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate, decay_steps, decay_rate)

        optimizer = tf.keras.optimizers.Adam(learning_rate=self.learningRateSchedule)

        if TRACK_MOVING_AVERAGE:
            # Not really sure this works properly (from tfa)
            # optimizer = tfa.optimizers.MovingAverage(optimizer)
            self.ema = tf.train.ExponentialMovingAverage(decay=0.9999)

        self.optimizer = optimizer
        self.initial_learning_rate = initial_learning_rate
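    # Decay arithmetic for the schedule above (staircase defaults to False):
    #     lr(step) = 0.006 * 0.95 ** (step / decay_steps)
    # with decay_steps set to half an epoch. Example: num_data_samples = 128000
    # gives steps_per_epoch = 1000 and decay_steps = 500, so after one epoch
    # lr = 0.006 * 0.95**2 ~= 0.0054.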
    def build_evaluation(self):
        self.train_loss = tf.keras.metrics.Mean(name='train_loss')
        self.test_loss = tf.keras.metrics.Mean(name='test_loss')

        if NumClasses == 2:
            self.train_accuracy = tf.keras.metrics.BinaryAccuracy(name='train_accuracy')
            self.test_accuracy = tf.keras.metrics.BinaryAccuracy(name='test_accuracy')
            self.test_auc = tf.keras.metrics.AUC(name='test_auc')
            self.test_recall = tf.keras.metrics.Recall(name='test_recall')
            self.test_precision = tf.keras.metrics.Precision(name='test_precision')
            self.test_true_neg = tf.keras.metrics.TrueNegatives(name='test_true_neg')
            self.test_true_pos = tf.keras.metrics.TruePositives(name='test_true_pos')
            self.test_false_neg = tf.keras.metrics.FalseNegatives(name='test_false_neg')
            self.test_false_pos = tf.keras.metrics.FalsePositives(name='test_false_pos')
        else:
            self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
            self.test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
    @tf.function
    def train_step(self, mini_batch):
        inputs = [mini_batch[0], mini_batch[1]]
        labels = mini_batch[2]
        with tf.GradientTape() as tape:
            pred = self.model(inputs, training=True)
            loss = self.loss(labels, pred)
            total_loss = loss
            if len(self.model.losses) > 0:
                reg_loss = tf.math.add_n(self.model.losses)
                total_loss = loss + reg_loss
        gradients = tape.gradient(total_loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.model.trainable_variables))
        if TRACK_MOVING_AVERAGE:
            self.ema.apply(self.model.trainable_variables)

        self.train_loss(loss)
        self.train_accuracy(labels, pred)

        return loss
    @tf.function
    def test_step(self, mini_batch):
        inputs = [mini_batch[0], mini_batch[1]]
        labels = mini_batch[2]
        pred = self.model(inputs, training=False)
        t_loss = self.loss(labels, pred)

        self.test_loss(t_loss)
        self.test_accuracy(labels, pred)

        if NumClasses == 2:
            self.test_auc(labels, pred)
            self.test_recall(labels, pred)
            self.test_precision(labels, pred)
            self.test_true_neg(labels, pred)
            self.test_true_pos(labels, pred)
            self.test_false_neg(labels, pred)
            self.test_false_pos(labels, pred)
    def predict(self, mini_batch):
        inputs = [mini_batch[0], mini_batch[1]]
        labels = mini_batch[2]
        pred = self.model(inputs, training=False)
        t_loss = self.loss(labels, pred)

        self.test_labels.append(labels)
        self.test_preds.append(pred.numpy())

        self.test_loss(t_loss)
        self.test_accuracy(labels, pred)

        if NumClasses == 2:
            self.test_auc(labels, pred)
            self.test_recall(labels, pred)
            self.test_precision(labels, pred)
            self.test_true_neg(labels, pred)
            self.test_true_pos(labels, pred)
            self.test_false_neg(labels, pred)
            self.test_false_pos(labels, pred)
    def reset_test_metrics(self):
        self.test_loss.reset_states()
        self.test_accuracy.reset_states()

        if NumClasses == 2:
            self.test_auc.reset_states()
            self.test_recall.reset_states()
            self.test_precision.reset_states()
            self.test_true_neg.reset_states()
            self.test_true_pos.reset_states()
            self.test_false_neg.reset_states()
            self.test_false_pos.reset_states()
    def get_metrics(self):
        recall = self.test_recall.result()
        precsn = self.test_precision.result()
        f1 = 2 * (precsn * recall) / (precsn + recall)

        tn = self.test_true_neg.result()
        tp = self.test_true_pos.result()
        fn = self.test_false_neg.result()
        fp = self.test_false_pos.result()

        mcc = ((tp * tn) - (fp * fn)) / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

        return f1, mcc
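    # Formulas computed above from the accumulated test metrics:
    #     F1  = 2 * precision * recall / (precision + recall)
    #     MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
    # Both are NaN when a denominator term is zero, e.g. if the model predicts
    # a single class for every test sample.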
    def do_training(self, ckpt_dir=None):

        if ckpt_dir is None:
            if not os.path.exists(modeldir):
                os.mkdir(modeldir)
            ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
            ckpt_manager = tf.train.CheckpointManager(ckpt, modeldir, max_to_keep=3)
        else:
            ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
            ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)

        self.writer_train = tf.summary.create_file_writer(os.path.join(logdir, 'plot_train'))
        self.writer_valid = tf.summary.create_file_writer(os.path.join(logdir, 'plot_valid'))

        step = 0
        total_time = 0
        best_test_loss = np.finfo(float).max  # fixed: np.float is deprecated in newer numpy
        best_test_acc = 0
        best_test_recall = 0
        best_test_precision = 0
        best_test_auc = 0
        best_test_f1 = 0
        best_test_mcc = 0

        if EARLY_STOP:
            es = EarlyStop()

        for epoch in range(NUM_EPOCHS):
            self.train_loss.reset_states()
            self.train_accuracy.reset_states()

            t0 = datetime.datetime.now().timestamp()

            proc_batch_cnt = 0
            n_samples = 0

            for data0, data1, label in self.train_dataset:
                trn_ds = tf.data.Dataset.from_tensor_slices((data0, data1, label))
                trn_ds = trn_ds.batch(BATCH_SIZE)
                for mini_batch in trn_ds:
                    if self.learningRateSchedule is not None:
                        loss = self.train_step(mini_batch)

                    if (step % 100) == 0:
                        with self.writer_train.as_default():
                            tf.summary.scalar('loss_trn', loss.numpy(), step=step)
                            tf.summary.scalar('learning_rate', self.optimizer._decayed_lr('float32').numpy(), step=step)
                            tf.summary.scalar('num_train_steps', step, step=step)
                            tf.summary.scalar('num_epochs', epoch, step=step)

                        self.reset_test_metrics()
                        for data0_tst, data1_tst, label_tst in self.test_dataset:
                            tst_ds = tf.data.Dataset.from_tensor_slices((data0_tst, data1_tst, label_tst))
                            tst_ds = tst_ds.batch(BATCH_SIZE)
                            for mini_batch_test in tst_ds:
                                self.test_step(mini_batch_test)

                        if NumClasses == 2:
                            f1, mcc = self.get_metrics()

                        with self.writer_valid.as_default():
                            tf.summary.scalar('loss_val', self.test_loss.result(), step=step)
                            tf.summary.scalar('acc_val', self.test_accuracy.result(), step=step)
                            if NumClasses == 2:
                                tf.summary.scalar('auc_val', self.test_auc.result(), step=step)
                                tf.summary.scalar('recall_val', self.test_recall.result(), step=step)
                                tf.summary.scalar('prec_val', self.test_precision.result(), step=step)
                                tf.summary.scalar('f1_val', f1, step=step)
                                tf.summary.scalar('mcc_val', mcc, step=step)
                            tf.summary.scalar('num_train_steps', step, step=step)
                            tf.summary.scalar('num_epochs', epoch, step=step)

                        print('****** test loss, acc, lr: ', self.test_loss.result().numpy(),
                              self.test_accuracy.result().numpy(),
                              self.optimizer._decayed_lr('float32').numpy())

                    step += 1
                    print('train loss: ', loss.numpy())

                proc_batch_cnt += 1
                n_samples += data0.shape[0]
                print('proc_batch_cnt: ', proc_batch_cnt, n_samples)

            t1 = datetime.datetime.now().timestamp()
            print('End of Epoch: ', epoch + 1, ' elapsed time: ', (t1 - t0))
            total_time += (t1 - t0)

            self.reset_test_metrics()
            for data0, data1, label in self.test_dataset:
                ds = tf.data.Dataset.from_tensor_slices((data0, data1, label))
                ds = ds.batch(BATCH_SIZE)
                for mini_batch in ds:
                    self.test_step(mini_batch)

            if NumClasses == 2:
                f1, mcc = self.get_metrics()
                print('loss, acc, recall, precision, auc, f1, mcc: ', self.test_loss.result().numpy(),
                      self.test_accuracy.result().numpy(), self.test_recall.result().numpy(),
                      self.test_precision.result().numpy(), self.test_auc.result().numpy(),
                      f1.numpy(), mcc.numpy())
            else:
                print('loss, acc: ', self.test_loss.result().numpy(), self.test_accuracy.result().numpy())
            print('------------------------------------------------------')

            tst_loss = self.test_loss.result().numpy()
            if tst_loss < best_test_loss:
                best_test_loss = tst_loss
                if NumClasses == 2:
                    best_test_acc = self.test_accuracy.result().numpy()
                    best_test_recall = self.test_recall.result().numpy()
                    best_test_precision = self.test_precision.result().numpy()
                    best_test_auc = self.test_auc.result().numpy()
                    best_test_f1 = f1.numpy()
                    best_test_mcc = mcc.numpy()
                ckpt_manager.save()

            if self.DISK_CACHE and epoch == 0:
                f = open(cachepath, 'wb')
                pickle.dump(self.in_mem_data_cache, f)
                f.close()

            if EARLY_STOP and es.check_stop(tst_loss):
                break

        print('total time: ', total_time)
        self.writer_train.close()
        self.writer_valid.close()

        if self.h5f_l1b_trn is not None:
            self.h5f_l1b_trn.close()
        if self.h5f_l1b_tst is not None:
            self.h5f_l1b_tst.close()
        if self.h5f_l2_trn is not None:
            self.h5f_l2_trn.close()
        if self.h5f_l2_tst is not None:
            self.h5f_l2_tst.close()

        f = open('/home/rink/best_stats_' + now + '.pkl', 'wb')
        pickle.dump((best_test_loss, best_test_acc, best_test_recall, best_test_precision,
                     best_test_auc, best_test_f1, best_test_mcc), f)
        f.close()
    def build_model(self):
        flat = self.build_cnn()
        # flat_1d = self.build_1d_cnn()
        # flat = tf.keras.layers.concatenate([flat, flat_1d, flat_anc])
        # flat = tf.keras.layers.concatenate([flat, flat_1d])
        # self.build_dnn(flat)
        if self.USE_FLIGHT_ALTITUDE:
            flat = tf.keras.layers.concatenate([flat, self.inputs[1]])
        self.build_dnn(flat)
        self.model = tf.keras.Model(self.inputs, self.logits)
    def restore(self, ckpt_dir):

        ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)

        ckpt.restore(ckpt_manager.latest_checkpoint)

        self.reset_test_metrics()

        for data0, data1, label in self.test_dataset:
            ds = tf.data.Dataset.from_tensor_slices((data0, data1, label))
            ds = ds.batch(BATCH_SIZE)
            for mini_batch_test in ds:
                self.predict(mini_batch_test)
        f1, mcc = self.get_metrics()
        # label fixed to match the seven values printed
        print('loss, acc, recall, precision, auc, f1, mcc: ', self.test_loss.result().numpy(),
              self.test_accuracy.result().numpy(), self.test_recall.result().numpy(),
              self.test_precision.result().numpy(), self.test_auc.result().numpy(),
              f1.numpy(), mcc.numpy())

        labels = np.concatenate(self.test_labels)
        self.test_labels = labels

        preds = np.concatenate(self.test_preds)
        self.test_probs = preds

        if NumClasses == 2:
            preds = np.where(preds > 0.5, 1, 0)
        else:
            preds = np.argmax(preds, axis=1)
        self.test_preds = preds
    def do_evaluate(self, ckpt_dir=None, prob_thresh=0.5):
        # if ckpt_dir is not None:  # if is None, this has been done already
        #     ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=self.model)
        #     ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)
        #     ckpt.restore(ckpt_manager.latest_checkpoint)

        self.reset_test_metrics()

        pred_s = []
        for data in self.eval_dataset:
            ds = tf.data.Dataset.from_tensor_slices(data)
            ds = ds.batch(BATCH_SIZE)
            for mini_batch in ds:
                pred = self.model([mini_batch], training=False)
                pred_s.append(pred)

        preds = np.concatenate(pred_s)
        preds = preds[:, 0]
        self.test_probs = preds

        if NumClasses == 2:
            preds = np.where(preds > prob_thresh, 1, 0)
        else:
            preds = np.argmax(preds, axis=1)
        self.test_preds = preds
    def run(self, filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst):
        # This doesn't really play well with SLURM
        # with tf.device('/device:GPU:' + str(self.gpu_device)):
        self.setup_pipeline(filename_l1b_trn, filename_l1b_tst, filename_l2_trn, filename_l2_tst)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.do_training()
    def run_restore(self, filename_l1b, filename_l2, ckpt_dir):
        self.setup_test_pipeline(filename_l1b, filename_l2)
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.restore(ckpt_dir)

        if self.h5f_l1b_tst is not None:
            self.h5f_l1b_tst.close()
        if self.h5f_l2_tst is not None:
            self.h5f_l2_tst.close()
    def run_evaluate(self, filename, ckpt_dir):
        data_dct, ll, cc = make_for_full_domain_predict(filename, name_list=self.train_params)
        self.setup_eval_pipeline(data_dct, len(ll))
        self.build_model()
        self.build_training()
        self.build_evaluation()
        self.do_evaluate(ckpt_dir)
def run_restore_static(filename_l1b, filename_l2, ckpt_dir_s_path, day_night='DAY', use_flight_altitude=False):
    ckpt_dir_s = os.listdir(ckpt_dir_s_path)
    cm_s = []
    prob_s = []
    labels = None

    for ckpt in ckpt_dir_s:
        ckpt_dir = ckpt_dir_s_path + ckpt
        if not os.path.isdir(ckpt_dir):
            continue
        # fixed: was IcingIntensityNN, which is not defined in this module
        nn = IcingIntensityFCN(day_night=day_night, use_flight_altitude=use_flight_altitude)
        nn.run_restore(filename_l1b, filename_l2, ckpt_dir)
        cm_s.append(tf.math.confusion_matrix(nn.test_labels.flatten(), nn.test_preds.flatten()))
        prob_s.append(nn.test_probs.flatten())
        if labels is None:  # These should be the same
            labels = nn.test_labels.flatten()

    num = len(cm_s)
    cm_avg = cm_s[0]
    prob_avg = prob_s[0]
    for k in range(num - 1):
        cm_avg += cm_s[k + 1]
        prob_avg += prob_s[k + 1]
    cm_avg /= num
    prob_avg /= num

    return labels, prob_avg, cm_avg
def run_evaluate_static_avg(data_dct, ll, cc, ckpt_dir_s_path, day_night='DAY', flight_level=4,
                            use_flight_altitude=False, prob_thresh=0.5, satellite='GOES16', domain='FD'):
    num_elems = len(cc)
    num_lines = len(ll)
    cc = np.array(cc)
    ll = np.array(ll)

    ckpt_dir_s = os.listdir(ckpt_dir_s_path)

    nav = get_navigation(satellite, domain)

    prob_s = []
    for ckpt in ckpt_dir_s:
        ckpt_dir = ckpt_dir_s_path + ckpt
        if not os.path.isdir(ckpt_dir):
            continue
        # fixed: was IcingIntensityNN, which is not defined in this module
        nn = IcingIntensityFCN(day_night=day_night, use_flight_altitude=use_flight_altitude)
        nn.flight_level = flight_level
        nn.setup_eval_pipeline(data_dct, num_lines * num_elems)
        nn.build_model()
        nn.build_training()
        nn.build_evaluation()
        nn.do_evaluate(ckpt_dir)
        prob_s.append(nn.test_probs)

    num = len(prob_s)
    prob_avg = prob_s[0]
    for k in range(num - 1):
        prob_avg += prob_s[k + 1]
    prob_avg /= num
    probs = prob_avg

    if NumClasses == 2:
        preds = np.where(probs > prob_thresh, 1, 0)
    else:
        preds = np.argmax(probs, axis=1)
    preds_2d = preds.reshape((num_lines, num_elems))

    ll, cc = np.meshgrid(ll, cc, indexing='ij')
    cc = cc.flatten()
    ll = ll.flatten()

    ice_mask = preds == 1
    ice_cc = cc[ice_mask]
    ice_ll = ll[ice_mask]

    ice_lons, ice_lats = nav.lc_to_earth(ice_cc, ice_ll)

    return ice_lons, ice_lats, preds_2d
def run_evaluate_static(data_dct, num_tiles, ckpt_dir_s_path, day_night='DAY', l1b_or_l2='both',
                        prob_thresh=0.5, flight_levels=[0, 1, 2, 3, 4], use_flight_altitude=False):
    ckpt_dir_s = os.listdir(ckpt_dir_s_path)
    ckpt_dir = ckpt_dir_s_path + ckpt_dir_s[0]

    if not use_flight_altitude:
        flight_levels = [0]

    probs_dct = {flvl: None for flvl in flight_levels}
    preds_dct = {flvl: None for flvl in flight_levels}

    # fixed: was IcingIntensityNN, which is not defined in this module
    nn = IcingIntensityFCN(day_night=day_night, l1b_or_l2=l1b_or_l2, use_flight_altitude=use_flight_altitude)
    nn.num_data_samples = num_tiles
    nn.build_model()
    nn.build_training()
    nn.build_evaluation()

    ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=nn.model)
    ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=3)
    ckpt.restore(ckpt_manager.latest_checkpoint)

    for flvl in flight_levels:
        nn.flight_level = flvl
        nn.setup_eval_pipeline(data_dct, num_tiles)
        nn.do_evaluate(ckpt_dir)

        probs = nn.test_probs
        if NumClasses == 2:
            preds = np.where(probs > prob_thresh, 1, 0)
        else:
            preds = np.argmax(probs, axis=1)
        probs_dct[flvl] = probs
        preds_dct[flvl] = preds

    return preds_dct, probs_dct
if __name__ == "__main__":
    nn = IcingIntensityFCN()  # fixed: was IcingIntensityNN, which is not defined in this module
    nn.run('matchup_filename')  # placeholder from the original; note run() expects four filenames
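# End-to-end usage sketch (hypothetical HDF5 paths; the four-filename form
# matches run()'s signature, unlike the placeholder call above):
#     nn = IcingIntensityFCN(day_night='DAY', l1b_or_l2='both')
#     nn.run('l1b_train.h5', 'l1b_test.h5', 'l2_train.h5', 'l2_test.h5')
# To score a full domain from a directory of saved checkpoints:
#     preds_dct, probs_dct = run_evaluate_static(data_dct, num_tiles, '/path/to/ckpts/')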