Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
python
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Tom Rink
python
Commits
56f95b05
Commit
56f95b05
authored
3 years ago
by
tomrink
Browse files
Options
Downloads
Patches
Plain Diff
snapshot...
parent
18b28d38
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
modules/deeplearning/icing.py
+403
-85
403 additions, 85 deletions
modules/deeplearning/icing.py
with
403 additions
and
85 deletions
modules/deeplearning/icing.py
+
403
−
85
View file @
56f95b05
import
tensorflow
as
tf
from
util.setup
import
logdir
,
modeldir
,
cachepath
from
util.util
import
homedir
import
subprocess
import
tensorflow_addons
as
tfa
from
util.setup
import
logdir
,
modeldir
,
cachepath
,
now
from
util.util
import
homedir
,
EarlyStop
from
util.geos_nav
import
GEOSNavigation
import
os
,
datetime
import
numpy
as
np
import
pickle
import
h5py
from
icing.pirep_goes
import
split_data
,
normalize
from
icing.pirep_goes
import
normalize
,
make_for_full_domain_predict
LOG_DEVICE_PLACEMENT
=
False
CACHE_DATA_IN_MEM
=
Tru
e
CACHE_DATA_IN_MEM
=
Fals
e
PROC_BATCH_SIZE
=
102
40
PROC_BATCH_SIZE
=
40
96
PROC_BATCH_BUFFER_SIZE
=
50000
NumLabels
=
1
BATCH_SIZE
=
256
NUM_EPOCHS
=
200
TRACK_MOVING_AVERAGE
=
False
NumClasses
=
2
if
NumClasses
==
2
:
NumLogits
=
1
else
:
NumLogits
=
NumClasses
BATCH_SIZE
=
128
NUM_EPOCHS
=
100
TRACK_MOVING_AVERAGE
=
False
EARLY_STOP
=
False
TRIPLET
=
False
CONV3D
=
False
NOISE_TRAINING
=
False
img_width
=
16
mean_std_file
=
homedir
+
'
data/icing/fovs_mean_std_day.pkl
'
mean_std_file
=
homedir
+
'
data/icing/mean_std_no_ice.pkl
'
# mean_std_file = homedir+'data/icing/mean_std_l1b_no_ice.pkl'
f
=
open
(
mean_std_file
,
'
rb
'
)
mean_std_dct
=
pickle
.
load
(
f
)
f
.
close
()
# train_params = ['cld_height_acha', 'cld_geo_thick', 'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_press_acha',
# 'cld_reff_acha', 'cld_opd_acha', 'conv_cloud_fraction', 'cld_emiss_acha']
train_params
=
[
'
cld_height_acha
'
,
'
cld_geo_thick
'
,
'
supercooled_cloud_fraction
'
,
'
cld_temp_acha
'
,
'
cld_press_acha
'
,
'
cld_reff_dcomp
'
,
'
cld_opd_dcomp
'
,
'
cld_cwp_dcomp
'
,
'
iwc_dcomp
'
,
'
lwc_dcomp
'
]
#'cloud_phase']
'
cld_reff_dcomp
'
,
'
cld_opd_dcomp
'
,
'
cld_cwp_dcomp
'
,
'
iwc_dcomp
'
,
'
lwc_dcomp
'
,
'
conv_cloud_fraction
'
,
'
cld_emiss_acha
'
]
# train_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
# 'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
# 'refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
# train_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
# 'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom']
def
build_residual_block
(
input
,
drop_rate
,
num_neurons
,
activation
,
block_name
,
doDropout
=
True
,
doBatchNorm
=
True
):
...
...
@@ -89,6 +105,7 @@ class IcingIntensityNN:
self
.
train_dataset
=
None
self
.
inner_train_dataset
=
None
self
.
test_dataset
=
None
self
.
eval_dataset
=
None
self
.
X_img
=
None
self
.
X_prof
=
None
self
.
X_u
=
None
...
...
@@ -99,8 +116,10 @@ class IcingIntensityNN:
self
.
handle
=
None
self
.
inner_handle
=
None
self
.
in_mem_batch
=
None
self
.
filename
=
None
self
.
h5f
=
None
self
.
filename_trn
=
None
self
.
h5f_trn
=
None
self
.
filename_tst
=
None
self
.
h5f_tst
=
None
self
.
h5f_l1b
=
None
self
.
logits
=
None
...
...
@@ -142,22 +161,28 @@ class IcingIntensityNN:
self
.
test_auc
=
None
self
.
test_recall
=
None
self
.
test_precision
=
None
self
.
test_confusion_matrix
=
None
self
.
test_true_pos
=
None
self
.
test_true_neg
=
None
self
.
test_false_pos
=
None
self
.
test_false_neg
=
None
self
.
test_labels
=
[]
self
.
test_preds
=
[]
self
.
test_probs
=
None
self
.
learningRateSchedule
=
None
self
.
num_data_samples
=
None
self
.
initial_learning_rate
=
None
self
.
data_dct
=
None
n_chans
=
len
(
train_params
)
NUM_PARAMS
=
1
if
TRIPLET
:
n_chans
*=
3
#self.X_img = tf.keras.Input(shape=(img_width, img_width, n_chans))
self
.
X_img
=
tf
.
keras
.
Input
(
shape
=
n_chans
)
#self.X_prof = tf.keras.Input(shape=(NUM_VERT_LEVELS, NUM_VERT_PARAMS))
#self.X_sfc = tf.keras.Input(shape=2)
self
.
inputs
.
append
(
self
.
X_img
)
#self.inputs.append(self.X_prof)
self
.
DISK_CACHE
=
False
...
...
@@ -181,7 +206,11 @@ class IcingIntensityNN:
# Memory growth must be set before GPUs have been initialized
print
(
e
)
def
get_in_mem_data_batch
(
self
,
idxs
):
def
get_in_mem_data_batch
(
self
,
idxs
,
is_training
):
h5f
=
self
.
h5f_trn
if
not
is_training
:
h5f
=
self
.
h5f_tst
key
=
frozenset
(
idxs
)
if
CACHE_DATA_IN_MEM
:
...
...
@@ -195,29 +224,69 @@ class IcingIntensityNN:
data
=
[]
for
param
in
train_params
:
nda
=
self
.
h5f
[
param
][
nd_idxs
,
]
nda
=
normalize
(
nda
,
param
,
mean_std_dct
)
nda
=
h5f
[
param
][
nd_idxs
,
]
if
NOISE_TRAINING
and
is_training
:
nda
=
normalize
(
nda
,
param
,
mean_std_dct
,
add_noise
=
True
,
noise_scale
=
0.01
,
seed
=
42
)
else
:
nda
=
normalize
(
nda
,
param
,
mean_std_dct
)
data
.
append
(
nda
)
data
=
np
.
stack
(
data
)
data
=
data
.
astype
(
np
.
float32
)
data
=
np
.
transpose
(
data
,
axes
=
(
1
,
0
))
label
=
self
.
h5f
[
'
icing_intensity
'
][
nd_idxs
]
label
=
h5f
[
'
icing_intensity
'
][
nd_idxs
]
label
=
label
.
astype
(
np
.
int32
)
label
=
np
.
where
(
label
==
-
1
,
0
,
label
)
# binary, two class
label
=
np
.
where
(
label
!=
0
,
1
,
label
)
label
=
label
.
reshape
((
label
.
shape
[
0
],
1
))
if
NumClasses
==
2
:
label
=
np
.
where
(
label
!=
0
,
1
,
label
)
label
=
label
.
reshape
((
label
.
shape
[
0
],
1
))
elif
NumClasses
==
3
:
label
=
np
.
where
(
np
.
logical_or
(
label
==
1
,
label
==
2
),
1
,
label
)
label
=
np
.
where
(
np
.
invert
(
np
.
logical_or
(
label
==
0
,
label
==
1
)),
2
,
label
)
label
=
label
.
reshape
((
label
.
shape
[
0
],
1
))
if
CACHE_DATA_IN_MEM
:
self
.
in_mem_data_cache
[
key
]
=
(
data
,
label
)
return
data
,
label
def
get_in_mem_data_batch_train
(
self
,
idxs
):
return
self
.
get_in_mem_data_batch
(
idxs
,
True
)
def
get_in_mem_data_batch_test
(
self
,
idxs
):
return
self
.
get_in_mem_data_batch
(
idxs
,
False
)
def
get_in_mem_data_batch_eval
(
self
,
idxs
):
# sort these to use as numpy indexing arrays
nd_idxs
=
np
.
array
(
idxs
)
nd_idxs
=
np
.
sort
(
nd_idxs
)
data
=
[]
for
param
in
train_params
:
nda
=
self
.
data_dct
[
param
][
nd_idxs
,
]
nda
=
normalize
(
nda
,
param
,
mean_std_dct
)
data
.
append
(
nda
)
data
=
np
.
stack
(
data
)
data
=
data
.
astype
(
np
.
float32
)
data
=
np
.
transpose
(
data
,
axes
=
(
1
,
0
))
return
data
@tf.function
(
input_signature
=
[
tf
.
TensorSpec
(
None
,
tf
.
int32
)])
def
data_function
(
self
,
indexes
):
out
=
tf
.
numpy_function
(
self
.
get_in_mem_data_batch
,
[
indexes
],
[
tf
.
float32
,
tf
.
int32
])
out
=
tf
.
numpy_function
(
self
.
get_in_mem_data_batch_train
,
[
indexes
],
[
tf
.
float32
,
tf
.
int32
])
return
out
@tf.function
(
input_signature
=
[
tf
.
TensorSpec
(
None
,
tf
.
int32
)])
def
data_function_test
(
self
,
indexes
):
out
=
tf
.
numpy_function
(
self
.
get_in_mem_data_batch_test
,
[
indexes
],
[
tf
.
float32
,
tf
.
int32
])
return
out
@tf.function
(
input_signature
=
[
tf
.
TensorSpec
(
None
,
tf
.
int32
)])
def
data_function_evaluate
(
self
,
indexes
):
out
=
tf
.
numpy_function
(
self
.
get_in_mem_data_batch_eval
,
[
indexes
],
tf
.
float32
)
return
out
def
get_train_dataset
(
self
,
indexes
):
...
...
@@ -226,7 +295,8 @@ class IcingIntensityNN:
dataset
=
tf
.
data
.
Dataset
.
from_tensor_slices
(
indexes
)
dataset
=
dataset
.
batch
(
PROC_BATCH_SIZE
)
dataset
=
dataset
.
map
(
self
.
data_function
,
num_parallel_calls
=
8
)
dataset
=
dataset
.
shuffle
(
PROC_BATCH_BUFFER_SIZE
)
dataset
=
dataset
.
cache
()
# dataset = dataset.shuffle(PROC_BATCH_BUFFER_SIZE)
dataset
=
dataset
.
prefetch
(
buffer_size
=
1
)
self
.
train_dataset
=
dataset
...
...
@@ -235,15 +305,39 @@ class IcingIntensityNN:
dataset
=
tf
.
data
.
Dataset
.
from_tensor_slices
(
indexes
)
dataset
=
dataset
.
batch
(
PROC_BATCH_SIZE
)
dataset
=
dataset
.
map
(
self
.
data_function
,
num_parallel_calls
=
8
)
dataset
=
dataset
.
map
(
self
.
data_function_test
,
num_parallel_calls
=
8
)
dataset
=
dataset
.
cache
()
self
.
test_dataset
=
dataset
def
setup_pipeline
(
self
,
filename
,
train_idxs
=
None
,
test_idxs
=
None
):
self
.
filename
=
filename
self
.
h5f
=
h5py
.
File
(
filename
,
'
r
'
)
time
=
self
.
h5f
[
'
time
'
]
num_obs
=
time
.
shape
[
0
]
trn_idxs
,
tst_idxs
=
split_data
(
num_obs
,
skip
=
4
)
def
get_evaluate_dataset
(
self
,
indexes
):
indexes
=
list
(
indexes
)
dataset
=
tf
.
data
.
Dataset
.
from_tensor_slices
(
indexes
)
dataset
=
dataset
.
batch
(
PROC_BATCH_SIZE
)
dataset
=
dataset
.
map
(
self
.
data_function_evaluate
,
num_parallel_calls
=
8
)
dataset
=
dataset
.
cache
()
self
.
eval_dataset
=
dataset
def
setup_pipeline
(
self
,
filename_trn
,
filename_tst
,
trn_idxs
=
None
,
tst_idxs
=
None
,
seed
=
None
):
self
.
filename_trn
=
filename_trn
self
.
h5f_trn
=
h5py
.
File
(
filename_trn
,
'
r
'
)
self
.
filename_tst
=
filename_tst
self
.
h5f_tst
=
h5py
.
File
(
filename_tst
,
'
r
'
)
if
trn_idxs
is
None
:
time
=
self
.
h5f_trn
[
'
time
'
]
trn_idxs
=
np
.
arange
(
time
.
shape
[
0
])
if
seed
is
not
None
:
np
.
random
.
seed
(
seed
)
np
.
random
.
shuffle
(
trn_idxs
)
time
=
self
.
h5f_tst
[
'
time
'
]
tst_idxs
=
np
.
arange
(
time
.
shape
[
0
])
if
seed
is
not
None
:
np
.
random
.
seed
(
seed
)
np
.
random
.
shuffle
(
tst_idxs
)
self
.
num_data_samples
=
trn_idxs
.
shape
[
0
]
self
.
get_train_dataset
(
trn_idxs
)
...
...
@@ -254,6 +348,30 @@ class IcingIntensityNN:
print
(
'
num test samples:
'
,
tst_idxs
.
shape
[
0
])
print
(
'
setup_pipeline: Done
'
)
def
setup_test_pipeline
(
self
,
filename
,
seed
=
None
,
shuffle
=
False
):
self
.
filename_tst
=
filename
self
.
h5f_tst
=
h5py
.
File
(
filename
,
'
r
'
)
time
=
self
.
h5f_tst
[
'
time
'
]
tst_idxs
=
np
.
arange
(
time
.
shape
[
0
])
self
.
num_data_samples
=
len
(
tst_idxs
)
if
seed
is
not
None
:
np
.
random
.
seed
(
seed
)
if
shuffle
:
np
.
random
.
shuffle
(
tst_idxs
)
self
.
get_test_dataset
(
tst_idxs
)
print
(
'
num test samples:
'
,
tst_idxs
.
shape
[
0
])
print
(
'
setup_test_pipeline: Done
'
)
def
setup_eval_pipeline
(
self
,
data_dct
,
num_tiles
):
self
.
data_dct
=
data_dct
idxs
=
np
.
arange
(
num_tiles
)
self
.
num_data_samples
=
idxs
.
shape
[
0
]
self
.
get_evaluate_dataset
(
idxs
)
def
build_1d_cnn
(
self
):
print
(
'
build_1d_cnn
'
)
# padding = 'VALID'
...
...
@@ -319,63 +437,66 @@ class IcingIntensityNN:
fc
=
build_residual_block
(
fc
,
drop_rate
,
fac
*
n_hidden
,
activation
,
'
Residual_Block_6
'
,
doBatchNorm
=
True
)
fc
=
build_residual_block
(
fc
,
drop_rate
,
fac
*
n_hidden
,
activation
,
'
Residual_Block_7
'
,
doBatchNorm
=
True
)
fc
=
build_residual_block
(
fc
,
drop_rate
,
fac
*
n_hidden
,
activation
,
'
Residual_Block_8
'
,
doBatchNorm
=
True
)
#
fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_7', doBatchNorm=True)
#
#
fc = build_residual_block(fc, drop_rate, fac*n_hidden, activation, 'Residual_Block_8', doBatchNorm=True)
fc
=
tf
.
keras
.
layers
.
Dense
(
n_hidden
,
activation
=
activation
)(
fc
)
fc
=
tf
.
keras
.
layers
.
BatchNormalization
()(
fc
)
print
(
fc
.
shape
)
# activation = tf.nn.softmax
activation
=
tf
.
nn
.
sigmoid
# For binary
if
NumClasses
==
2
:
activation
=
tf
.
nn
.
sigmoid
# For binary
else
:
activation
=
tf
.
nn
.
softmax
# For multi-class
logits
=
tf
.
keras
.
layers
.
Dense
(
NumLabels
,
activation
=
activation
)(
fc
)
# Called logits, but these are actually probabilities, see activation
logits
=
tf
.
keras
.
layers
.
Dense
(
NumLogits
,
activation
=
activation
)(
fc
)
print
(
logits
.
shape
)
self
.
logits
=
logits
def
build_training
(
self
):
self
.
loss
=
tf
.
keras
.
losses
.
BinaryCrossentropy
(
from_logits
=
False
)
# for two-class only
#self.loss = tf.keras.losses.SparseCategoricalCrossentropy() # For multi-class
if
NumClasses
==
2
:
self
.
loss
=
tf
.
keras
.
losses
.
BinaryCrossentropy
(
from_logits
=
False
)
# for two-class only
else
:
self
.
loss
=
tf
.
keras
.
losses
.
SparseCategoricalCrossentropy
(
from_logits
=
False
)
# For multi-class
# decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
initial_learning_rate
=
0.002
decay_rate
=
0.95
steps_per_epoch
=
int
(
self
.
num_data_samples
/
BATCH_SIZE
)
# one epoch
# decay_steps = int(steps_per_epoch / 2)
decay_steps
=
4
*
steps_per_epoch
decay_steps
=
8
*
steps_per_epoch
print
(
'
initial rate, decay rate, steps/epoch, decay steps:
'
,
initial_learning_rate
,
decay_rate
,
steps_per_epoch
,
decay_steps
)
self
.
learningRateSchedule
=
tf
.
keras
.
optimizers
.
schedules
.
ExponentialDecay
(
initial_learning_rate
,
decay_steps
,
decay_rate
)
optimizer
=
tf
.
keras
.
optimizers
.
Adam
(
learning_rate
=
self
.
learningRateSchedule
)
if
TRACK_MOVING_AVERAGE
:
ema
=
tf
.
train
.
ExponentialMovingAverage
(
decay
=
0.999
)
with
tf
.
control_dependencies
([
optimizer
]):
optimizer
=
ema
.
apply
(
self
.
model
.
trainable_variables
)
if
TRACK_MOVING_AVERAGE
:
# Not really sure this works properly
optimizer
=
tfa
.
optimizers
.
MovingAverage
(
optimizer
)
self
.
optimizer
=
optimizer
self
.
initial_learning_rate
=
initial_learning_rate
def
build_evaluation
(
self
):
self
.
train_accuracy
=
tf
.
keras
.
metrics
.
BinaryAccuracy
(
name
=
'
train_accuracy
'
)
self
.
test_accuracy
=
tf
.
keras
.
metrics
.
BinaryAccuracy
(
name
=
'
test_accuracy
'
)
self
.
test_auc
=
tf
.
keras
.
metrics
.
AUC
(
name
=
'
test_auc
'
)
self
.
test_recall
=
tf
.
keras
.
metrics
.
Recall
(
name
=
'
test_recall
'
)
self
.
test_precision
=
tf
.
keras
.
metrics
.
Precision
(
name
=
'
test_precision
'
)
self
.
train_loss
=
tf
.
keras
.
metrics
.
Mean
(
name
=
'
train_loss
'
)
self
.
test_loss
=
tf
.
keras
.
metrics
.
Mean
(
name
=
'
test_loss
'
)
def
build_predict
(
self
):
_
,
pred
=
tf
.
nn
.
top_k
(
self
.
logits
)
self
.
pred_class
=
pred
if
TRACK_MOVING_AVERAGE
:
self
.
variable_averages
=
tf
.
train
.
ExponentialMovingAverage
(
0.999
,
self
.
global_step
)
self
.
variable_averages
.
apply
(
self
.
model
.
trainable_variables
)
if
NumClasses
==
2
:
self
.
train_accuracy
=
tf
.
keras
.
metrics
.
BinaryAccuracy
(
name
=
'
train_accuracy
'
)
self
.
test_accuracy
=
tf
.
keras
.
metrics
.
BinaryAccuracy
(
name
=
'
test_accuracy
'
)
self
.
test_auc
=
tf
.
keras
.
metrics
.
AUC
(
name
=
'
test_auc
'
)
self
.
test_recall
=
tf
.
keras
.
metrics
.
Recall
(
name
=
'
test_recall
'
)
self
.
test_precision
=
tf
.
keras
.
metrics
.
Precision
(
name
=
'
test_precision
'
)
self
.
test_true_neg
=
tf
.
keras
.
metrics
.
TrueNegatives
(
name
=
'
test_true_neg
'
)
self
.
test_true_pos
=
tf
.
keras
.
metrics
.
TruePositives
(
name
=
'
test_true_pos
'
)
self
.
test_false_neg
=
tf
.
keras
.
metrics
.
FalseNegatives
(
name
=
'
test_false_neg
'
)
self
.
test_false_pos
=
tf
.
keras
.
metrics
.
FalsePositives
(
name
=
'
test_false_pos
'
)
else
:
self
.
train_accuracy
=
tf
.
keras
.
metrics
.
SparseCategoricalAccuracy
(
name
=
'
train_accuracy
'
)
self
.
test_accuracy
=
tf
.
keras
.
metrics
.
SparseCategoricalAccuracy
(
name
=
'
test_accuracy
'
)
@tf.function
def
train_step
(
self
,
mini_batch
):
...
...
@@ -405,9 +526,14 @@ class IcingIntensityNN:
self
.
test_loss
(
t_loss
)
self
.
test_accuracy
(
labels
,
pred
)
self
.
test_auc
(
labels
,
pred
)
self
.
test_recall
(
labels
,
pred
)
self
.
test_precision
(
labels
,
pred
)
if
NumClasses
==
2
:
self
.
test_auc
(
labels
,
pred
)
self
.
test_recall
(
labels
,
pred
)
self
.
test_precision
(
labels
,
pred
)
self
.
test_true_neg
(
labels
,
pred
)
self
.
test_true_pos
(
labels
,
pred
)
self
.
test_false_neg
(
labels
,
pred
)
self
.
test_false_pos
(
labels
,
pred
)
def
predict
(
self
,
mini_batch
):
inputs
=
[
mini_batch
[
0
]]
...
...
@@ -415,6 +541,45 @@ class IcingIntensityNN:
pred
=
self
.
model
(
inputs
,
training
=
False
)
t_loss
=
self
.
loss
(
labels
,
pred
)
self
.
test_labels
.
append
(
labels
)
self
.
test_preds
.
append
(
pred
.
numpy
())
self
.
test_loss
(
t_loss
)
self
.
test_accuracy
(
labels
,
pred
)
if
NumClasses
==
2
:
self
.
test_auc
(
labels
,
pred
)
self
.
test_recall
(
labels
,
pred
)
self
.
test_precision
(
labels
,
pred
)
self
.
test_true_neg
(
labels
,
pred
)
self
.
test_true_pos
(
labels
,
pred
)
self
.
test_false_neg
(
labels
,
pred
)
self
.
test_false_pos
(
labels
,
pred
)
def
reset_test_metrics
(
self
):
self
.
test_loss
.
reset_states
()
self
.
test_accuracy
.
reset_states
()
if
NumClasses
==
2
:
self
.
test_auc
.
reset_states
()
self
.
test_recall
.
reset_states
()
self
.
test_precision
.
reset_states
()
self
.
test_true_neg
.
reset_states
()
self
.
test_true_pos
.
reset_states
()
self
.
test_false_neg
.
reset_states
()
self
.
test_false_pos
.
reset_states
()
def
get_metrics
(
self
):
recall
=
self
.
test_recall
.
result
()
precsn
=
self
.
test_precision
.
result
()
f1
=
2
*
(
precsn
*
recall
)
/
(
precsn
+
recall
)
tn
=
self
.
test_true_neg
.
result
()
tp
=
self
.
test_true_pos
.
result
()
fn
=
self
.
test_false_neg
.
result
()
fp
=
self
.
test_false_pos
.
result
()
mcc
=
((
tp
*
tn
)
-
(
fp
*
fn
))
/
np
.
sqrt
((
tp
+
fp
)
*
(
tp
+
fn
)
*
(
tn
+
fp
)
*
(
tn
+
fn
))
return
f1
,
mcc
def
do_training
(
self
,
ckpt_dir
=
None
):
if
ckpt_dir
is
None
:
...
...
@@ -431,6 +596,16 @@ class IcingIntensityNN:
step
=
0
total_time
=
0
best_test_loss
=
np
.
finfo
(
dtype
=
np
.
float
).
max
best_test_acc
=
0
best_test_recall
=
0
best_test_precision
=
0
best_test_auc
=
0
best_test_f1
=
0
best_test_mcc
=
0
if
EARLY_STOP
:
es
=
EarlyStop
()
for
epoch
in
range
(
NUM_EPOCHS
):
self
.
train_loss
.
reset_states
()
...
...
@@ -455,20 +630,27 @@ class IcingIntensityNN:
tf
.
summary
.
scalar
(
'
num_train_steps
'
,
step
,
step
=
step
)
tf
.
summary
.
scalar
(
'
num_epochs
'
,
epoch
,
step
=
step
)
self
.
test_loss
.
reset_states
()
self
.
test_accuracy
.
reset_states
()
self
.
reset_test_metrics
()
for
data0_tst
,
label_tst
in
self
.
test_dataset
:
tst_ds
=
tf
.
data
.
Dataset
.
from_tensor_slices
((
data0_tst
,
label_tst
))
tst_ds
=
tst_ds
.
batch
(
BATCH_SIZE
)
for
mini_batch_test
in
tst_ds
:
self
.
test_step
(
mini_batch_test
)
if
NumClasses
==
2
:
f1
,
mcc
=
self
.
get_metrics
()
with
self
.
writer_valid
.
as_default
():
tf
.
summary
.
scalar
(
'
loss_val
'
,
self
.
test_loss
.
result
(),
step
=
step
)
tf
.
summary
.
scalar
(
'
acc_val
'
,
self
.
test_accuracy
.
result
(),
step
=
step
)
tf
.
summary
.
scalar
(
'
num_train_steps
'
,
step
,
step
=
step
)
tf
.
summary
.
scalar
(
'
num_epochs
'
,
epoch
,
step
=
step
)
if
NumClasses
==
2
:
tf
.
summary
.
scalar
(
'
auc_val
'
,
self
.
test_auc
.
result
(),
step
=
step
)
tf
.
summary
.
scalar
(
'
recall_val
'
,
self
.
test_recall
.
result
(),
step
=
step
)
tf
.
summary
.
scalar
(
'
prec_val
'
,
self
.
test_precision
.
result
(),
step
=
step
)
tf
.
summary
.
scalar
(
'
f1_val
'
,
f1
,
step
=
step
)
tf
.
summary
.
scalar
(
'
mcc_val
'
,
mcc
,
step
=
step
)
tf
.
summary
.
scalar
(
'
num_train_steps
'
,
step
,
step
=
step
)
tf
.
summary
.
scalar
(
'
num_epochs
'
,
epoch
,
step
=
step
)
print
(
'
****** test loss, acc:
'
,
self
.
test_loss
.
result
().
numpy
(),
self
.
test_accuracy
.
result
().
numpy
())
...
...
@@ -483,33 +665,56 @@ class IcingIntensityNN:
print
(
'
End of Epoch:
'
,
epoch
+
1
,
'
elapsed time:
'
,
(
t1
-
t0
))
total_time
+=
(
t1
-
t0
)
self
.
test_loss
.
reset_states
()
self
.
test_accuracy
.
reset_states
()
self
.
reset_test_metrics
()
for
data0
,
label
in
self
.
test_dataset
:
ds
=
tf
.
data
.
Dataset
.
from_tensor_slices
((
data0
,
label
))
ds
=
ds
.
batch
(
BATCH_SIZE
)
for
mini_batch
in
ds
:
self
.
test_step
(
mini_batch
)
print
(
'
loss, acc :
'
,
self
.
test_loss
.
result
().
numpy
(),
self
.
test_accuracy
.
result
().
numpy
())
print
(
'
---------------------------------------------------------
'
)
ckpt_manager
.
save
()
if
NumClasses
==
2
:
f1
,
mcc
=
self
.
get_metrics
()
print
(
'
loss, acc, recall, precision, auc, f1, mcc:
'
,
self
.
test_loss
.
result
().
numpy
(),
self
.
test_accuracy
.
result
().
numpy
(),
self
.
test_recall
.
result
().
numpy
(),
self
.
test_precision
.
result
().
numpy
(),
self
.
test_auc
.
result
().
numpy
(),
f1
.
numpy
(),
mcc
.
numpy
())
else
:
print
(
'
loss, acc:
'
,
self
.
test_loss
.
result
().
numpy
(),
self
.
test_accuracy
.
result
().
numpy
())
print
(
'
------------------------------------------------------
'
)
if
TRACK_MOVING_AVERAGE
:
# This may not really work properly
self
.
optimizer
.
assign_average_vars
(
self
.
model
.
trainable_variables
)
tst_loss
=
self
.
test_loss
.
result
().
numpy
()
if
tst_loss
<
best_test_loss
:
best_test_loss
=
tst_loss
best_test_acc
=
self
.
test_accuracy
.
result
().
numpy
()
best_test_recall
=
self
.
test_recall
.
result
().
numpy
()
best_test_precision
=
self
.
test_precision
.
result
().
numpy
()
best_test_auc
=
self
.
test_auc
.
result
().
numpy
()
best_test_f1
=
f1
.
numpy
()
best_test_mcc
=
mcc
.
numpy
()
ckpt_manager
.
save
()
if
self
.
DISK_CACHE
and
epoch
==
0
:
f
=
open
(
cachepath
,
'
wb
'
)
pickle
.
dump
(
self
.
in_mem_data_cache
,
f
)
f
.
close
()
if
EARLY_STOP
and
es
.
check_stop
(
tst_loss
):
break
print
(
'
total time:
'
,
total_time
)
self
.
writer_train
.
close
()
self
.
writer_valid
.
close
()
self
.
h5f_trn
.
close
()
self
.
h5f_tst
.
close
()
f
=
open
(
'
/home/rink/best_stats_
'
+
now
+
'
.pkl
'
,
'
wb
'
)
pickle
.
dump
((
best_test_loss
,
best_test_acc
,
best_test_recall
,
best_test_precision
,
best_test_auc
,
best_test_f1
,
best_test_mcc
),
f
)
f
.
close
()
def
build_model
(
self
):
# flat = self.build_cnn()
# flat_1d = self.build_1d_cnn()
# flat = tf.keras.layers.concatenate([flat, flat_1d, flat_anc])
# flat = tf.keras.layers.concatenate([flat, flat_1d])
# self.build_dnn(flat)
self
.
build_dnn
()
self
.
model
=
tf
.
keras
.
Model
(
self
.
inputs
,
self
.
logits
)
...
...
@@ -523,27 +728,140 @@ class IcingIntensityNN:
self
.
test_loss
.
reset_states
()
self
.
test_accuracy
.
reset_states
()
for
abi_tst
,
temp_tst
,
lbfp_tst
in
self
.
test_dataset
:
ds
=
tf
.
data
.
Dataset
.
from_tensor_slices
((
abi_tst
,
temp_tst
,
lbfp_tst
))
for
data0
,
label
in
self
.
test_dataset
:
ds
=
tf
.
data
.
Dataset
.
from_tensor_slices
((
data0
,
label
))
ds
=
ds
.
batch
(
BATCH_SIZE
)
for
mini_batch_test
in
ds
:
self
.
predict
(
mini_batch_test
)
print
(
'
loss, acc:
'
,
self
.
test_loss
.
result
(),
self
.
test_accuracy
.
result
())
def
run
(
self
,
filename
,
filename_l1b
=
None
,
train_dict
=
None
,
valid_dict
=
None
):
labels
=
np
.
concatenate
(
self
.
test_labels
)
self
.
test_labels
=
labels
preds
=
np
.
concatenate
(
self
.
test_preds
)
self
.
test_probs
=
preds
if
NumClasses
==
2
:
preds
=
np
.
where
(
preds
>
0.5
,
1
,
0
)
else
:
preds
=
np
.
argmax
(
preds
,
axis
=
1
)
self
.
test_preds
=
preds
def
do_evaluate
(
self
,
ckpt_dir
,
prob_thresh
=
0.5
):
ckpt
=
tf
.
train
.
Checkpoint
(
step
=
tf
.
Variable
(
1
),
model
=
self
.
model
)
ckpt_manager
=
tf
.
train
.
CheckpointManager
(
ckpt
,
ckpt_dir
,
max_to_keep
=
3
)
ckpt
.
restore
(
ckpt_manager
.
latest_checkpoint
)
pred_s
=
[]
for
data
in
self
.
eval_dataset
:
ds
=
tf
.
data
.
Dataset
.
from_tensor_slices
(
data
)
ds
=
ds
.
batch
(
BATCH_SIZE
)
for
mini_batch
in
ds
:
pred
=
self
.
model
([
mini_batch
],
training
=
False
)
pred_s
.
append
(
pred
)
preds
=
np
.
concatenate
(
pred_s
)
preds
=
preds
[:,
0
]
self
.
test_probs
=
preds
if
NumClasses
==
2
:
preds
=
np
.
where
(
preds
>
prob_thresh
,
1
,
0
)
else
:
preds
=
np
.
argmax
(
preds
,
axis
=
1
)
self
.
test_preds
=
preds
def
run
(
self
,
filename_trn
,
filename_tst
):
with
tf
.
device
(
'
/device:GPU:
'
+
str
(
self
.
gpu_device
)):
self
.
setup_pipeline
(
filename
,
train_idxs
=
train_dict
,
test_idxs
=
valid_dic
t
)
self
.
setup_pipeline
(
filename
_trn
,
filename_ts
t
)
self
.
build_model
()
self
.
build_training
()
self
.
build_evaluation
()
self
.
do_training
()
def
run_restore
(
self
,
matchup_dic
t
,
ckpt_dir
):
self
.
setup_pipeline
(
None
,
None
,
matchup_dic
t
)
def
run_restore
(
self
,
filename_ts
t
,
ckpt_dir
):
self
.
setup_
test_
pipeline
(
filename_ts
t
)
self
.
build_model
()
self
.
build_training
()
self
.
build_evaluation
()
self
.
restore
(
ckpt_dir
)
self
.
h5f_tst
.
close
()
def
run_evaluate
(
self
,
filename
,
ckpt_dir
):
data_dct
,
ll
,
cc
=
make_for_full_domain_predict
(
filename
,
name_list
=
train_params
)
self
.
setup_eval_pipeline
(
data_dct
,
len
(
ll
))
self
.
build_model
()
self
.
build_training
()
self
.
build_evaluation
()
self
.
do_evaluate
(
ckpt_dir
)
def
run_restore_static
(
filename_tst
,
ckpt_dir_s_path
):
ckpt_dir_s
=
os
.
listdir
(
ckpt_dir_s_path
)
cm_s
=
[]
for
ckpt
in
ckpt_dir_s
:
ckpt_dir
=
ckpt_dir_s_path
+
ckpt
if
not
os
.
path
.
isdir
(
ckpt_dir
):
continue
nn
=
IcingIntensityNN
()
nn
.
run_restore
(
filename_tst
,
ckpt_dir
)
cm_s
.
append
(
tf
.
math
.
confusion_matrix
(
nn
.
test_labels
.
flatten
(),
nn
.
test_preds
.
flatten
()))
num
=
len
(
cm_s
)
cm_avg
=
cm_s
[
0
]
for
k
in
range
(
num
-
1
):
cm_avg
+=
cm_s
[
k
+
1
]
cm_avg
/=
num
return
cm_avg
def
run_evaluate_static
(
filename
,
ckpt_dir_s_path
,
prob_thresh
=
0.5
):
data_dct
,
ll
,
cc
=
make_for_full_domain_predict
(
filename
,
name_list
=
train_params
)
ckpt_dir_s
=
os
.
listdir
(
ckpt_dir_s_path
)
prob_s
=
[]
for
ckpt
in
ckpt_dir_s
:
ckpt_dir
=
ckpt_dir_s_path
+
ckpt
if
not
os
.
path
.
isdir
(
ckpt_dir
):
continue
nn
=
IcingIntensityNN
()
nn
.
setup_eval_pipeline
(
data_dct
,
len
(
ll
))
nn
.
build_model
()
nn
.
build_training
()
nn
.
build_evaluation
()
nn
.
do_evaluate
(
ckpt_dir
,
ll
,
cc
)
prob_s
.
append
(
nn
.
test_probs
)
num
=
len
(
prob_s
)
prob_avg
=
prob_s
[
0
]
for
k
in
range
(
num
-
1
):
prob_avg
+=
prob_s
[
k
+
1
]
prob_avg
/=
num
probs
=
prob_avg
if
NumClasses
==
2
:
preds
=
np
.
where
(
probs
>
prob_thresh
,
1
,
0
)
else
:
preds
=
np
.
argmax
(
probs
,
axis
=
1
)
cc
=
np
.
array
(
cc
)
ll
=
np
.
array
(
ll
)
ice_mask
=
preds
==
1
print
(
cc
.
shape
,
ll
.
shape
,
ice_mask
.
shape
)
ice_cc
=
cc
[
ice_mask
]
ice_ll
=
ll
[
ice_mask
]
nav
=
GEOSNavigation
(
sub_lon
=-
75.0
,
CFAC
=
5.6E-05
,
COFF
=-
0.101332
,
LFAC
=-
5.6E-05
,
LOFF
=
0.128212
,
num_elems
=
2500
,
num_lines
=
1500
)
ice_lons
=
[]
ice_lats
=
[]
for
k
in
range
(
ice_cc
.
shape
[
0
]):
lon
,
lat
=
nav
.
lc_to_earth
(
ice_cc
[
k
],
ice_ll
[
k
])
ice_lons
.
append
(
lon
)
ice_lats
.
append
(
lat
)
return
filename
,
ice_lons
,
ice_lats
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment