Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
"""Methods for plotting ROC (receiver operating characteristic) curve."""
import numpy
import matplotlib.colors
import matplotlib.pyplot as pyplot
import util.performance_diagrams as performance_diagrams
DEFAULT_LINE_COLOUR = numpy.array([228, 26, 28], dtype=float) / 255
DEFAULT_LINE_WIDTH = 3
DEFAULT_RANDOM_LINE_COLOUR = numpy.full(3, 152. / 255)
DEFAULT_RANDOM_LINE_WIDTH = 2
LEVELS_FOR_PEIRCE_CONTOURS = numpy.linspace(0, 1, num=11, dtype=float)
FIGURE_WIDTH_INCHES = 10
FIGURE_HEIGHT_INCHES = 10
FONT_SIZE = 20
pyplot.rc('font', size=FONT_SIZE)
pyplot.rc('axes', titlesize=FONT_SIZE)
pyplot.rc('axes', labelsize=FONT_SIZE)
pyplot.rc('xtick', labelsize=FONT_SIZE)
pyplot.rc('ytick', labelsize=FONT_SIZE)
pyplot.rc('legend', fontsize=FONT_SIZE)
pyplot.rc('figure', titlesize=FONT_SIZE)
def _get_pofd_pod_grid(pofd_spacing=0.01, pod_spacing=0.01):
"""Creates grid in POFD-POD space.
M = number of rows (unique POD values) in grid
N = number of columns (unique POFD values) in grid
:param pofd_spacing: Spacing between grid cells in adjacent columns.
:param pod_spacing: Spacing between grid cells in adjacent rows.
:return: pofd_matrix: M-by-N numpy array of POFD values.
:return: pod_matrix: M-by-N numpy array of POD values.
"""
num_pofd_values = 1 + int(numpy.ceil(1. / pofd_spacing))
num_pod_values = 1 + int(numpy.ceil(1. / pod_spacing))
unique_pofd_values = numpy.linspace(0., 1., num=num_pofd_values)
unique_pod_values = numpy.linspace(0., 1., num=num_pod_values)[::-1]
return numpy.meshgrid(unique_pofd_values, unique_pod_values)
def _get_peirce_colour_scheme():
"""Returns colour scheme for Peirce score.
:return: colour_map_object: Colour scheme (instance of
`matplotlib.colors.ListedColormap`).
:return: colour_norm_object: Instance of `matplotlib.colors.BoundaryNorm`,
defining the scale of the colour map.
"""
this_colour_map_object = pyplot.cm.Blues
this_colour_norm_object = matplotlib.colors.BoundaryNorm(
LEVELS_FOR_PEIRCE_CONTOURS, this_colour_map_object.N)
rgba_matrix = this_colour_map_object(this_colour_norm_object(
LEVELS_FOR_PEIRCE_CONTOURS
))
colour_list = [
rgba_matrix[i, ..., :-1] for i in range(rgba_matrix.shape[0])
]
colour_map_object = matplotlib.colors.ListedColormap(colour_list)
colour_map_object.set_under(numpy.array([1, 1, 1]))
colour_norm_object = matplotlib.colors.BoundaryNorm(
LEVELS_FOR_PEIRCE_CONTOURS, colour_map_object.N)
return colour_map_object, colour_norm_object
def get_points_in_roc_curve(observed_labels, forecast_probabilities):
"""Creates points for ROC curve.
E = number of examples
T = number of binarization thresholds
:param observed_labels: length-E numpy array of class labels (integers in
0...1).
:param forecast_probabilities: length-E numpy array with forecast
probabilities of label = 1.
:return: pofd_by_threshold: length-T numpy array of POFD (probability of
false detection) values.
:return: pod_by_threshold: length-T numpy array of POD (probability of
detection) values.
"""
assert numpy.all(numpy.logical_or(
observed_labels == 0, observed_labels == 1
))
assert numpy.all(numpy.logical_and(
forecast_probabilities >= 0, forecast_probabilities <= 1
))
observed_labels = observed_labels.astype(int)
binarization_thresholds = numpy.linspace(0, 1, num=1001, dtype=float)
num_thresholds = len(binarization_thresholds)
pofd_by_threshold = numpy.full(num_thresholds, numpy.nan)
pod_by_threshold = numpy.full(num_thresholds, numpy.nan)
for k in range(num_thresholds):
these_forecast_labels = (
forecast_probabilities >= binarization_thresholds[k]
).astype(int)
this_num_hits = numpy.sum(numpy.logical_and(
these_forecast_labels == 1, observed_labels == 1
))
this_num_false_alarms = numpy.sum(numpy.logical_and(
these_forecast_labels == 1, observed_labels == 0
))
this_num_misses = numpy.sum(numpy.logical_and(
these_forecast_labels == 0, observed_labels == 1
))
this_num_correct_nulls = numpy.sum(numpy.logical_and(
these_forecast_labels == 0, observed_labels == 0
))
try:
pofd_by_threshold[k] = (
float(this_num_false_alarms) /
(this_num_false_alarms + this_num_correct_nulls)
)
except ZeroDivisionError:
pass
try:
pod_by_threshold[k] = (
float(this_num_hits) / (this_num_hits + this_num_misses)
)
except ZeroDivisionError:
pass
pod_by_threshold = numpy.array([1.] + pod_by_threshold.tolist() + [0.])
pofd_by_threshold = numpy.array([1.] + pofd_by_threshold.tolist() + [0.])
return pofd_by_threshold, pod_by_threshold
def plot_roc_curve(
observed_labels, forecast_probabilities,
line_colour=DEFAULT_LINE_COLOUR, line_width=DEFAULT_LINE_WIDTH,
random_line_colour=DEFAULT_RANDOM_LINE_COLOUR,
random_line_width=DEFAULT_RANDOM_LINE_WIDTH, axes_object=None):
"""Plots ROC curve.
E = number of examples
:param observed_labels: length-E numpy array of class labels (integers in
0...1).
:param forecast_probabilities: length-E numpy array with forecast
probabilities of label = 1.
:param line_colour: Colour (in any format accepted by `matplotlib.colors`).
:param line_width: Line width (real positive number).
:param random_line_colour: Colour of reference line (ROC curve for random
predictor).
:param random_line_width: Width of reference line (ROC curve for random
predictor).
:param axes_object: Will plot on these axes (instance of
`matplotlib.axes._subplots.AxesSubplot`). If `axes_object is None`,
will create new axes.
:return: pofd_by_threshold: See doc for `get_points_in_roc_curve`.
:return: pod_by_threshold: Same.
"""
pofd_by_threshold, pod_by_threshold = get_points_in_roc_curve(
observed_labels=observed_labels,
forecast_probabilities=forecast_probabilities)
if axes_object is None:
_, axes_object = pyplot.subplots(
1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES)
)
pofd_matrix, pod_matrix = _get_pofd_pod_grid()
peirce_score_matrix = pod_matrix - pofd_matrix
colour_map_object, colour_norm_object = _get_peirce_colour_scheme()
pyplot.contourf(
pofd_matrix, pod_matrix, peirce_score_matrix,
LEVELS_FOR_PEIRCE_CONTOURS, cmap=colour_map_object,
norm=colour_norm_object, vmin=0., vmax=1., axes=axes_object)
# TODO(thunderhoser): Calling private method is a HACK.
colour_bar_object = performance_diagrams._add_colour_bar(
axes_object=axes_object, colour_map_object=colour_map_object,
colour_norm_object=colour_norm_object,
values_to_colour=peirce_score_matrix, min_colour_value=0.,
max_colour_value=1., orientation_string='vertical',
extend_min=False, extend_max=False)
print(colour_bar_object)
colour_bar_object.set_label('Peirce score')
random_x_coords = numpy.array([0., 1.])
random_y_coords = numpy.array([0., 1.])
axes_object.plot(
random_x_coords, random_y_coords, color=random_line_colour,
linestyle='dashed', linewidth=random_line_width)
nan_flags = numpy.logical_or(
numpy.isnan(pofd_by_threshold), numpy.isnan(pod_by_threshold)
)
if not numpy.all(nan_flags):
real_indices = numpy.where(numpy.invert(nan_flags))[0]
axes_object.plot(
pofd_by_threshold[real_indices], pod_by_threshold[real_indices],
color=line_colour, linestyle='solid', linewidth=line_width)
axes_object.set_xlabel('POFD (probability of false detection)')
axes_object.set_ylabel('POD (probability of detection)')
axes_object.set_xlim(0., 1.)
axes_object.set_ylim(0., 1.)
return pofd_by_threshold, pod_by_threshold