Skip to content
Snippets Groups Projects
Commit 6acaf5c2 authored by Coda Phillips's avatar Coda Phillips
Browse files

Update bb outlier detection algorithm

parent 6b0eb7ca
No related branches found
No related tags found
No related merge requests found
......@@ -6,11 +6,16 @@ def find_bb_outliers(frame, parameters, bb):
if not np.in1d(['{}bottomTemp'.format(bb),'{}apexTemp'.format(bb),'{}topTemp'.format(bb)], frame.columns).all():
return frame
window_length = parameters.get('window_length', 100)
window_length = parameters.get('window_length', 50)
bbb_outliers = _find_6sigma_outliers(frame['{}bottomTemp'.format(bb)], window_length)
bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length)
bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length)
if bb == 'HBB':
bbb_outliers = _find_6sigma_outliers(frame['{}bottomTemp'.format(bb)], window_length)
bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length)
bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length)
else:
bbb_outliers = _find_6sigma_outliers(frame['{}bottomTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore)
bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore)
bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore)
variable_qcs = pd.DataFrame({
'qc_{}bottomTemp'.format(bb) : bbb_outliers * 1,
......@@ -37,9 +42,9 @@ def calibrationambienttemp_outlier_check(frame, parameters):
if 'calibrationAmbientTemp' not in frame.columns:
return frame
window_length = parameters.get('window_length', 100)
window_length = parameters.get('window_length', 50)
temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length, use_mean=True)
temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length)
frame['calibrationambienttemp_outlier_check'] = temp_outliers * 1
frame = update_variable_qc(frame, pd.DataFrame({'qc_calibrationAmbientTemp':temp_outliers*1}, index=frame.index))
annotate_all(frame, temp_outliers, 'calibrationAmbientTemp outlier')
......@@ -48,23 +53,25 @@ def calibrationambienttemp_outlier_check(frame, parameters):
class CheckList(BaseCheckList):
checks = [ hbb_temp_outlier_check , abb_temp_outlier_check, calibrationambienttemp_outlier_check ]
def _compute_robust_zscore(frame, window_length, use_mean=False):
use_mean = False
if use_mean:
robust_rolling_std = frame.rolling(window=window_length, center=True, min_periods=1).std()
return abs((frame - frame.rolling(window=window_length, center=True, min_periods=1).mean()) / robust_rolling_std)
else:
# Compute a centered rolling MAD over window_length
rolling_mad = abs(frame - frame.rolling(window=window_length, center=True, min_periods=1).median()
).rolling(window=window_length, center=True, min_periods=1).median()
# standard deviation is proportional to median absolute deviation I'm told
robust_rolling_std = rolling_mad * 1.48
return abs((frame - frame.rolling(window=window_length, center=True, min_periods=1).median()) / robust_rolling_std)
def _compute_robust_zscore(frame, window_length):
median_values = frame.rolling(window=window_length, center=True, min_periods=1).median()
# Compute the MAD
mad = abs(frame - median_values).median()
# standard deviation is proportional to median absolute deviation I'm told
robust_std = mad * 1.48
# compute the Mahalanobis distance from rolling median
return abs((frame - median_values) / robust_std)
def _compute_robust_rate_zscore(frame, window_length=None):
time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64),
index=frame.index[1:])
changes = frame.diff() / time_diffs
mad_diff = abs(changes - changes.median()).median() * 1.48
return abs(frame.diff() / time_diffs)
def _find_6sigma_outliers(frame, window_length, use_mean=False):
def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore):
# Find outliers with deviation greater than 6 sigma
outlier_mask = _compute_robust_zscore(frame, window_length, use_mean) > 6
outlier_mask = estimation_func(frame, window_length) > 6
return outlier_mask
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment