diff --git a/electronic_checks.py b/electronic_checks.py index b5b5bc1d0015655e29b8314a6de54cd45a4d6c6b..b954bbd09b1a3b9e5a0fc0c1377a9d1a23b2b2d0 100644 --- a/electronic_checks.py +++ b/electronic_checks.py @@ -6,11 +6,16 @@ def find_bb_outliers(frame, parameters, bb): if not np.in1d(['{}bottomTemp'.format(bb),'{}apexTemp'.format(bb),'{}topTemp'.format(bb)], frame.columns).all(): return frame - window_length = parameters.get('window_length', 100) + window_length = parameters.get('window_length', 50) - bbb_outliers = _find_6sigma_outliers(frame['{}bottomTemp'.format(bb)], window_length) - bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length) - bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length) + if bb == 'HBB': + bbb_outliers = _find_6sigma_outliers(frame['{}bottomTemp'.format(bb)], window_length) + bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length) + bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length) + else: + bbb_outliers = _find_6sigma_outliers(frame['{}bottomTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore) + bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore) + bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore) variable_qcs = pd.DataFrame({ 'qc_{}bottomTemp'.format(bb) : bbb_outliers * 1, @@ -37,9 +42,9 @@ def calibrationambienttemp_outlier_check(frame, parameters): if 'calibrationAmbientTemp' not in frame.columns: return frame - window_length = parameters.get('window_length', 100) + window_length = parameters.get('window_length', 50) - temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length, use_mean=True) + temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length) frame['calibrationambienttemp_outlier_check'] = temp_outliers * 1 frame = update_variable_qc(frame, pd.DataFrame({'qc_calibrationAmbientTemp':temp_outliers*1}, index=frame.index)) annotate_all(frame, temp_outliers, 'calibrationAmbientTemp outlier') @@ -48,23 +53,25 @@ def calibrationambienttemp_outlier_check(frame, parameters): class CheckList(BaseCheckList): checks = [ hbb_temp_outlier_check , abb_temp_outlier_check, calibrationambienttemp_outlier_check ] -def _compute_robust_zscore(frame, window_length, use_mean=False): - use_mean = False - - if use_mean: - robust_rolling_std = frame.rolling(window=window_length, center=True, min_periods=1).std() - return abs((frame - frame.rolling(window=window_length, center=True, min_periods=1).mean()) / robust_rolling_std) - else: - # Compute a centered rolling MAD over window_length - rolling_mad = abs(frame - frame.rolling(window=window_length, center=True, min_periods=1).median() - ).rolling(window=window_length, center=True, min_periods=1).median() - # standard deviation is proportional to median absolute deviation I'm told - robust_rolling_std = rolling_mad * 1.48 - return abs((frame - frame.rolling(window=window_length, center=True, min_periods=1).median()) / robust_rolling_std) +def _compute_robust_zscore(frame, window_length): + median_values = frame.rolling(window=window_length, center=True, min_periods=1).median() + # Compute the MAD + mad = abs(frame - median_values).median() + # standard deviation is proportional to median absolute deviation I'm told + robust_std = mad * 1.48 + # compute the Mahalanobis distance from rolling median + return abs((frame - median_values) / robust_std) + +def _compute_robust_rate_zscore(frame, window_length=None): + time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64), + index=frame.index[1:]) + changes = frame.diff() / time_diffs + mad_diff = abs(changes - changes.median()).median() * 1.48 + return abs(frame.diff() / time_diffs) -def _find_6sigma_outliers(frame, window_length, use_mean=False): +def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore): # Find outliers with deviation greater than 6 sigma - outlier_mask = _compute_robust_zscore(frame, window_length, use_mean) > 6 + outlier_mask = estimation_func(frame, window_length) > 6 return outlier_mask