From c9fc42d6396a394c495ecd869f091fb68e9c312f Mon Sep 17 00:00:00 2001 From: Coda Phillips <cphillips@sean.ssec.wisc.edu> Date: Mon, 11 Jul 2016 14:37:26 -0500 Subject: [PATCH] Factor zscore functions to util --- electronic_checks.py | 18 +----------------- interpret_qc.py | 20 ++++++++------------ radiometric_checks.py | 24 +++++++++++++++++++++++- util.py | 16 ++++++++++++++++ 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/electronic_checks.py b/electronic_checks.py index b954bbd..3bf31c1 100644 --- a/electronic_checks.py +++ b/electronic_checks.py @@ -1,4 +1,4 @@ -from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc +from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc, _compute_robust_zscore, _compute_robust_rate_zscore import numpy as np import pandas as pd @@ -52,22 +52,6 @@ def calibrationambienttemp_outlier_check(frame, parameters): class CheckList(BaseCheckList): checks = [ hbb_temp_outlier_check , abb_temp_outlier_check, calibrationambienttemp_outlier_check ] - -def _compute_robust_zscore(frame, window_length): - median_values = frame.rolling(window=window_length, center=True, min_periods=1).median() - # Compute the MAD - mad = abs(frame - median_values).median() - # standard deviation is proportional to median absolute deviation I'm told - robust_std = mad * 1.48 - # compute the Mahalanobis distance from rolling median - return abs((frame - median_values) / robust_std) - -def _compute_robust_rate_zscore(frame, window_length=None): - time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64), - index=frame.index[1:]) - changes = frame.diff() / time_diffs - mad_diff = abs(changes - changes.median()).median() * 1.48 - return abs(frame.diff() / time_diffs) def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore): # Find outliers with deviation greater than 6 sigma diff --git a/interpret_qc.py b/interpret_qc.py index 83d45e4..9b4cb04 100644 --- a/interpret_qc.py +++ b/interpret_qc.py @@ -36,18 +36,14 @@ def qc_day(qc_path): qc_frame_sum = (qc_frame > .95).sum(axis=0).to_string() plots = [] - - for qc_variable in ([ - 'ABBapexTemp', - 'ABBtopTemp', - 'ABBbottomTemp', - 'HBBapexTemp', - 'HBBtopTemp', - 'HBBbottomTemp', - 'calibrationAmbientTemp']): - plot = plot_variable_qc(frame, qc_variable) - if plot is not None: - plots.append(plot) + qc_variables = qc_frame.columns + + for qc_variable in qc_variables: + if qc_variable.startswith('qc_') and qc_variable not in ['qc_notes','qc_percent']: + qc_variable = qc_variable.replace('qc_','') + plot = plot_variable_qc(frame, qc_variable) + if plot is not None: + plots.append(plot) return flask.render_template('qc.html', qc_path=qc_path, plots=plots, qc_frame=qc_frame_sum) diff --git a/radiometric_checks.py b/radiometric_checks.py index 34332f3..4c1347c 100644 --- a/radiometric_checks.py +++ b/radiometric_checks.py @@ -1,6 +1,15 @@ -from util import BaseCheckList +from util import BaseCheckList, annotate_all, _compute_robust_zscore, invalidate_records, update_variable_qc +import pandas as pd +import numpy as np def imaginary_radiance_check(frame, parameters): + if 'skyViewImaginaryRadiance2510_2515' not in frame.columns: + return frame + threshold = parameters.get('imaginary_radiance_threshold', 1) + imaginary_radiance_problem = abs(frame.skyViewImaginaryRadiance2510_2515) > threshold + frame['imaginary_radiance_check'] = imaginary_radiance_problem * 1 + annotate_all(frame, imaginary_radiance_problem, 'sky view imaginary radiance out of range') + frame = invalidate_records(frame, 'imaginary_radiance_check') return frame def hbb_radiance_check(frame, parameters): @@ -9,6 +18,19 @@ def hbb_radiance_check(frame, parameters): def responsivity_check(frame, parameters): # lw, sw + if not np.in1d(['LWresponsivity','SWresponsivity'], frame.columns).all(): + return frame + lw_zscore = _compute_robust_zscore(frame['LWresponsivity'], 50) + sw_zscore = _compute_robust_zscore(frame['SWresponsivity'], 50) + lw_problem = abs(lw_zscore) > 6 + sw_problem = abs(sw_zscore) > 6 + variable_qcs = pd.DataFrame({ + 'qc_LWresponsivity':lw_problem * 1, + 'qc_SWresponsivity':sw_problem * 1 + }) + frame['responsivity_check'] = (lw_problem | sw_problem) * 1 + frame = update_variable_qc(frame, variable_qcs) + frame = invalidate_records(frame, 'responsivity_check') return frame class CheckList(BaseCheckList): diff --git a/util.py b/util.py index cb8a8a6..e6ee40e 100644 --- a/util.py +++ b/util.py @@ -2,6 +2,22 @@ from itertools import takewhile import numpy as np import pandas as pd +def _compute_robust_zscore(frame, window_length): + median_values = frame.rolling(window=window_length, center=True, min_periods=1).median() + # Compute the MAD + mad = abs(frame - median_values).median() + # standard deviation is proportional to median absolute deviation I'm told + robust_std = mad * 1.48 + # compute the Mahalanobis distance from rolling median + return abs((frame - median_values) / robust_std) + +def _compute_robust_rate_zscore(frame, window_length=None): + time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64), + index=frame.index[1:]) + changes = frame.diff() / time_diffs + mad_diff = abs(changes - changes.median()).median() * 1.48 + return abs(frame.diff() / time_diffs) + def annotate(frame, loc, annotation): notes = frame.loc[loc, 'qc_notes'] if type(notes) == str and len(notes) > 0: -- GitLab