Factor zscore functions to util

c9fc42d6 · Coda Phillips · b39fb73c · c9fc42d6 · c9fc42d6 · c9fc42d6
Commit c9fc42d6 authored 8 years ago by Coda Phillips
--- a/electronic_checks.py
+++ b/electronic_checks.py
-from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc
+from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc, _compute_robust_zscore, _compute_robust_rate_zscore
 import numpy as np
 import pandas as pd

@@ -52,22 +52,6 @@ def calibrationambienttemp_outlier_check(frame, parameters):

 class CheckList(BaseCheckList):
    checks = [ hbb_temp_outlier_check , abb_temp_outlier_check, calibrationambienttemp_outlier_check ]
-
-def _compute_robust_zscore(frame, window_length):
-    median_values = frame.rolling(window=window_length, center=True, min_periods=1).median()
-    # Compute the MAD
-    mad = abs(frame - median_values).median()
-    # standard deviation is proportional to median absolute deviation I'm told
-    robust_std = mad * 1.48
-    # compute the Mahalanobis distance from rolling median
-    return abs((frame - median_values) / robust_std)
-
-def _compute_robust_rate_zscore(frame, window_length=None):
-    time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64),
-                           index=frame.index[1:])
-    changes = frame.diff() / time_diffs
-    mad_diff = abs(changes - changes.median()).median() * 1.48
-    return abs(frame.diff() / time_diffs)
    
 def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore):
    # Find outliers with deviation greater than 6 sigma

--- a/interpret_qc.py
+++ b/interpret_qc.py
@@ -36,18 +36,14 @@ def qc_day(qc_path):
    qc_frame_sum = (qc_frame > .95).sum(axis=0).to_string()

    plots = []
-
-    for qc_variable in ([
-        'ABBapexTemp',
-        'ABBtopTemp',
-        'ABBbottomTemp',
-        'HBBapexTemp',
-        'HBBtopTemp',
-        'HBBbottomTemp',
-        'calibrationAmbientTemp']):
-        plot = plot_variable_qc(frame, qc_variable)
-        if plot is not None:
-            plots.append(plot)
+    qc_variables = qc_frame.columns
+
+    for qc_variable in qc_variables:
+        if qc_variable.startswith('qc_') and qc_variable not in ['qc_notes','qc_percent']:
+            qc_variable = qc_variable.replace('qc_','')
+            plot = plot_variable_qc(frame, qc_variable)
+            if plot is not None:
+                plots.append(plot)

    return flask.render_template('qc.html', qc_path=qc_path, plots=plots, qc_frame=qc_frame_sum)


--- a/radiometric_checks.py
+++ b/radiometric_checks.py
-from util import BaseCheckList
+from util import BaseCheckList, annotate_all, _compute_robust_zscore, invalidate_records, update_variable_qc
+import pandas as pd
+import numpy as np

 def imaginary_radiance_check(frame, parameters):
+    if 'skyViewImaginaryRadiance2510_2515' not in frame.columns:
+        return frame
+    threshold = parameters.get('imaginary_radiance_threshold', 1)
+    imaginary_radiance_problem = abs(frame.skyViewImaginaryRadiance2510_2515) > threshold
+    frame['imaginary_radiance_check'] = imaginary_radiance_problem * 1
+    annotate_all(frame, imaginary_radiance_problem, 'sky view imaginary radiance out of range')
+    frame = invalidate_records(frame, 'imaginary_radiance_check')
    return frame

 def hbb_radiance_check(frame, parameters):
@@ -9,6 +18,19 @@ def hbb_radiance_check(frame, parameters):

 def responsivity_check(frame, parameters):
    # lw, sw
+    if not np.in1d(['LWresponsivity','SWresponsivity'], frame.columns).all():
+        return frame
+    lw_zscore = _compute_robust_zscore(frame['LWresponsivity'], 50)
+    sw_zscore = _compute_robust_zscore(frame['SWresponsivity'], 50)
+    lw_problem = abs(lw_zscore) > 6
+    sw_problem = abs(sw_zscore) > 6
+    variable_qcs = pd.DataFrame({
+        'qc_LWresponsivity':lw_problem * 1,
+        'qc_SWresponsivity':sw_problem * 1
+    })
+    frame['responsivity_check'] = (lw_problem | sw_problem) * 1
+    frame = update_variable_qc(frame, variable_qcs)
+    frame = invalidate_records(frame, 'responsivity_check')
    return frame

 class CheckList(BaseCheckList):

--- a/util.py
+++ b/util.py
@@ -2,6 +2,22 @@ from itertools import takewhile
 import numpy as np
 import pandas as pd

+def _compute_robust_zscore(frame, window_length):
+    median_values = frame.rolling(window=window_length, center=True, min_periods=1).median()
+    # Compute the MAD
+    mad = abs(frame - median_values).median()
+    # standard deviation is proportional to median absolute deviation I'm told
+    robust_std = mad * 1.48
+    # compute the Mahalanobis distance from rolling median
+    return abs((frame - median_values) / robust_std)
+
+def _compute_robust_rate_zscore(frame, window_length=None):
+    time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64),
+                           index=frame.index[1:])
+    changes = frame.diff() / time_diffs
+    mad_diff = abs(changes - changes.median()).median() * 1.48
+    return abs(frame.diff() / time_diffs)
+
 def annotate(frame, loc, annotation):
    notes = frame.loc[loc, 'qc_notes']
    if type(notes) == str and len(notes) > 0: