From c9fc42d6396a394c495ecd869f091fb68e9c312f Mon Sep 17 00:00:00 2001
From: Coda Phillips <cphillips@sean.ssec.wisc.edu>
Date: Mon, 11 Jul 2016 14:37:26 -0500
Subject: [PATCH] Factor zscore functions to util

---
 electronic_checks.py  | 18 +-----------------
 interpret_qc.py       | 20 ++++++++------------
 radiometric_checks.py | 24 +++++++++++++++++++++++-
 util.py               | 16 ++++++++++++++++
 4 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/electronic_checks.py b/electronic_checks.py
index b954bbd..3bf31c1 100644
--- a/electronic_checks.py
+++ b/electronic_checks.py
@@ -1,4 +1,4 @@
-from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc
+from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc, _compute_robust_zscore, _compute_robust_rate_zscore
 import numpy as np
 import pandas as pd
 
@@ -52,22 +52,6 @@ def calibrationambienttemp_outlier_check(frame, parameters):
 
 class CheckList(BaseCheckList):
     checks = [ hbb_temp_outlier_check , abb_temp_outlier_check, calibrationambienttemp_outlier_check ]
-
-def _compute_robust_zscore(frame, window_length):
-    median_values = frame.rolling(window=window_length, center=True, min_periods=1).median()
-    # Compute the MAD
-    mad = abs(frame - median_values).median()
-    # standard deviation is proportional to median absolute deviation I'm told
-    robust_std = mad * 1.48
-    # compute the Mahalanobis distance from rolling median
-    return abs((frame - median_values) / robust_std)
-
-def _compute_robust_rate_zscore(frame, window_length=None):
-    time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64),
-                           index=frame.index[1:])
-    changes = frame.diff() / time_diffs
-    mad_diff = abs(changes - changes.median()).median() * 1.48
-    return abs(frame.diff() / time_diffs)
     
 def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore):
     # Find outliers with deviation greater than 6 sigma
diff --git a/interpret_qc.py b/interpret_qc.py
index 83d45e4..9b4cb04 100644
--- a/interpret_qc.py
+++ b/interpret_qc.py
@@ -36,18 +36,14 @@ def qc_day(qc_path):
     qc_frame_sum = (qc_frame > .95).sum(axis=0).to_string()
 
     plots = []
-
-    for qc_variable in ([
-        'ABBapexTemp',
-        'ABBtopTemp',
-        'ABBbottomTemp',
-        'HBBapexTemp',
-        'HBBtopTemp',
-        'HBBbottomTemp',
-        'calibrationAmbientTemp']):
-        plot = plot_variable_qc(frame, qc_variable)
-        if plot is not None:
-            plots.append(plot)
+    qc_variables = qc_frame.columns
+
+    for qc_variable in qc_variables:
+        if qc_variable.startswith('qc_') and qc_variable not in ['qc_notes','qc_percent']:
+            qc_variable = qc_variable.replace('qc_','')
+            plot = plot_variable_qc(frame, qc_variable)
+            if plot is not None:
+                plots.append(plot)
 
     return flask.render_template('qc.html', qc_path=qc_path, plots=plots, qc_frame=qc_frame_sum)
 
diff --git a/radiometric_checks.py b/radiometric_checks.py
index 34332f3..4c1347c 100644
--- a/radiometric_checks.py
+++ b/radiometric_checks.py
@@ -1,6 +1,15 @@
-from util import BaseCheckList
+from util import BaseCheckList, annotate_all, _compute_robust_zscore, invalidate_records, update_variable_qc
+import pandas as pd
+import numpy as np
 
 def imaginary_radiance_check(frame, parameters):
+    if 'skyViewImaginaryRadiance2510_2515' not in frame.columns:
+        return frame
+    threshold = parameters.get('imaginary_radiance_threshold', 1)
+    imaginary_radiance_problem = abs(frame.skyViewImaginaryRadiance2510_2515) > threshold
+    frame['imaginary_radiance_check'] = imaginary_radiance_problem * 1
+    annotate_all(frame, imaginary_radiance_problem, 'sky view imaginary radiance out of range')
+    frame = invalidate_records(frame, 'imaginary_radiance_check')
     return frame
 
 def hbb_radiance_check(frame, parameters):
@@ -9,6 +18,19 @@ def hbb_radiance_check(frame, parameters):
 
 def responsivity_check(frame, parameters):
     # lw, sw
+    if not np.in1d(['LWresponsivity','SWresponsivity'], frame.columns).all():
+        return frame
+    lw_zscore = _compute_robust_zscore(frame['LWresponsivity'], 50)
+    sw_zscore = _compute_robust_zscore(frame['SWresponsivity'], 50)
+    lw_problem = abs(lw_zscore) > 6
+    sw_problem = abs(sw_zscore) > 6
+    variable_qcs = pd.DataFrame({
+        'qc_LWresponsivity':lw_problem * 1,
+        'qc_SWresponsivity':sw_problem * 1
+    })
+    frame['responsivity_check'] = (lw_problem | sw_problem) * 1
+    frame = update_variable_qc(frame, variable_qcs)
+    frame = invalidate_records(frame, 'responsivity_check')
     return frame
 
 class CheckList(BaseCheckList):
diff --git a/util.py b/util.py
index cb8a8a6..e6ee40e 100644
--- a/util.py
+++ b/util.py
@@ -2,6 +2,22 @@ from itertools import takewhile
 import numpy as np
 import pandas as pd
 
+def _compute_robust_zscore(frame, window_length):
+    median_values = frame.rolling(window=window_length, center=True, min_periods=1).median()
+    # Compute the MAD
+    mad = abs(frame - median_values).median()
+    # standard deviation is proportional to median absolute deviation I'm told
+    robust_std = mad * 1.48
+    # compute the Mahalanobis distance from rolling median
+    return abs((frame - median_values) / robust_std)
+
+def _compute_robust_rate_zscore(frame, window_length=None):
+    time_diffs = pd.Series((frame.index.values[1:] - frame.index.values[:-1]).astype(np.int64),
+                           index=frame.index[1:])
+    changes = frame.diff() / time_diffs
+    mad_diff = abs(changes - changes.median()).median() * 1.48
+    return abs(frame.diff() / time_diffs)
+
 def annotate(frame, loc, annotation):
     notes = frame.loc[loc, 'qc_notes']
     if type(notes) == str and len(notes) > 0:
-- 
GitLab