Skip to content
Snippets Groups Projects
Commit c9fc42d6 authored by Coda Phillips's avatar Coda Phillips
Browse files

Factor zscore functions to util

parent b39fb73c
No related branches found
No related tags found
No related merge requests found
from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc
from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc, _compute_robust_zscore, _compute_robust_rate_zscore
import numpy as np
import pandas as pd
......@@ -52,22 +52,6 @@ def calibrationambienttemp_outlier_check(frame, parameters):
class CheckList(BaseCheckList):
    """Checklist that runs the HBB, ABB and calibration-ambient temperature outlier checks."""

    # Check functions to run; each is defined earlier in this module.
    checks = [
        hbb_temp_outlier_check,
        abb_temp_outlier_check,
        calibrationambienttemp_outlier_check,
    ]
def _compute_robust_zscore(frame, window_length):
    """Return the absolute robust z-score of ``frame`` about its rolling median.

    The centre is a centered rolling median over ``window_length`` samples
    (shorter windows are allowed at the edges because ``min_periods=1``).
    The scale is 1.48 times the median absolute deviation from that rolling
    median, taken over the whole input rather than per window.

    A zero median absolute deviation makes the division yield ``inf``/``NaN``.
    """
    rolling_median = frame.rolling(window=window_length, center=True, min_periods=1).median()
    deviation = frame - rolling_median
    # Scale the MAD by 1.48 so it is comparable to a standard deviation.
    robust_scale = deviation.abs().median() * 1.48
    return (deviation / robust_scale).abs()
def _compute_robust_rate_zscore(frame, window_length=None):
    """Return the absolute robust z-score of the rate of change of ``frame``.

    The rate of change is ``frame.diff()`` divided by the spacing between
    consecutive index values (cast to ``int64``). The z-score is the
    deviation of each rate from the median rate, divided by 1.48 times the
    median absolute deviation of the rates.

    Previously the robust scale was computed but never applied, so the
    function returned the raw, unnormalized rate.

    Parameters:
        frame: series whose index values can be subtracted and cast to
            ``int64`` -- presumably a datetime index; TODO confirm with callers.
        window_length: unused; presumably kept so the signature matches
            ``_compute_robust_zscore``.

    Returns:
        A series aligned with ``frame``; the first element is ``NaN`` because
        it has no predecessor. A zero median absolute deviation yields
        ``inf``/``NaN`` values.
    """
    index_values = frame.index.values
    time_diffs = pd.Series((index_values[1:] - index_values[:-1]).astype(np.int64),
                           index=frame.index[1:])
    changes = frame.diff() / time_diffs
    median_change = changes.median()
    # Scale the MAD by 1.48 so it is comparable to a standard deviation.
    mad_diff = abs(changes - median_change).median() * 1.48
    return abs((changes - median_change) / mad_diff)
def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore):
# Find outliers with deviation greater than 6 sigma
......
......@@ -36,18 +36,14 @@ def qc_day(qc_path):
qc_frame_sum = (qc_frame > .95).sum(axis=0).to_string()
plots = []
for qc_variable in ([
'ABBapexTemp',
'ABBtopTemp',
'ABBbottomTemp',
'HBBapexTemp',
'HBBtopTemp',
'HBBbottomTemp',
'calibrationAmbientTemp']):
plot = plot_variable_qc(frame, qc_variable)
if plot is not None:
plots.append(plot)
qc_variables = qc_frame.columns
for qc_variable in qc_variables:
if qc_variable.startswith('qc_') and qc_variable not in ['qc_notes','qc_percent']:
qc_variable = qc_variable.replace('qc_','')
plot = plot_variable_qc(frame, qc_variable)
if plot is not None:
plots.append(plot)
return flask.render_template('qc.html', qc_path=qc_path, plots=plots, qc_frame=qc_frame_sum)
......
from util import BaseCheckList
from util import BaseCheckList, annotate_all, _compute_robust_zscore, invalidate_records, update_variable_qc
import pandas as pd
import numpy as np
def imaginary_radiance_check(frame, parameters):
    """Flag records whose sky-view imaginary radiance magnitude exceeds a threshold.

    If the ``skyViewImaginaryRadiance2510_2515`` column is missing, ``frame``
    is returned untouched. Otherwise the absolute value of that column is
    compared with ``parameters['imaginary_radiance_threshold']`` (default 1),
    the 0/1 result is stored in the ``imaginary_radiance_check`` column, the
    offending rows are annotated, and the frame is passed through
    ``invalidate_records``.
    """
    radiance_column = 'skyViewImaginaryRadiance2510_2515'
    if radiance_column in frame.columns:
        limit = parameters.get('imaginary_radiance_threshold', 1)
        out_of_range = frame[radiance_column].abs() > limit
        # Multiplying by 1 turns the boolean mask into 0/1 integers.
        frame['imaginary_radiance_check'] = out_of_range * 1
        annotate_all(frame, out_of_range, 'sky view imaginary radiance out of range')
        frame = invalidate_records(frame, 'imaginary_radiance_check')
    return frame
def hbb_radiance_check(frame, parameters):
......@@ -9,6 +18,19 @@ def hbb_radiance_check(frame, parameters):
def responsivity_check(frame, parameters):
    """Flag records whose longwave or shortwave responsivity is an outlier.

    If either ``LWresponsivity`` or ``SWresponsivity`` is missing, ``frame``
    is returned untouched. Otherwise a robust z-score is computed for each
    column with a window of 50, and values above 6 are treated as problems.
    The per-variable flags go through ``update_variable_qc``, the combined
    0/1 flag is stored in ``responsivity_check``, and the frame is passed
    through ``invalidate_records``.

    Parameters:
        frame: data frame of records to check.
        parameters: accepted for interface compatibility; not used here.

    Returns:
        The (possibly updated) frame.
    """
    # Plain membership test instead of np.in1d, which is deprecated in NumPy 2.0.
    required_columns = ('LWresponsivity', 'SWresponsivity')
    if not all(column in frame.columns for column in required_columns):
        return frame

    lw_zscore = _compute_robust_zscore(frame['LWresponsivity'], 50)
    sw_zscore = _compute_robust_zscore(frame['SWresponsivity'], 50)

    lw_problem = abs(lw_zscore) > 6
    sw_problem = abs(sw_zscore) > 6

    # Multiplying by 1 turns the boolean masks into 0/1 integers.
    variable_qcs = pd.DataFrame({
        'qc_LWresponsivity': lw_problem * 1,
        'qc_SWresponsivity': sw_problem * 1,
    })
    frame['responsivity_check'] = (lw_problem | sw_problem) * 1
    frame = update_variable_qc(frame, variable_qcs)
    frame = invalidate_records(frame, 'responsivity_check')
    return frame
class CheckList(BaseCheckList):
......
......@@ -2,6 +2,22 @@ from itertools import takewhile
import numpy as np
import pandas as pd
def _compute_robust_zscore(frame, window_length):
    """Return the absolute robust z-score of ``frame`` about its rolling median.

    The centre is a centered rolling median over ``window_length`` samples
    (shorter windows are allowed at the edges because ``min_periods=1``).
    The scale is 1.48 times the median absolute deviation from that rolling
    median, taken over the whole input rather than per window.

    A zero median absolute deviation makes the division yield ``inf``/``NaN``.
    """
    rolling_median = frame.rolling(window=window_length, center=True, min_periods=1).median()
    deviation = frame - rolling_median
    # Scale the MAD by 1.48 so it is comparable to a standard deviation.
    robust_scale = deviation.abs().median() * 1.48
    return (deviation / robust_scale).abs()
def _compute_robust_rate_zscore(frame, window_length=None):
    """Return the absolute robust z-score of the rate of change of ``frame``.

    The rate of change is ``frame.diff()`` divided by the spacing between
    consecutive index values (cast to ``int64``). The z-score is the
    deviation of each rate from the median rate, divided by 1.48 times the
    median absolute deviation of the rates.

    Previously the robust scale was computed but never applied, so the
    function returned the raw, unnormalized rate.

    Parameters:
        frame: series whose index values can be subtracted and cast to
            ``int64`` -- presumably a datetime index; TODO confirm with callers.
        window_length: unused; presumably kept so the signature matches
            ``_compute_robust_zscore``.

    Returns:
        A series aligned with ``frame``; the first element is ``NaN`` because
        it has no predecessor. A zero median absolute deviation yields
        ``inf``/``NaN`` values.
    """
    index_values = frame.index.values
    time_diffs = pd.Series((index_values[1:] - index_values[:-1]).astype(np.int64),
                           index=frame.index[1:])
    changes = frame.diff() / time_diffs
    median_change = changes.median()
    # Scale the MAD by 1.48 so it is comparable to a standard deviation.
    mad_diff = abs(changes - median_change).median() * 1.48
    return abs((changes - median_change) / mad_diff)
def annotate(frame, loc, annotation):
notes = frame.loc[loc, 'qc_notes']
if type(notes) == str and len(notes) > 0:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment