From d938d6960a6b715aaacb8a5de3978a6ec0d29f38 Mon Sep 17 00:00:00 2001 From: Coda Phillips <cphillips@sean.ssec.wisc.edu> Date: Thu, 25 Aug 2016 09:28:18 -0500 Subject: [PATCH] Big refactor --- electronic_checks.py | 62 ++++-------------- global_checks.py | 12 +--- main.py | 46 ++++---------- radiometric_checks.py | 57 ++++++----------- scene_checks.py | 60 ++---------------- state_checks.py | 142 ++++++------------------------------------ thermal_checks.py | 12 +--- util.py | 46 -------------- 8 files changed, 77 insertions(+), 360 deletions(-) diff --git a/electronic_checks.py b/electronic_checks.py index b41d1f8..748b650 100644 --- a/electronic_checks.py +++ b/electronic_checks.py @@ -1,9 +1,19 @@ -from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc, _compute_robust_zscore, _compute_robust_rate_zscore import numpy as np import pandas as pd import itertools +from all_checks import checklist -electronic_checks = BaseCheckList() +######### CHECKS ########### +@checklist.add_check(depends=['datetime','HBBapexTemp','HBBbottomTemp','HBBtopTemp' ], affects_calibration=True, description='outlier in HBB temperature discovered') +def hbb_temp_outlier_check(frame, parameters): + return find_bb_outliers(frame, parameters, 'HBB') + +@checklist.add_check(depends=['datetime','ABBapexTemp','ABBbottomTemp','ABBtopTemp' ], affects_calibration=True, description='outlier in ABB temperature discovered') +def abb_temp_outlier_check(frame, parameters): + return find_bb_outliers(frame, parameters, 'ABB') + + +###### UTILTIY ############# def find_bb_outliers(frame, parameters, bb): """ @@ -20,38 +30,7 @@ def find_bb_outliers(frame, parameters, bb): bba_outliers = np.array(list(_scan_for_outliers(frame, '{}apexTemp'.format(bb), delta_thresh))) bbt_outliers = np.array(list(_scan_for_outliers(frame, '{}topTemp'.format(bb), delta_thresh))) - variable_qcs = pd.DataFrame({ - 'qc_{}bottomTemp'.format(bb) : bbb_outliers * 1, - 'qc_{}topTemp'.format(bb) : bbt_outliers * 1, - 'qc_{}apexTemp'.format(bb) : bba_outliers * 1 - }, index=frame.index) - frame = update_variable_qc(frame, variable_qcs) - - annotate_all(frame, bbb_outliers, '{} bottom temperature outlier'.format(bb)) - annotate_all(frame, bba_outliers, '{} apex temperature outlier'.format(bb)) - annotate_all(frame, bbt_outliers, '{} top temperature outlier'.format(bb)) - - frame['{}_temp_outlier_check'.format(bb.lower())] = ( bbb_outliers | bba_outliers | bbt_outliers ) * 1 - frame = invalidate_records(frame, '{}_temp_outlier_check'.format(bb.lower())) - return frame - -@electronic_checks.check( - depends=[ 'HBBapexTemp', 'HBBbottomTemp', 'HBBtopTemp' ], - updates=['HBBbottomTemp', 'HBBapexTemp','HBBtopTemp'] -) -def hbb_temp_outlier_check(frame, parameters): - return find_bb_outliers(frame, parameters, 'HBB') - -@electronic_checks.check( - depends=[ 'ABBapexTemp', 'ABBbottomTemp', 'ABBtopTemp' ], - updates=['ABBbottomTemp', 'ABBapexTemp','ABBtopTemp'] -) -def abb_temp_outlier_check(frame, parameters): - return find_bb_outliers(frame, parameters, 'ABB') - -def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore): - # Find outliers with deviation greater than 6 sigma - return estimation_func(frame, window_length) > 6 + return ( bbb_outliers | bba_outliers | bbt_outliers ) def _scan_for_outliers(frame, variable, delta_thresh): """ @@ -72,18 +51,3 @@ def _scan_for_outliers(frame, variable, delta_thresh): last = row else: yield True - - -#### TESTS #### - -#def test_hbb_temp_outlier_check(): - #frame = pd.DataFrame({ - #'HBBapexTemp':[0,1,10,1], - #'HBBbottomTemp':[1,1,1,1], - #'HBBtopTemp':[0,1,10,1], - #'qc_notes':'', - #'sceneMirrorPosition':[ord(x) for x in 'HASA'] - #}) - #assert hbb_temp_outlier_check(frame, {})['hbb_temp_outlier_check'].values.tolist() == [0,0,1,0] -# - diff --git a/global_checks.py b/global_checks.py index aec762a..b20b64c 100644 --- a/global_checks.py +++ b/global_checks.py @@ -1,14 +1,8 @@ -from util import BaseCheckList, annotate_all import numpy as np import pandas as pd +from all_checks import checklist -global_checks = BaseCheckList() - -@global_checks.check(depends=['missingDataFlag']) +checklist.add_check(depends=['missingDataFlag'], affects_calibration=True, description='record is missing') def missing_data_flag_check(frame, parameters): - missing_records = set(np.arange(frame.sum_index.max())).difference(frame.sum_index) - - frame = pd.concat([frame, - pd.DataFrame({'sum_index':list(missing_records), 'datetime':pd.NaT, 'missing_data_flag_check':1.0, 'qc_notes':'missing data'}).set_index('datetime')]) - return frame + return ~np.isnan(frame['missingDataFlag']) & (frame['missingDataFlag'] == 1) diff --git a/main.py b/main.py index eb35615..01c137c 100644 --- a/main.py +++ b/main.py @@ -9,25 +9,8 @@ from zipfile import ZipFile from io import BytesIO from tempfile import mkstemp from shutil import move - -from electronic_checks import electronic_checks -from global_checks import global_checks -from radiometric_checks import radiometric_checks -from scene_checks import scene_checks -from state_checks import state_checks -from thermal_checks import thermal_checks -from bomem_file import read_stream -from igm_checks import spike_check from datetime import datetime - -levels = [ - global_checks, - scene_checks, - state_checks, - electronic_checks, - radiometric_checks, - thermal_checks -] +from igm_checks import spike_check def save_quality(frame, qc_path): """ @@ -46,12 +29,11 @@ def save_quality(frame, qc_path): qc_notes = ncdf.createVariable('qc_notes', str, ('time',)) # Write the columns ending in _check (aggregate tests) - for check_mask in frame.filter(like='_check'): - ncdf.createVariable(check_mask, 'f4', ('time',))[:] = frame[check_mask].values - # Write the columns starting with qc_ (tests applied directly to variables) - for variable_qc in frame.filter(like='qc_'): - if variable_qc not in ['qc_notes','qc_percent']: - ncdf.createVariable(variable_qc, 'f4', ('time',))[:] = frame[variable_qc].values + for check in checklist.checks: + variable = ncdf.createVariable(check.name, 'f4', ('time',)) + variable.depends = ','.join(check.depends) + variable.description = check.description + variable[:] = frame[check.name].values # Write time information base_time[:] = frame.datetime.dropna().iloc[0].to_datetime64() @@ -111,20 +93,16 @@ def read_igms(spc_zip_path): 'subfile':index } -def check_frame(frame, parameters): +def check_frame(frame, parameters, checklist): """ Start with housekeeping DataFrame and iteratively run checks to compute quality """ frame['qc_percent'] = 0 frame['qc_notes'] = None - for check_column in frame.filter(like='_check').columns: - frame['qc_percent'] = frame['qc_percent'] + frame[check_column].fillna(0) - frame['qc_percent'] * frame[check_column].fillna(0) - for level in levels: - level.set_params(parameters) - frame = level.compute(frame) + frame = checklist.check_everything(frame, parameters) return frame -def update_all(ftp_dir, sci_dir, parameters=None): +def update_all(ftp_dir, sci_dir, checklist, parameters=None): """ Given the root directories for ftp and sci, find all days lacking an up-to-date qc file and generate new qc """ @@ -157,7 +135,7 @@ def update_all(ftp_dir, sci_dir, parameters=None): # Reindex back to housekeeping frame (union of sum and cxs records), removing interleaved spike data frame_with_spikes = frame_with_spikes.ix[frame.index] # Perform qc on housekeeping frame - frame_with_spikes = check_frame(frame_with_spikes, parameters) + frame_with_spikes = check_frame(frame_with_spikes, parameters, checklist) save_quality(frame_with_spikes, qc_file) @@ -200,4 +178,6 @@ if __name__ == '__main__': args = parser.parse_args() - update_all(args.ftp, args.sci) + from all_checks import checklist + + update_all(args.ftp, args.sci, checklist) diff --git a/radiometric_checks.py b/radiometric_checks.py index fea3741..f55056b 100644 --- a/radiometric_checks.py +++ b/radiometric_checks.py @@ -1,56 +1,39 @@ -from util import BaseCheckList, annotate_all, _compute_robust_zscore, invalidate_records, update_variable_qc +from util import _compute_robust_zscore import pandas as pd import numpy as np import scipy.stats +from all_checks import checklist -radiometric_checks = BaseCheckList() - -@radiometric_checks.check(depends=['skyViewImaginaryRadiance2510_2515']) +@checklist.add_check(depends=['skyViewImaginaryRadiance2510_2515'], affects_calibration=False, description='sky view imaginary radiance is out of range') def imaginary_radiance_check(frame, parameters): threshold = parameters.get('imaginary_radiance_threshold', 1) - imaginary_radiance_problem = abs(frame.skyViewImaginaryRadiance2510_2515) > threshold - frame['imaginary_radiance_check'] = imaginary_radiance_problem * 1 - annotate_all(frame, imaginary_radiance_problem, 'sky view imaginary radiance out of range') - frame = invalidate_records(frame, 'imaginary_radiance_check') - return frame + return abs(frame.skyViewImaginaryRadiance2510_2515) > threshold -@radiometric_checks.check(depends=['HBBviewStdDevRadiance985_990','LW_HBB_NEN','SW_HBB_NEN']) -def hbb_radiance_check(frame, parameters): - # Std dev, nen, lw, sw +@checklist.add_check(depends=['HBBviewStdDevRadiance985_990'], affects_calibration=True, description='HBB radiance has too much variance') +def hbb_std_dev_check(frame, parameters): hbb_std_dist = scipy.stats.chi2.fit(frame.HBBviewStdDevRadiance985_990) _, hbb_std_dist_hi = scipy.stats.chi2.interval(.995, *hbb_std_dist) - hbb_std_dist_problem = frame.HBBviewStdDevRadiance985_990 > hbb_std_dist_hi + return frame.HBBviewStdDevRadiance985_990 > hbb_std_dist_hi +@checklist.add_check(depends=['LW_HBB_NEN'], affects_calibration=True, description='Longwave HBB radiance NEN too high') +def hbb_lw_nen_check(frame, parameters): lw_hbb_nen_dist = scipy.stats.chi2.fit(frame.LW_HBB_NEN) _, lw_hbb_nen_hi = scipy.stats.chi2.interval(.995, *lw_hbb_nen_dist) - lw_hbb_nen_problem = frame.LW_HBB_NEN > lw_hbb_nen_hi + return frame.LW_HBB_NEN > lw_hbb_nen_hi +@checklist.add_check(depends=['SW_HBB_NEN'], affects_calibration=True, description='Shortwave HBB radiance NEN too high') +def hbb_sw_nen_check(frame, parameters): sw_hbb_nen_dist = scipy.stats.chi2.fit(frame.SW_HBB_NEN) _, sw_hbb_nen_hi = scipy.stats.chi2.interval(.995, *sw_hbb_nen_dist) - sw_hbb_nen_problem = frame.SW_HBB_NEN > sw_hbb_nen_hi - - frame['hbb_radiance_check'] = (hbb_std_dist_problem | lw_hbb_nen_problem | sw_hbb_nen_problem) * 1 - annotate_all(frame, hbb_std_dist_problem, 'HBB radiance Std.Dev. too high') - annotate_all(frame, lw_hbb_nen_problem, 'LW HBB NEN too high') - annotate_all(frame, sw_hbb_nen_problem, 'SW HBB NEN too high') - frame = invalidate_records(frame, 'hbb_radiance_check') - return frame + return frame.SW_HBB_NEN > sw_hbb_nen_hi -@radiometric_checks.check(depends=['LWresponsivity','SWresponsivity'], updates=['LWresponsivity', 'SWresponsivity']) -def responsivity_check(frame, parameters): - # lw, sw +@checklist.add_check(depends=['LWresponsivity'], affects_calibration=False, description='Longwave responsivity outlier') +def lw_responsivity_check(frame, parameters): lw_zscore = _compute_robust_zscore(frame['LWresponsivity'], 50) + return abs(lw_zscore) > 6 + +@checklist.add_check(depends=['SWresponsivity'], affects_calibration=False, description='Shortwave responsivity outlier') +def sw_responsivity_check(frame, parameters): sw_zscore = _compute_robust_zscore(frame['SWresponsivity'], 50) - lw_problem = abs(lw_zscore) > 6 - sw_problem = abs(sw_zscore) > 6 - variable_qcs = pd.DataFrame({ - 'qc_LWresponsivity':lw_problem * 1, - 'qc_SWresponsivity':sw_problem * 1 - }) - frame['responsivity_check'] = (lw_problem | sw_problem) * 1 - frame = update_variable_qc(frame, variable_qcs) - annotate_all(frame, lw_problem, 'LW responsivity outlier') - annotate_all(frame, sw_problem, 'SW responsivity outlier') - frame = invalidate_records(frame, 'responsivity_check') - return frame + return abs(sw_zscore) > 6 diff --git a/scene_checks.py b/scene_checks.py index 5353b96..cd8e3e1 100644 --- a/scene_checks.py +++ b/scene_checks.py @@ -1,73 +1,23 @@ -from util import BaseCheckList, invalidate_records, annotate_all import pandas as pd import numpy as np +from all_checks import checklist -scene_checks = BaseCheckList() - -@scene_checks.check(depends=['hatchOpen','sceneMirrorPosition']) +@checklist.add_check(depends=['hatchOpen','sceneMirrorPosition'], affects_calibration=False, description='hatch is closed') def hatch_check(frame, parameters): """ Check that the hatch is open on sky views """ - hatch_closed_during_viewing = ((frame.hatchOpen != 1) & - (~frame.sceneMirrorPosition.isin([ord('H'), ord('A')]))) - - frame['hatch_check'] = hatch_closed_during_viewing * 1 - annotate_all(frame, hatch_closed_during_viewing, 'hatch closed') - - return frame + return ((frame.hatchOpen != 1) & (~frame.sceneMirrorPosition.isin([ord('H'), ord('A')]))) -@scene_checks.check(depends=['hatchOpen','sceneMirrorPosition']) +@checklist.add_check(depends=['hatchOpen','sceneMirrorPosition'], affects_calibration=True, description='hatch moving during calibration view, mirror might have safed') def safing_check(frame, parameters): """ Check that the mirror doesn't safe during a calibration view and contaminate other records """ hatch_closing = (frame.hatchOpen == -3) - mirror_safing = (hatch_closing & frame.sceneMirrorPosition.isin([ord('H'), ord('A')])) - frame['safing_check'] = mirror_safing * 1 - annotate_all(frame, mirror_safing, 'mirror likely safed during view') - frame = invalidate_records(frame, 'safing_check') - - return frame + return hatch_closing & frame.sceneMirrorPosition.isin([ord('H'), ord('A')]) def encoder_check(frame, parameters): return frame - - - -#### TESTS ##### - -def test_hatch_check(): - frame = pd.DataFrame({ - 'hatchOpen':[1,1,0], - 'sceneMirrorPosition':[ord('H'), ord('A'), ord('S')], - 'qc_notes':'' - }) - assert hatch_check(frame, {})['hatch_check'].values.tolist() == [0,0,1] - - frame = pd.DataFrame({ - 'hatchOpen':[1,0,1], - 'sceneMirrorPosition':[ord('H'), ord('A'), ord('S')], - 'qc_notes':'' - }) - assert hatch_check(frame, {})['hatch_check'].values.tolist() == [0,0,0] - - -def test_safing_check(): - frame = pd.DataFrame({ - 'hatchOpen':[0,0,0], - 'sceneMirrorPosition':[ord('H'), ord('A'), ord('S')], - 'qc_notes':'' - }) - assert safing_check(frame, {})['safing_check'].values.tolist() == [0,0,0] - - frame = pd.DataFrame({ - 'hatchOpen':[1,-3,0,0], - 'sceneMirrorPosition':[ord('S'), ord('H'), ord('A'), ord('S')], - 'qc_notes':'' - }) - assert safing_check(frame, {})['safing_check'].values.tolist() == [1,1,0,1] - - diff --git a/state_checks.py b/state_checks.py index 25cb9ce..c6a42df 100644 --- a/state_checks.py +++ b/state_checks.py @@ -1,35 +1,36 @@ -from util import BaseCheckList, invalidate_records, annotate_all, update_variable_qc import pandas as pd import numpy as np +from all_checks import checklist -state_checks = BaseCheckList() - -@state_checks.check(depends=['detectorTemp'], updates=['detectorTemp']) +@checklist.add_check(depends=['detectorTemp'], affects_calibration=True, description='detector temperature too high') def detector_check(frame, parameters): """ Check that the detector temp is in range """ - detector_temp_too_high = (frame['detectorTemp'] > 90) - frame['detector_check'] = detector_temp_too_high * 1 - frame['qc_detectorTemp'] = detector_temp_too_high * 1 - annotate_all(frame, detector_temp_too_high, 'detector temperature too high') - frame = invalidate_records(frame, 'detector_check') - return frame + return frame['detectorTemp'] > 90 -@state_checks.check( - depends=['HBBapexTemp','HBBbottomTemp','HBBtopTemp'], - updates=['qc_HBBapexTemp','qc_HBBbottomTemp','qc_HBBtopTemp'] -) +@checklist.add_check(depends=['HBBapexTemp','HBBbottomTemp','HBBtopTemp'], affects_calibration=True, description='HBB thermistors outside range') def hbb_thermistor_check(frame, parameters): return thermistor_check(frame, 'HBB', 331, 335) -@state_checks.check( - depends=['ABBapexTemp','ABBbottomTemp','ABBtopTemp'], - updates=['qc_ABBapexTemp','qc_ABBbottomTemp','qc_ABBtopTemp'] -) +@checklist.add_check(depends=['ABBapexTemp','ABBbottomTemp','ABBtopTemp'], affects_calibration=True, description='ABB thermistors outside range') def abb_thermistor_check(frame, parameters): return thermistor_check(frame, 'ABB', 150, 335) +@checklist.add_check(depends=['datetime', 'HBBbottomTemp','HBBtopTemp','HBBapexTemp'], affects_calibration=True, description='HBB temperature is changing too quickly') +def hbb_stable_check(frame, parameters): + interval_seconds = frame['datetime'].diff().astype(np.int64) / 1e9 + hbbb_diff = frame['HBBbottomTemp'].diff() / interval_seconds + hbba_diff = frame['HBBapexTemp'].diff() / interval_seconds + hbbt_diff = frame['HBBtopTemp'].diff() / interval_seconds + hsr = parameters.get('hbb_stable_rate', .002) + hbbb_diff_problem = abs(hbbb_diff.fillna(0)) > hsr + hbba_diff_problem = abs(hbba_diff.fillna(0)) > hsr + hbbt_diff_problem = abs(hbbt_diff.fillna(0)) > hsr + + return (hbbb_diff_problem | hbbt_diff_problem | hbbt_diff_problem) + + def thermistor_check(frame, bb, low, high): """ Check that all thermistor temps are in range @@ -44,108 +45,5 @@ def thermistor_check(frame, bb, low, high): bba_problem = bba_too_low | bba_too_high bbt_problem = bbt_too_low | bbt_too_high - # Record qc for each thermistor - # qc variables are probabilites between 0 and 1 - variable_qcs = pd.DataFrame({ - 'qc_{}bottomTemp'.format(bb) :bbb_problem * 1, - 'qc_{}apexTemp'.format(bb) : bba_problem * 1, - 'qc_{}topTemp'.format(bb) : bbt_problem * 1 - }, index=frame.index) - frame = update_variable_qc(frame, variable_qcs) - - # Compute overall BB quality - frame['{}_thermistor_check'.format(bb.lower())] = (bbb_problem | bba_problem | bbt_problem) * 1 - annotate_all(frame, bbb_too_low, '{} bottom temperature too low'.format(bb)) - annotate_all(frame, bbt_too_low, '{} top temperature too low'.format(bb)) - annotate_all(frame, bba_too_low, '{} apex temperature too low'.format(bb)) - annotate_all(frame, bbb_too_high, '{} bottom temperature too high'.format(bb)) - annotate_all(frame, bbt_too_high, '{} top temperature too high'.format(bb)) - annotate_all(frame, bba_too_high, '{} apex temperature too high'.format(bb)) - frame = invalidate_records(frame, '{}_thermistor_check'.format(bb.lower())) - return frame - -def hbb_stable_check(frame, parameters): - interval_seconds = frame['datetime'].diff().astype(np.int64) / 1e9 - hbbb_diff = frame['HBBbottomTemp'].diff() / interval_seconds - hbba_diff = frame['HBBapexTemp'].diff() / interval_seconds - hbbt_diff = frame['HBBtopTemp'].diff() / interval_seconds - hsr = parameters.get('hbb_stable_rate', .002) - hbbb_diff_problem = abs(hbbb_diff.fillna(0)) > hsr - hbba_diff_problem = abs(hbba_diff.fillna(0)) > hsr - hbbt_diff_problem = abs(hbbt_diff.fillna(0)) > hsr - - variable_qcs = pd.DataFrame({ - 'qc_HBBbottomTemp':hbbb_diff_problem * 1, - 'qc_HBBapexTemp' : hbba_diff_problem * 1, - 'qc_HBBtopTemp' : hbbt_diff_problem * 1 - }, index=frame.index) - frame = update_variable_qc(frame, variable_qcs) - - frame['hbb_stable_check'] = (hbbb_diff_problem | hbbt_diff_problem | hbbt_diff_problem) * 1 - annotate_all(frame, hbbb_diff_problem, 'HBB bottom temperature not stable') - annotate_all(frame, hbba_diff_problem, 'HBB apex temperature not stable') - annotate_all(frame, hbbt_diff_problem, 'HBB top temperature not stable') - frame = invalidate_records(frame, 'hbb_stable_check') - return frame - - - -#### TESTS #### - -def test_hbb_stable_check(): - # Check with rates of .0016 - dummy_data = pd.DataFrame({ - 'HBBbottomTemp':np.arange(0,2,.1), - 'HBBapexTemp':np.arange(0,2,.1), - 'HBBtopTemp':np.arange(0,2,.1), - 'datetime':pd.date_range('1/1/2000', periods=20, freq='60s'), - 'qc_notes':'', - 'qc_percent':0, - 'sceneMirrorPosition':ord('S') - }) - frame = hbb_stable_check( - dummy_data, {'hbb_stable_rate':.002} - ) - assert all(frame['hbb_stable_check'] == 0) - - dummy_data.ix[10:,'HBBbottomTemp'] = np.arange(1,3,.2) - dummy_data.ix[10:,'HBBtopTemp'] = np.arange(1,3,.2) - dummy_data.ix[10:,'HBBbottomTemp'] = np.arange(1,3,.2) - - frame = hbb_stable_check( - dummy_data, {'hbb_stable_rate':.002} - ) - assert all(frame.ix[:10, 'hbb_stable_check'] == 0) - assert all(frame.ix[11:, 'hbb_stable_check'] == 1) - -def test_hbb_thermistor_check(): - frame = hbb_thermistor_check(pd.DataFrame({ - 'HBBbottomTemp':[300,333,336], - 'HBBapexTemp':[300,333,336], - 'HBBtopTemp':[300,333,336], - 'sceneMirrorPosition':[ord('H'), ord('A'), ord('S')], - 'qc_notes':'' - }), {}) - assert all(frame['hbb_thermistor_check'] == [1,0,1]) - assert all('HBB {} temperature too low'.format(x) in frame.iloc[0].qc_notes.split(',') for x in ['bottom','apex','top']) - assert all('HBB {} temperature too high'.format(x) in frame.iloc[2].qc_notes.split(',') for x in ['bottom','apex','top']) - -def test_hbb_thermistor_check2(): - frame = hbb_thermistor_check(pd.DataFrame({ - 'HBBbottomTemp':[300,333,333], - 'HBBapexTemp':[300,333,333], - 'HBBtopTemp':[300,333,333], - 'sceneMirrorPosition':[ord('H'), ord('A'), ord('S')], - 'qc_notes':'' - }), {}) - assert all(frame['hbb_thermistor_check'] == [1,0,1]) - -def test_detector_check(): - frame = detector_check(pd.DataFrame({ - 'detectorTemp':[50,100], - 'sceneMirrorPosition':[ord('H'), ord('A')], - 'qc_notes':'' - }), {}) - assert all(frame['detector_check'] == [0,1]) - assert frame.iloc[1].qc_notes == 'detector temperature too high' + return (bbb_problem | bba_problem | bbt_problem) diff --git a/thermal_checks.py b/thermal_checks.py index 8ba6ebf..fe33e8a 100644 --- a/thermal_checks.py +++ b/thermal_checks.py @@ -1,14 +1,9 @@ -from util import BaseCheckList import sklearn.covariance import scipy.stats import numpy as np +from all_checks import checklist -thermal_checks = BaseCheckList() - -def thermal_correlation_check(frame, parameters): - return frame - -@thermal_checks.check(depends=['HBBbottomTemp','HBBtopTemp','HBBapexTemp']) +@checklist.add_check(depends=['HBBbottomTemp','HBBtopTemp','HBBapexTemp'], affects_calibration=True, description='HBB thermistors disagree') def hbb_covariance_check(frame, parameters): hbbb_mean = parameters.get('HBBbottomTemp_mean', 333.101) hbbt_mean = parameters.get('HBBtopTemp_mean', 332.993) @@ -21,6 +16,5 @@ def hbb_covariance_check(frame, parameters): [ 0.0164418 , 0.01898899, 0.02326341], [ 0.01817402, 0.02326341, 0.02968953]]) distances = envelope.mahalanobis(frame[['HBBbottomTemp','HBBtopTemp','HBBapexTemp']]) ** .5 - frame['hbb_covariance_check'] = (distances > 6) * 1 - return frame + return (distances > 6) * 1 diff --git a/util.py b/util.py index 14a2ada..6b13382 100644 --- a/util.py +++ b/util.py @@ -84,49 +84,3 @@ def invalidate_record(frame, loc, check_name, value, annotation=''): return frame -class BaseCheckList: - - def __init__(self, *args, **kwargs): - self.check_results = {} - self.parameters = {} - self.checks = {} - - def set_params(self, parameters): - self.parameters = parameters - - def check(self, *, depends, updates=[]): - def decorate(func): - func_params = {'depends':depends} - # checks always update a variable with the name of function - func_params['updates'] = {func.__name__}.union(updates) - @wraps(func) - def wrapper(frame, parameters): - if not np.in1d(depends, frame.columns).all(): - return frame - return func(frame, parameters) - self.checks[wrapper] = func_params - return wrapper - return decorate - - def update_qc_percent(self, frame): - for check_func in self.checks: - name = check_func.__name__ - if name in frame.columns: - results = frame[name].fillna(0) - # Compute P(A U B) - previous_percent = frame['qc_percent'] - frame['qc_percent'] = previous_percent + results - previous_percent*results - return frame - - def compute(self, frame): - # Filter bad records from previous level - filtered_frame = frame.ix[frame.qc_percent < 1].copy() - for check, metadata in self.checks.items(): - original_shape = filtered_frame.shape - if len(filtered_frame) > 0: - filtered_frame = check(filtered_frame, self.parameters) - else: - for v in metadata['updates']: - filtered_frame[v] = [] - assert np.in1d(list(metadata['updates']), filtered_frame.columns).all() - return self.update_qc_percent(filtered_frame.combine_first(frame)) -- GitLab