diff --git a/electronic_checks.py b/electronic_checks.py index 3bf31c136eb5b792b6b8962035f88258e7418397..6eaf09f42cc1723ac17c0c50f52d60672271cc1f 100644 --- a/electronic_checks.py +++ b/electronic_checks.py @@ -1,11 +1,11 @@ from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc, _compute_robust_zscore, _compute_robust_rate_zscore import numpy as np import pandas as pd +import itertools -def find_bb_outliers(frame, parameters, bb): - if not np.in1d(['{}bottomTemp'.format(bb),'{}apexTemp'.format(bb),'{}topTemp'.format(bb)], frame.columns).all(): - return frame +electronic_checks = BaseCheckList() +def find_bb_outliers(frame, parameters, bb): window_length = parameters.get('window_length', 50) if bb == 'HBB': @@ -13,9 +13,10 @@ def find_bb_outliers(frame, parameters, bb): bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length) bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length) else: - bbb_outliers = _find_6sigma_outliers(frame['{}bottomTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore) - bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore) - bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length, estimation_func=_compute_robust_rate_zscore) + delta_thresh = .01 + bbb_outliers = np.array(list(_scan_for_outliers(frame, '{}bottomTemp'.format(bb), delta_thresh))) + bba_outliers = np.array(list(_scan_for_outliers(frame, '{}apexTemp'.format(bb), delta_thresh))) + bbt_outliers = np.array(list(_scan_for_outliers(frame, '{}topTemp'.format(bb), delta_thresh))) variable_qcs = pd.DataFrame({ 'qc_{}bottomTemp'.format(bb) : bbb_outliers * 1, @@ -32,36 +33,42 @@ def find_bb_outliers(frame, parameters, bb): frame = invalidate_records(frame, '{}_temp_outlier_check'.format(bb.lower())) return frame +@electronic_checks.check( + depends=[ 'HBBapexTemp', 'HBBbottomTemp', 'HBBtopTemp' ], + updates=['HBBbottomTemp', 'HBBapexTemp','HBBtopTemp'] +) def hbb_temp_outlier_check(frame, parameters): return find_bb_outliers(frame, parameters, 'HBB') +@electronic_checks.check( + depends=[ 'ABBapexTemp', 'ABBbottomTemp', 'ABBtopTemp' ], + updates=['ABBbottomTemp', 'ABBapexTemp','ABBtopTemp'] +) def abb_temp_outlier_check(frame, parameters): return find_bb_outliers(frame, parameters, 'ABB') - -def calibrationambienttemp_outlier_check(frame, parameters): - if 'calibrationAmbientTemp' not in frame.columns: - return frame - - window_length = parameters.get('window_length', 50) - - temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length) - frame['calibrationambienttemp_outlier_check'] = temp_outliers * 1 - frame = update_variable_qc(frame, pd.DataFrame({'qc_calibrationAmbientTemp':temp_outliers*1}, index=frame.index)) - annotate_all(frame, temp_outliers, 'calibrationAmbientTemp outlier') - return frame - -class CheckList(BaseCheckList): - checks = [ hbb_temp_outlier_check , abb_temp_outlier_check, calibrationambienttemp_outlier_check ] def _find_6sigma_outliers(frame, window_length, estimation_func=_compute_robust_zscore): # Find outliers with deviation greater than 6 sigma - outlier_mask = estimation_func(frame, window_length) > 6 - return outlier_mask + return estimation_func(frame, window_length) > 6 + +def _scan_for_outliers(frame, variable, delta_thresh): + last = None + for i, row in frame.iterrows(): + if last is None: + yield False + last = row + else: + time_diff = (row.datetime - last.datetime).total_seconds() + variable_diff = row[variable] - last[variable] + if abs(variable_diff / time_diff) < delta_thresh: + yield False + last = row + else: + yield True #### TESTS #### - def test_hbb_temp_outlier_check(): frame = pd.DataFrame({ 'HBBapexTemp':[0,1,10,1], @@ -72,20 +79,4 @@ def test_hbb_temp_outlier_check(): }) assert hbb_temp_outlier_check(frame, {})['hbb_temp_outlier_check'].values.tolist() == [0,0,1,0] -def test_abb_temp_outlier_check(): - frame = pd.DataFrame({ - 'ABBapexTemp':[0,1,10,1], - 'ABBbottomTemp':[1,1,1,1], - 'ABBtopTemp':[0,1,10,1], - 'qc_notes':'', - 'sceneMirrorPosition':[ord(x) for x in 'HASA'] - }) - assert abb_temp_outlier_check(frame, {})['abb_temp_outlier_check'].values.tolist() == [0,0,1,0] -def test_calibrationambienttemp_temp_outlier_check(): - frame = pd.DataFrame({ - 'calibrationAmbientTemp':[0,1,10,1], - 'qc_notes':'', - 'sceneMirrorPosition':[ord(x) for x in 'HASA'] - }) - assert calibrationambienttemp_outlier_check(frame, {})['calibrationambienttemp_outlier_check'].values.tolist() == [0,0,1,0] diff --git a/global_checks.py b/global_checks.py index e44da3ce59c53ebc0f50952a114a0a395e03e56d..c26a88d95f45d60435a4641368f0fcbc1c55a629 100644 --- a/global_checks.py +++ b/global_checks.py @@ -1,12 +1,11 @@ from util import BaseCheckList, annotate_all +global_checks = BaseCheckList() + +@global_checks.check(depends=['missingDataFlag']) def check_missing_data_flag(frame, parameters): - if 'missingDataFlag' not in frame: - return frame missing_data = (frame['missingDataFlag'] == 1) frame['check_missing_data_flag'] = missing_data * 1 annotate_all(frame, missing_data, 'missing data') return frame -class CheckList(BaseCheckList): - checks = [ check_missing_data_flag ] diff --git a/main.py b/main.py index 497368e01c1b836711bb50bfaa58d81b94f65e73..d559600f43666d739473c9569856e9e8a7081746 100644 --- a/main.py +++ b/main.py @@ -5,20 +5,20 @@ import netCDF4 from aeri_tools.io.dmv.housekeeping import get_all_housekeeping -import electronic_checks -import global_checks -import radiometric_checks -import scene_checks -import state_checks -import thermal_checks +from electronic_checks import electronic_checks +from global_checks import global_checks +from radiometric_checks import radiometric_checks +from scene_checks import scene_checks +from state_checks import state_checks +from thermal_checks import thermal_checks levels = [ - global_checks.CheckList(), - scene_checks.CheckList(), - state_checks.CheckList(), - electronic_checks.CheckList(), - radiometric_checks.CheckList(), - thermal_checks.CheckList() + global_checks, + scene_checks, + state_checks, + electronic_checks, + radiometric_checks, + thermal_checks ] def save_quality(frame, qc_path): diff --git a/radiometric_checks.py b/radiometric_checks.py index 545f7136639e93ecde598f052f4d402e61e2de50..dddd4387fb11541a276917faf8131accbaca868d 100644 --- a/radiometric_checks.py +++ b/radiometric_checks.py @@ -3,9 +3,10 @@ import pandas as pd import numpy as np import scipy.stats +radiometric_checks = BaseCheckList() + +@radiometric_checks.check(depends=['skyViewImaginaryRadiance2510_2515']) def imaginary_radiance_check(frame, parameters): - if 'skyViewImaginaryRadiance2510_2515' not in frame.columns: - return frame threshold = parameters.get('imaginary_radiance_threshold', 1) imaginary_radiance_problem = abs(frame.skyViewImaginaryRadiance2510_2515) > threshold frame['imaginary_radiance_check'] = imaginary_radiance_problem * 1 @@ -13,10 +14,9 @@ def imaginary_radiance_check(frame, parameters): frame = invalidate_records(frame, 'imaginary_radiance_check') return frame +@radiometric_checks.check(depends=['HBBviewStdDevRadiance985_990','LW_HBB_NEN','SW_HBB_NEN']) def hbb_radiance_check(frame, parameters): # Std dev, nen, lw, sw - if not np.in1d(['HBBviewStdDevRadiance985_990','LW_HBB_NEN','SW_HBB_NEN'], frame.columns).all(): - return frame hbb_std_dist = scipy.stats.chi2.fit(frame.HBBviewStdDevRadiance985_990) _, hbb_std_dist_hi = scipy.stats.chi2.interval(.995, *hbb_std_dist) hbb_std_dist_problem = frame.HBBviewStdDevRadiance985_990 > hbb_std_dist_hi @@ -36,10 +36,9 @@ def hbb_radiance_check(frame, parameters): frame = invalidate_records(frame, 'hbb_radiance_check') return frame +@radiometric_checks.check(depends=['LWresponsivity','SWresponsivity'], updates=['LWresponsivity', 'SWresponsivity']) def responsivity_check(frame, parameters): # lw, sw - if not np.in1d(['LWresponsivity','SWresponsivity'], frame.columns).all(): - return frame lw_zscore = _compute_robust_zscore(frame['LWresponsivity'], 50) sw_zscore = _compute_robust_zscore(frame['SWresponsivity'], 50) lw_problem = abs(lw_zscore) > 6 @@ -53,5 +52,3 @@ def responsivity_check(frame, parameters): frame = invalidate_records(frame, 'responsivity_check') return frame -class CheckList(BaseCheckList): - checks = [ imaginary_radiance_check, hbb_radiance_check, responsivity_check ] diff --git a/scene_checks.py b/scene_checks.py index 6883b460545d8f929664ff3d84c07744a143a71f..5353b96f365aadb0ca16f91ce462a4c6d1be31dc 100644 --- a/scene_checks.py +++ b/scene_checks.py @@ -2,12 +2,13 @@ from util import BaseCheckList, invalidate_records, annotate_all import pandas as pd import numpy as np +scene_checks = BaseCheckList() + +@scene_checks.check(depends=['hatchOpen','sceneMirrorPosition']) def hatch_check(frame, parameters): """ Check that the hatch is open on sky views """ - if not np.in1d(['hatchOpen','sceneMirrorPosition'], frame.columns).all(): - return frame hatch_closed_during_viewing = ((frame.hatchOpen != 1) & (~frame.sceneMirrorPosition.isin([ord('H'), ord('A')]))) @@ -17,13 +18,11 @@ def hatch_check(frame, parameters): return frame +@scene_checks.check(depends=['hatchOpen','sceneMirrorPosition']) def safing_check(frame, parameters): """ Check that the mirror doesn't safe during a calibration view and contaminate other records """ - if not np.in1d(['hatchOpen','sceneMirrorPosition'], frame.columns).all(): - return frame - hatch_closing = (frame.hatchOpen == -3) mirror_safing = (hatch_closing & frame.sceneMirrorPosition.isin([ord('H'), ord('A')])) frame['safing_check'] = mirror_safing * 1 @@ -37,10 +36,6 @@ def encoder_check(frame, parameters): return frame -class CheckList(BaseCheckList): - - checks = [hatch_check, safing_check, encoder_check] - #### TESTS ##### diff --git a/state_checks.py b/state_checks.py index 1ef7bdbc1068eaeb17e9e70e308b2bdb1be9fe5e..25cb9cea80551574daed284904f243f6e4769427 100644 --- a/state_checks.py +++ b/state_checks.py @@ -2,12 +2,13 @@ from util import BaseCheckList, invalidate_records, annotate_all, update_variabl import pandas as pd import numpy as np +state_checks = BaseCheckList() + +@state_checks.check(depends=['detectorTemp'], updates=['detectorTemp']) def detector_check(frame, parameters): """ Check that the detector temp is in range """ - if 'detectorTemp' not in frame.columns: - return frame detector_temp_too_high = (frame['detectorTemp'] > 90) frame['detector_check'] = detector_temp_too_high * 1 frame['qc_detectorTemp'] = detector_temp_too_high * 1 @@ -15,9 +16,17 @@ def detector_check(frame, parameters): frame = invalidate_records(frame, 'detector_check') return frame +@state_checks.check( + depends=['HBBapexTemp','HBBbottomTemp','HBBtopTemp'], + updates=['qc_HBBapexTemp','qc_HBBbottomTemp','qc_HBBtopTemp'] +) def hbb_thermistor_check(frame, parameters): return thermistor_check(frame, 'HBB', 331, 335) +@state_checks.check( + depends=['ABBapexTemp','ABBbottomTemp','ABBtopTemp'], + updates=['qc_ABBapexTemp','qc_ABBbottomTemp','qc_ABBtopTemp'] +) def abb_thermistor_check(frame, parameters): return thermistor_check(frame, 'ABB', 150, 335) @@ -25,8 +34,6 @@ def thermistor_check(frame, bb, low, high): """ Check that all thermistor temps are in range """ - if not np.in1d([x.format(bb) for x in ['{}bottomTemp','{}apexTemp','{}topTemp']], frame.columns).all(): - return frame bbb_too_low = frame['{}bottomTemp'.format(bb)] < low bba_too_low = frame['{}apexTemp'.format(bb)] < low bbt_too_low = frame['{}topTemp'.format(bb)] < low @@ -81,8 +88,6 @@ def hbb_stable_check(frame, parameters): frame = invalidate_records(frame, 'hbb_stable_check') return frame -class CheckList(BaseCheckList): - checks = [detector_check, hbb_thermistor_check, hbb_stable_check, abb_thermistor_check] #### TESTS #### diff --git a/test_data/AE160708/problems.csv b/test_data/AE160708/problems.csv index d257f4fd126f549ebba6750e19a1d909e0f1f101..38bf6028d131b1aadb6e04dfef06e9553df5c964 100644 --- a/test_data/AE160708/problems.csv +++ b/test_data/AE160708/problems.csv @@ -1,4 +1,5 @@ record_cxs,problem +0,missing data # 3,hatch closed 4,hatch closed diff --git a/test_data/AE160709/problems.csv b/test_data/AE160709/problems.csv index f42d1a375f38f14e87a48cbeb3837de1f78fc6ff..1b6fff11956be28abf57bf268e862389704007f9 100644 --- a/test_data/AE160709/problems.csv +++ b/test_data/AE160709/problems.csv @@ -1,5 +1,42 @@ record_cxs,problem # +4352,missing data +4353,missing data +4354,missing data +4355,missing data +4356,missing data +4357,missing data +4358,missing data +4359,missing data +4360,missing data +4361,missing data +4362,missing data +4363,missing data +4364,missing data +4365,missing data +4366,missing data +4367,missing data +4368,missing data +4369,missing data +4370,missing data +4371,missing data +4372,missing data +4373,missing data +4374,missing data +4375,missing data +4376,missing data +4377,missing data +4378,missing data +4379,missing data +4380,missing data +4381,missing data +4382,missing data +4383,missing data +4384,missing data +4385,missing data +4386,missing data +4387,missing data +# 2,hatch closed 3,hatch closed 4,hatch closed diff --git a/test_data/AE160710/problems.csv b/test_data/AE160710/problems.csv index 798e95b4d9bdc52be46c2bbb6c5637aa886cec81..0f470d1bc6137fe9fa033faf031a6958ec90dd23 100644 --- a/test_data/AE160710/problems.csv +++ b/test_data/AE160710/problems.csv @@ -1,5 +1,7 @@ record_cxs,problem # +0,missing data +# 3,hatch closed 4,hatch closed 5,hatch closed diff --git a/test_data/AE160711/problems.csv b/test_data/AE160711/problems.csv index 1bd6389b4fb2fe60148594d30d37fe242f374731..87a5d9010b3e0a60007e1d1d8ce494c2cd8e5dfa 100644 --- a/test_data/AE160711/problems.csv +++ b/test_data/AE160711/problems.csv @@ -1,5 +1,14 @@ record_cxs,problem # +4461,missing data +4462,missing data +4463,missing data +4464,missing data +4465,missing data +4466,missing data +4467,missing data +4468,missing data +# 2,hatch closed 3,hatch closed 4,hatch closed diff --git a/test_data/AE160712/problems.csv b/test_data/AE160712/problems.csv index 3bf71be0d1fa4368ecceb54495a2b1679abc5e4f..d48e0518333b8cfd118408de5fc1ae667703acd6 100644 --- a/test_data/AE160712/problems.csv +++ b/test_data/AE160712/problems.csv @@ -1,5 +1,60 @@ record_cxs,problem # +4453,missing data +4454,missing data +4455,missing data +4456,missing data +4457,missing data +4458,missing data +4459,missing data +4460,missing data +4461,missing data +4462,missing data +4463,missing data +4464,missing data +4465,missing data +4466,missing data +4467,missing data +4468,missing data +4469,missing data +4470,missing data +4471,missing data +4472,missing data +4473,missing data +4474,missing data +4475,missing data +4476,missing data +4477,missing data +4478,missing data +4479,missing data +4480,missing data +4481,missing data +4482,missing data +4483,missing data +4484,missing data +4485,missing data +4486,missing data +4487,missing data +4488,missing data +4489,missing data +4490,missing data +4491,missing data +4492,missing data +4493,missing data +4494,missing data +4495,missing data +4496,missing data +4497,missing data +4498,missing data +4499,missing data +4500,missing data +4501,missing data +4502,missing data +4503,missing data +4504,missing data +4505,missing data +4506,missing data +# 2,hatch closed 3,hatch closed 4,hatch closed diff --git a/test_data/AE160713/problems.csv b/test_data/AE160713/problems.csv deleted file mode 100644 index c5e624456a66f48b9ffc2d45de0f6e04f6e26888..0000000000000000000000000000000000000000 --- a/test_data/AE160713/problems.csv +++ /dev/null @@ -1,117 +0,0 @@ -record_cxs,problem -# -2,hatch closed -3,hatch closed -4,hatch closed -5,hatch closed -6,hatch closed -7,hatch closed -1683,hatch closed -1684,hatch closed -1685,hatch closed -1686,hatch closed -1687,hatch closed -1688,hatch closed -1691,hatch closed -1692,hatch closed -1693,hatch closed -1694,hatch closed -1695,hatch closed -1696,hatch closed -1697,hatch closed -2125,hatch closed -2126,hatch closed -2127,hatch closed -2130,hatch closed -2131,hatch closed -2132,hatch closed -2133,hatch closed -2134,hatch closed -2135,hatch closed -2136,hatch closed -2137,hatch closed -2496,hatch closed -2499,hatch closed -2500,hatch closed -2501,hatch closed -2502,hatch closed -2503,hatch closed -2504,hatch closed -2505,hatch closed -2506,hatch closed -2509,hatch closed -2510,hatch closed -2699,hatch closed -2700,hatch closed -2701,hatch closed -2702,hatch closed -2703,hatch closed -2704,hatch closed -2705,hatch closed -3061,hatch closed -3062,hatch closed -3063,hatch closed -3064,hatch closed -3065,hatch closed -3066,hatch closed -3067,hatch closed -3741,hatch closed -3742,hatch closed -3743,hatch closed -3744,hatch closed -3745,hatch closed -3746,hatch closed -# -2130,safing -2131,safing -2132,safing -2133,safing -2134,safing -2135,safing -2136,safing -2137,safing -2138,safing -2139,safing -2140,safing -2141,safing -2142,safing -2143,safing -2144,safing -2145,safing -2146,safing -2147,safing -2490,safing -2491,safing -2492,safing -2493,safing -2494,safing -2495,safing -2496,safing -2497,safing -2499,safing -2500,safing -2501,safing -2502,safing -2503,safing -2504,safing -2505,safing -2506,safing -2507,safing -2508,safing -2509,safing -2510,safing -2511,safing -2512,safing -2513,safing -2514,safing -2515,safing -2516,safing -# -3059,HBBbottomTemp outlier -3060,HBBbottomTemp outlier -# -3059,HBBtopTemp outlier -3060,HBBtopTemp outlier -# -3059,HBBapexTemp outlier -3060,HBBapexTemp outlier diff --git a/thermal_checks.py b/thermal_checks.py index ce87a353b6d70e77770ef9de5a37f7967aeaa305..8ba6ebfea3be5e96162685549bafff86536462ff 100644 --- a/thermal_checks.py +++ b/thermal_checks.py @@ -3,9 +3,12 @@ import sklearn.covariance import scipy.stats import numpy as np +thermal_checks = BaseCheckList() + def thermal_correlation_check(frame, parameters): return frame +@thermal_checks.check(depends=['HBBbottomTemp','HBBtopTemp','HBBapexTemp']) def hbb_covariance_check(frame, parameters): hbbb_mean = parameters.get('HBBbottomTemp_mean', 333.101) hbbt_mean = parameters.get('HBBtopTemp_mean', 332.993) @@ -21,6 +24,3 @@ def hbb_covariance_check(frame, parameters): frame['hbb_covariance_check'] = (distances > 6) * 1 return frame - -class CheckList(BaseCheckList): - checks = [ thermal_correlation_check , hbb_covariance_check] diff --git a/util.py b/util.py index 8df0278c376778027f621fdb480c0caf36fd4150..79d67bbe700441977c02206d5e00d3e72f7b7fbd 100644 --- a/util.py +++ b/util.py @@ -1,6 +1,7 @@ from itertools import takewhile import numpy as np import pandas as pd +from functools import wraps def _compute_robust_zscore(frame, window_length): median_values = frame.rolling(window=window_length, center=True, min_periods=1).median() @@ -87,10 +88,25 @@ class BaseCheckList: def __init__(self, *args, **kwargs): self.check_results = {} self.parameters = {} + self.checks = {} def set_params(self, parameters): self.parameters = parameters + def check(self, *, depends, updates=[]): + def decorate(func): + func_params = {'depends':depends} + # checks always update a variable with the name of function + func_params['updates'] = {func.__name__}.union(updates) + @wraps(func) + def wrapper(frame, parameters): + if not np.in1d(depends, frame.columns).all(): + return frame + return func(frame, parameters) + self.checks[wrapper] = func_params + return wrapper + return decorate + def update_qc_percent(self, frame): for check_func in self.checks: name = check_func.__name__