Skip to content
Snippets Groups Projects
Commit 242730a6 authored by Coda Phillips's avatar Coda Phillips
Browse files

Add qc variables to netcdf

parent 99ac579c
No related branches found
No related tags found
No related merge requests found
from util import BaseCheckList, annotate_all, invalidate_records from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc
import numpy as np import numpy as np
import pandas as pd import pandas as pd
...@@ -12,6 +12,13 @@ def find_bb_outliers(frame, parameters, bb): ...@@ -12,6 +12,13 @@ def find_bb_outliers(frame, parameters, bb):
bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length) bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length)
bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length) bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length)
variable_qcs = pd.DataFrame({
'qc_{}bottomTemp'.format(bb) : bbb_outliers * 1,
'qc_{}topTemp'.format(bb) : bbt_outliers * 1,
'qc_{}apexTemp'.format(bb) : bba_outliers * 1
}, index=frame.index)
frame = update_variable_qc(frame, variable_qcs)
annotate_all(frame, bbb_outliers, '{} bottom temperature outlier'.format(bb)) annotate_all(frame, bbb_outliers, '{} bottom temperature outlier'.format(bb))
annotate_all(frame, bba_outliers, '{} apex temperature outlier'.format(bb)) annotate_all(frame, bba_outliers, '{} apex temperature outlier'.format(bb))
annotate_all(frame, bbt_outliers, '{} top temperature outlier'.format(bb)) annotate_all(frame, bbt_outliers, '{} top temperature outlier'.format(bb))
...@@ -34,6 +41,7 @@ def calibrationambienttemp_outlier_check(frame, parameters): ...@@ -34,6 +41,7 @@ def calibrationambienttemp_outlier_check(frame, parameters):
temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length, use_mean=True) temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length, use_mean=True)
frame['calibrationambienttemp_outlier_check'] = temp_outliers * 1 frame['calibrationambienttemp_outlier_check'] = temp_outliers * 1
frame = update_variable_qc(frame, pd.DataFrame({'qc_calibrationAmbientTemp':temp_outliers*1}, index=frame.index))
annotate_all(frame, temp_outliers, 'calibrationAmbientTemp outlier') annotate_all(frame, temp_outliers, 'calibrationAmbientTemp outlier')
return frame return frame
......
...@@ -4,6 +4,7 @@ import re ...@@ -4,6 +4,7 @@ import re
import netCDF4 import netCDF4
from aeri_tools.io.dmv.housekeeping import get_all_housekeeping from aeri_tools.io.dmv.housekeeping import get_all_housekeeping
import electronic_checks import electronic_checks
import global_checks import global_checks
import radiometric_checks import radiometric_checks
...@@ -25,13 +26,16 @@ def save_quality(frame, qc_path): ...@@ -25,13 +26,16 @@ def save_quality(frame, qc_path):
ncdf = netCDF4.Dataset(qc_path, 'w') ncdf = netCDF4.Dataset(qc_path, 'w')
time = ncdf.createDimension('time', len(frame)) time = ncdf.createDimension('time', len(frame))
base_time = ncdf.createVariable('base_time', 'i8', ()) base_time = ncdf.createVariable('base_time', 'i8', ())
time_offset = ncdf.createVariable('time_offset', 'f8', ('time',)) time_offset = ncdf.createVariable('time_offset', 'i8', ('time',))
qc_percent = ncdf.createVariable('qc_percent', 'f4', ('time',)) qc_percent = ncdf.createVariable('qc_percent', 'f4', ('time',))
qc_notes = ncdf.createVariable('qc_notes', str, ('time',)) qc_notes = ncdf.createVariable('qc_notes', str, ('time',))
for check_mask in frame.filter(like='check'): for check_mask in frame.filter(like='_check'):
ncdf.createVariable(check_mask, 'f4', ('time',))[:] = frame[check_mask].values ncdf.createVariable(check_mask, 'f4', ('time',))[:] = frame[check_mask].values
base_time[:] = frame.datetime[0].timestamp() for variable_qc in frame.filter(like='qc_'):
time_offset[:] = (frame.datetime - frame.datetime[0]).values / 1e9 if variable_qc not in ['qc_notes','qc_percent']:
ncdf.createVariable(variable_qc, 'f4', ('time',))[:] = frame[variable_qc].values
base_time[:] = frame.datetime[0].to_datetime64()
time_offset[:] = (frame.datetime - frame.datetime[0]).values
qc_percent[:] = frame['qc_percent'].values qc_percent[:] = frame['qc_percent'].values
qc_notes[:] = frame['qc_notes'].fillna('').values qc_notes[:] = frame['qc_notes'].fillna('').values
ncdf.close() ncdf.close()
...@@ -61,7 +65,7 @@ def update_all(ftp_dir, parameters=None): ...@@ -61,7 +65,7 @@ def update_all(ftp_dir, parameters=None):
frame = check_frame(frame, parameters) frame = check_frame(frame, parameters)
save_quality(frame, qc_file) save_quality(frame, qc_file)
def files_to_update(cxs_files): def files_to_update(cxs_files, update_only=True):
for cxs_file in cxs_files: for cxs_file in cxs_files:
possible_sum = os.path.join(os.path.dirname(cxs_file), cxs_file.replace('B1.CXS','.SUM')) possible_sum = os.path.join(os.path.dirname(cxs_file), cxs_file.replace('B1.CXS','.SUM'))
possible_qc = os.path.join(os.path.dirname(cxs_file), cxs_file.replace('B1.CXS','.qc')) possible_qc = os.path.join(os.path.dirname(cxs_file), cxs_file.replace('B1.CXS','.qc'))
...@@ -73,6 +77,8 @@ def files_to_update(cxs_files): ...@@ -73,6 +77,8 @@ def files_to_update(cxs_files):
qc_file = possible_qc qc_file = possible_qc
if max(os.path.getmtime(sum_file), os.path.getmtime(cxs_file)) > os.path.getmtime(qc_file): if max(os.path.getmtime(sum_file), os.path.getmtime(cxs_file)) > os.path.getmtime(qc_file):
yield (qc_file, cxs_file, sum_file) yield (qc_file, cxs_file, sum_file)
elif not update_only:
yield (qc_file, cxs_file, sum_file)
else: else:
yield (possible_qc, cxs_file, sum_file) yield (possible_qc, cxs_file, sum_file)
......
from util import BaseCheckList, invalidate_records, annotate_all from util import BaseCheckList, invalidate_records, annotate_all, update_variable_qc
import pandas as pd import pandas as pd
import numpy as np import numpy as np
...@@ -10,6 +10,7 @@ def detector_check(frame, parameters): ...@@ -10,6 +10,7 @@ def detector_check(frame, parameters):
return frame return frame
detector_temp_too_high = (frame['detectorTemp'] > 90) detector_temp_too_high = (frame['detectorTemp'] > 90)
frame['detector_check'] = detector_temp_too_high * 1 frame['detector_check'] = detector_temp_too_high * 1
frame['qc_detectorTemp'] = detector_temp_too_high * 1
annotate_all(frame, detector_temp_too_high, 'detector temperature too high') annotate_all(frame, detector_temp_too_high, 'detector temperature too high')
frame = invalidate_records(frame, 'detector_check') frame = invalidate_records(frame, 'detector_check')
return frame return frame
...@@ -29,6 +30,17 @@ def hbb_thermistor_check(frame, parameters): ...@@ -29,6 +30,17 @@ def hbb_thermistor_check(frame, parameters):
hbbb_problem = hbbb_too_low | hbbb_too_high hbbb_problem = hbbb_too_low | hbbb_too_high
hbba_problem = hbba_too_low | hbba_too_high hbba_problem = hbba_too_low | hbba_too_high
hbbt_problem = hbbt_too_low | hbbt_too_high hbbt_problem = hbbt_too_low | hbbt_too_high
# Record qc for each thermistor
# qc variables are probabilities between 0 and 1
variable_qcs = pd.DataFrame({
'qc_HBBbottomTemp':hbbb_problem * 1,
'qc_HBBapexTemp' : hbba_problem * 1,
'qc_HBBtopTemp' : hbbt_problem * 1
}, index=frame.index)
frame = update_variable_qc(frame, variable_qcs)
# Compute overall BB quality
frame['hbb_thermistor_check'] = (hbbb_problem | hbba_problem | hbbt_problem) * 1 frame['hbb_thermistor_check'] = (hbbb_problem | hbba_problem | hbbt_problem) * 1
annotate_all(frame, hbbb_too_low, 'HBB bottom temperature too low') annotate_all(frame, hbbb_too_low, 'HBB bottom temperature too low')
annotate_all(frame, hbbt_too_low, 'HBB top temperature too low') annotate_all(frame, hbbt_too_low, 'HBB top temperature too low')
......
...@@ -9,6 +9,15 @@ def annotate(frame, loc, annotation): ...@@ -9,6 +9,15 @@ def annotate(frame, loc, annotation):
else: else:
frame.loc[loc, 'qc_notes'] = annotation frame.loc[loc, 'qc_notes'] = annotation
def update_variable_qc(frame, variable_qcs):
    """Merge per-variable QC values into ``frame`` and return the new frame.

    Each column of ``variable_qcs`` is combined with the same-named column
    of ``frame`` as ``x + y - x*y``.  With QC values in the range 0..1 this
    never decreases an existing value, and combining with 0 leaves a value
    unchanged.  Cells missing on either side are treated as 0 before
    combining, so a QC column that does not exist in ``frame`` yet is
    created from ``variable_qcs``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Records being checked.  May or may not contain a ``qc_notes`` column.
    variable_qcs : pandas.DataFrame
        New QC values, one column per ``qc_<variable>``; callers build it
        on ``frame.index``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``frame`` itself is not modified.  Column order follows
        pandas' alignment rules and may differ from the input.
    """
    def proba_update(current, incoming):
        # Columns whose values do not support this arithmetic (e.g. text)
        # raise TypeError; they are passed through untouched.
        try:
            return current + incoming - current * incoming
        except TypeError:
            return current

    # qc_notes is kept out of the combine step and re-attached afterwards;
    # presumably so that fill_value=0 does not replace missing notes with 0.
    # A frame without qc_notes is combined directly instead of raising
    # KeyError.
    has_notes = 'qc_notes' in frame.columns
    without_notes = frame.drop('qc_notes', axis=1) if has_notes else frame

    merged = without_notes.combine(variable_qcs, proba_update, fill_value=0)
    if has_notes:
        merged = merged.combine_first(frame[['qc_notes']])
    return merged
def annotate_all(frame, mask, annotation): def annotate_all(frame, mask, annotation):
for loc in frame.index[mask]: for loc in frame.index[mask]:
annotate(frame, loc, annotation) annotate(frame, loc, annotation)
...@@ -83,4 +92,4 @@ class BaseCheckList: ...@@ -83,4 +92,4 @@ class BaseCheckList:
original_shape = filtered_frame.shape original_shape = filtered_frame.shape
filtered_frame = check(filtered_frame, self.parameters) filtered_frame = check(filtered_frame, self.parameters)
assert filtered_frame.shape[0] == original_shape[0] assert filtered_frame.shape[0] == original_shape[0]
return self.update_qc_percent(frame.combine_first(filtered_frame)) return self.update_qc_percent(filtered_frame.combine_first(frame))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment