Skip to content
Snippets Groups Projects
Commit 242730a6 authored by Coda Phillips's avatar Coda Phillips
Browse files

Add qc variables to netcdf

parent 99ac579c
No related branches found
No related tags found
No related merge requests found
from util import BaseCheckList, annotate_all, invalidate_records
from util import BaseCheckList, annotate_all, invalidate_records, update_variable_qc
import numpy as np
import pandas as pd
......@@ -12,6 +12,13 @@ def find_bb_outliers(frame, parameters, bb):
bba_outliers = _find_6sigma_outliers(frame['{}apexTemp'.format(bb)], window_length)
bbt_outliers = _find_6sigma_outliers(frame['{}topTemp'.format(bb)], window_length)
variable_qcs = pd.DataFrame({
'qc_{}bottomTemp'.format(bb) : bbb_outliers * 1,
'qc_{}topTemp'.format(bb) : bbt_outliers * 1,
'qc_{}apexTemp'.format(bb) : bba_outliers * 1
}, index=frame.index)
frame = update_variable_qc(frame, variable_qcs)
annotate_all(frame, bbb_outliers, '{} bottom temperature outlier'.format(bb))
annotate_all(frame, bba_outliers, '{} apex temperature outlier'.format(bb))
annotate_all(frame, bbt_outliers, '{} top temperature outlier'.format(bb))
......@@ -34,6 +41,7 @@ def calibrationambienttemp_outlier_check(frame, parameters):
temp_outliers = _find_6sigma_outliers(frame['calibrationAmbientTemp'], window_length, use_mean=True)
frame['calibrationambienttemp_outlier_check'] = temp_outliers * 1
frame = update_variable_qc(frame, pd.DataFrame({'qc_calibrationAmbientTemp':temp_outliers*1}, index=frame.index))
annotate_all(frame, temp_outliers, 'calibrationAmbientTemp outlier')
return frame
......
......@@ -4,6 +4,7 @@ import re
import netCDF4
from aeri_tools.io.dmv.housekeeping import get_all_housekeeping
import electronic_checks
import global_checks
import radiometric_checks
......@@ -25,13 +26,16 @@ def save_quality(frame, qc_path):
ncdf = netCDF4.Dataset(qc_path, 'w')
time = ncdf.createDimension('time', len(frame))
base_time = ncdf.createVariable('base_time', 'i8', ())
time_offset = ncdf.createVariable('time_offset', 'f8', ('time',))
time_offset = ncdf.createVariable('time_offset', 'i8', ('time',))
qc_percent = ncdf.createVariable('qc_percent', 'f4', ('time',))
qc_notes = ncdf.createVariable('qc_notes', str, ('time',))
for check_mask in frame.filter(like='check'):
for check_mask in frame.filter(like='_check'):
ncdf.createVariable(check_mask, 'f4', ('time',))[:] = frame[check_mask].values
base_time[:] = frame.datetime[0].timestamp()
time_offset[:] = (frame.datetime - frame.datetime[0]).values / 1e9
for variable_qc in frame.filter(like='qc_'):
if variable_qc not in ['qc_notes','qc_percent']:
ncdf.createVariable(variable_qc, 'f4', ('time',))[:] = frame[variable_qc].values
base_time[:] = frame.datetime[0].to_datetime64()
time_offset[:] = (frame.datetime - frame.datetime[0]).values
qc_percent[:] = frame['qc_percent'].values
qc_notes[:] = frame['qc_notes'].fillna('').values
ncdf.close()
......@@ -61,7 +65,7 @@ def update_all(ftp_dir, parameters=None):
frame = check_frame(frame, parameters)
save_quality(frame, qc_file)
def files_to_update(cxs_files):
def files_to_update(cxs_files, update_only=True):
for cxs_file in cxs_files:
possible_sum = os.path.join(os.path.dirname(cxs_file), cxs_file.replace('B1.CXS','.SUM'))
possible_qc = os.path.join(os.path.dirname(cxs_file), cxs_file.replace('B1.CXS','.qc'))
......@@ -73,6 +77,8 @@ def files_to_update(cxs_files):
qc_file = possible_qc
if max(os.path.getmtime(sum_file), os.path.getmtime(cxs_file)) > os.path.getmtime(qc_file):
yield (qc_file, cxs_file, sum_file)
elif not update_only:
yield (qc_file, cxs_file, sum_file)
else:
yield (possible_qc, cxs_file, sum_file)
......
from util import BaseCheckList, invalidate_records, annotate_all
from util import BaseCheckList, invalidate_records, annotate_all, update_variable_qc
import pandas as pd
import numpy as np
......@@ -10,6 +10,7 @@ def detector_check(frame, parameters):
return frame
detector_temp_too_high = (frame['detectorTemp'] > 90)
frame['detector_check'] = detector_temp_too_high * 1
frame['qc_detectorTemp'] = detector_temp_too_high * 1
annotate_all(frame, detector_temp_too_high, 'detector temperature too high')
frame = invalidate_records(frame, 'detector_check')
return frame
......@@ -29,6 +30,17 @@ def hbb_thermistor_check(frame, parameters):
hbbb_problem = hbbb_too_low | hbbb_too_high
hbba_problem = hbba_too_low | hbba_too_high
hbbt_problem = hbbt_too_low | hbbt_too_high
# Record qc for each thermistor
# qc variables are probabilities between 0 and 1
variable_qcs = pd.DataFrame({
'qc_HBBbottomTemp':hbbb_problem * 1,
'qc_HBBapexTemp' : hbba_problem * 1,
'qc_HBBtopTemp' : hbbt_problem * 1
}, index=frame.index)
frame = update_variable_qc(frame, variable_qcs)
# Compute overall BB quality
frame['hbb_thermistor_check'] = (hbbb_problem | hbba_problem | hbbt_problem) * 1
annotate_all(frame, hbbb_too_low, 'HBB bottom temperature too low')
annotate_all(frame, hbbt_too_low, 'HBB top temperature too low')
......
......@@ -9,6 +9,15 @@ def annotate(frame, loc, annotation):
else:
frame.loc[loc, 'qc_notes'] = annotation
def update_variable_qc(frame, variable_qcs):
    """Merge new per-variable QC values into ``frame`` and return the result.

    Each column of ``variable_qcs`` is treated as a probability in [0, 1].
    When ``frame`` already has a column of the same name, the two values are
    combined with the probabilistic OR ``x + y - x*y``; otherwise the column
    is added as-is.

    Only the columns named in ``variable_qcs`` are modified. Every other
    column of ``frame`` (including ``qc_notes``, if present) keeps its values,
    NaNs and dtype untouched.

    Parameters
    ----------
    frame : pandas.DataFrame
        Records being checked. It is not modified in place.
    variable_qcs : pandas.DataFrame
        New QC values, one column per QC variable. It is aligned to
        ``frame.index``; rows with no value contribute 0.

    Returns
    -------
    pandas.DataFrame
        A copy of ``frame`` with the QC columns updated or appended.
    """
    updated = frame.copy()
    # Align on the frame's rows so the row count never changes; a missing
    # value means "no new evidence of a problem", i.e. probability 0.
    new_qcs = variable_qcs.reindex(updated.index).fillna(0)
    for column in new_qcs.columns:
        incoming = new_qcs[column]
        if column in updated.columns:
            current = updated[column].fillna(0)
            # Probability that at least one of the two flags is raised.
            updated[column] = current + incoming - current * incoming
        else:
            updated[column] = incoming
    return updated
def annotate_all(frame, mask, annotation):
    """Apply ``annotate`` with ``annotation`` to every row of ``frame`` selected by ``mask``.

    ``mask`` is used to index ``frame.index``; each resulting label is passed
    to ``annotate`` in turn.
    """
    flagged_labels = frame.index[mask]
    for label in flagged_labels:
        annotate(frame, label, annotation)
......@@ -83,4 +92,4 @@ class BaseCheckList:
original_shape = filtered_frame.shape
filtered_frame = check(filtered_frame, self.parameters)
assert filtered_frame.shape[0] == original_shape[0]
return self.update_qc_percent(frame.combine_first(filtered_frame))
return self.update_qc_percent(filtered_frame.combine_first(frame))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment