Skip to content
Snippets Groups Projects
Commit 1cf5ca7d authored by Coda Phillips
Browse files

Introduce igm checks

parent 665d345f
No related branches found
Tags cspp-r20200921-0
No related merge requests found
......@@ -53,12 +53,6 @@ class Check(object):
def __repr__(self):
    """Return a short debugging representation: ``Check(<name>)``.

    Only ``self.name`` is read; it is inserted with ``str.format``.
    """
    return 'Check({})'.format(self.name)
def update_qc_percent(self, frame):
    """Fold this check's column into the frame's ``qc_percent`` column.

    Parameters
    ----------
    frame
        Table-like object exposing ``.columns`` and column item access
        (presumably a pandas DataFrame -- confirm against the caller).
        It is modified in place.

    Returns
    -------
    The same ``frame`` object that was passed in.

    Notes
    -----
    * If ``qc_percent`` is missing it is created and initialised to 0.
    * If the frame has no column named ``self.name`` the existing
      ``qc_percent`` values are left untouched.
    * Otherwise the two columns are combined as ``a + b - a*b``, which
      stays within [0, 1] whenever both inputs do.
    """
    if 'qc_percent' not in frame.columns:
        frame['qc_percent'] = 0
    if self.name in frame.columns:
        frame['qc_percent'] = frame['qc_percent'] + frame[self.name] - frame['qc_percent']*frame[self.name]
    return frame
def invalidate_calibration(self,invalid_records, frame):
invalid_records = invalid_records.copy()
......@@ -101,7 +95,6 @@ class Checklist(object):
frame = check(checklist, frame, parameters)
another_check_completed = True
uncompleted_checks.remove(check)
frame = check.update_qc_percent(frame)
assert uncompleted_checks == []
return frame
......
......@@ -11,8 +11,8 @@ def spike_check(igms, parameters):
# Compute statistics
data_a_mean = igms.DataA.mean(axis=0)
data_b_mean = igms.DataB.mean(axis=0)
data_a_std = np.vstack(igms.DataA.values).std(axis=0)
data_b_std = np.vstack(igms.DataB.values).std(axis=0)
data_a_std = np.vstack(igms.DataA.dropna().values).std(axis=0)
data_b_std = np.vstack(igms.DataB.dropna().values).std(axis=0)
# Check z-scores in both DataA and DataB
any_spikes_in_data_a = igms.DataA.apply(lambda data_a: (abs((data_a - data_a_mean)/data_a_std) > 10).any())
......@@ -21,16 +21,14 @@ def spike_check(igms, parameters):
# Create DataFrame with flags
igms = igms.drop(['DataA','DataB'], axis=1)
igms['spike_check'] = any_spikes_in_data_a | any_spikes_in_data_b
datetime_grouped = igms.groupby('datetime')
cxs_index_grouped = igms.groupby('cxs_index')
# Each Igm file usually has two subfiles (one for each scan)
# each scan has the same time and sceneMirrorPosition
# reduce down to one row per datetime
return pd.concat([
datetime_grouped[['spike_check']].any() * 1.0,
datetime_grouped[['sceneMirrorPosition']].first()
], axis=1).reset_index()
frame = cxs_index_grouped.first()
frame['spike_check'] = cxs_index_grouped[['spike_check']].any() * 1.0
return frame.reset_index()
####
# Tests
......
......@@ -41,6 +41,18 @@ def save_quality(frame, qc_path, checklist):
variable.hides = ','.join(check.hides)
variable[:] = frame[check.name].values
spike_check_variable = ncdf.createVariable('spike_check','f4',('time',))
spike_check_variable.depends = ''
spike_check_variable.affects_calibration = "True"
spike_check_variable.affected_by_calibration = "False"
spike_check_variable.description = 'Check for spikes in interferometer caused by faulty electronics'
spike_check_variable.hides = ''
# Do the spike check separately for now
if 'spike_check' in frame.columns:
spike_check_variable[:] = frame.spike_check.values
else:
spike_check_variable[:] = 0
# Write time information
time[:] = frame.datetime.values
......@@ -179,8 +191,10 @@ def prepare_frame(cxs_file, sum_file, sci_dir):
), columns=['scene_index','sceneMirrorPosition']).reset_index().rename(columns={'index':'cxs_index'})
igms = pd.merge(igms, mapping_from_scene_to_record, how='inner', on=['sceneMirrorPosition','scene_index'])
# Add columns from igms, notably DataA, DataB
cal_graph = frame.calibration_graph
frame = frame.merge(igms, on=['cxs_index'], how='outer', suffixes=('','_igm'), sort=True)
frame['sceneMirrorPosition'].fillna(frame.sceneMirrorPosition_igm, inplace=True)
frame.calibration_graph = cal_graph
return frame
......@@ -193,7 +207,7 @@ def update_all(ftp_dir, sci_dir, checklist, parameters=None):
# For each CXS file find a matching SUM file and possible QC filename
for qc_file, cxs_file, sum_file in files_to_update(cxs_files):
print('Performing quality control for {}'.format(cxs_file))
frame = prepare_frame(cxs_file, sum_file, sci_dir, mirror_beg)
frame = prepare_frame(cxs_file, sum_file, sci_dir)
if parameters is None:
parameters = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment