diff --git a/aeri_qc/all_checks.py b/aeri_qc/all_checks.py index a3606c8ad07ef451be9f3b43b91e792756b4639a..17c8cfa3e3433a0f89344fb4c64c434e9086dc6e 100644 --- a/aeri_qc/all_checks.py +++ b/aeri_qc/all_checks.py @@ -53,12 +53,6 @@ class Check(object): def __repr__(self): return 'Check({})'.format(self.name) - def update_qc_percent(self, frame): - if 'qc_percent' not in frame.columns: - frame['qc_percent'] = 0 - if self.name in frame.columns: - frame['qc_percent'] = frame['qc_percent'] + frame[self.name] - frame['qc_percent']*frame[self.name] - return frame def invalidate_calibration(self,invalid_records, frame): invalid_records = invalid_records.copy() @@ -101,7 +95,6 @@ class Checklist(object): frame = check(checklist, frame, parameters) another_check_completed = True uncompleted_checks.remove(check) - frame = check.update_qc_percent(frame) assert uncompleted_checks == [] return frame diff --git a/aeri_qc/igm_checks.py b/aeri_qc/igm_checks.py index 7df2a82f473b0c5c7669aa53a2d918a807d0d96e..44cc2ea70422cc96fc921c7626d4e1bd83d0cd37 100644 --- a/aeri_qc/igm_checks.py +++ b/aeri_qc/igm_checks.py @@ -11,8 +11,8 @@ def spike_check(igms, parameters): # Compute statistics data_a_mean = igms.DataA.mean(axis=0) data_b_mean = igms.DataB.mean(axis=0) - data_a_std = np.vstack(igms.DataA.values).std(axis=0) - data_b_std = np.vstack(igms.DataB.values).std(axis=0) + data_a_std = np.vstack(igms.DataA.dropna().values).std(axis=0) + data_b_std = np.vstack(igms.DataB.dropna().values).std(axis=0) # Check z-scores in both DataA and DataB any_spikes_in_data_a = igms.DataA.apply(lambda data_a: (abs((data_a - data_a_mean)/data_a_std) > 10).any()) @@ -21,16 +21,14 @@ def spike_check(igms, parameters): # Create DataFrame with flags igms = igms.drop(['DataA','DataB'], axis=1) igms['spike_check'] = any_spikes_in_data_a | any_spikes_in_data_b - datetime_grouped = igms.groupby('datetime') + cxs_index_grouped = igms.groupby('cxs_index') # Each Igm file usually has two subfiles (one for each scan) # each scan has the same time and sceneMirrorPosition # reduce down to one row per datetime - return pd.concat([ - datetime_grouped[['spike_check']].any() * 1.0, - datetime_grouped[['sceneMirrorPosition']].first() - ], axis=1).reset_index() - + frame = cxs_index_grouped.first() + frame['spike_check'] = cxs_index_grouped[['spike_check']].any() * 1.0 + return frame.reset_index() #### # Tests diff --git a/aeri_qc/main.py b/aeri_qc/main.py index 717131bab228c54e7200ef5533a9e967c8f29cae..3258df5cfa09af38ab964e9de534412117937c29 100644 --- a/aeri_qc/main.py +++ b/aeri_qc/main.py @@ -41,6 +41,18 @@ def save_quality(frame, qc_path, checklist): variable.hides = ','.join(check.hides) variable[:] = frame[check.name].values + spike_check_variable = ncdf.createVariable('spike_check','f4',('time',)) + spike_check_variable.depends = '' + spike_check_variable.affects_calibration = "True" + spike_check_variable.affected_by_calibration = "False" + spike_check_variable.description = 'Check for spikes in interferometer caused by faulty electronics' + spike_check_variable.hides = '' + # Do the spike check separately for now + if 'spike_check' in frame.columns: + spike_check_variable[:] = frame.spike_check.values + else: + spike_check_variable[:] = 0 + # Write time information time[:] = frame.datetime.values @@ -179,8 +191,10 @@ def prepare_frame(cxs_file, sum_file, sci_dir): ), columns=['scene_index','sceneMirrorPosition']).reset_index().rename(columns={'index':'cxs_index'}) igms = pd.merge(igms, mapping_from_scene_to_record, how='inner', on=['sceneMirrorPosition','scene_index']) # Add columns from igms, notably DataA, DataB + cal_graph = frame.calibration_graph frame = frame.merge(igms, on=['cxs_index'], how='outer', suffixes=('','_igm'), sort=True) frame['sceneMirrorPosition'].fillna(frame.sceneMirrorPosition_igm, inplace=True) + frame.calibration_graph = cal_graph return frame @@ -193,7 +207,7 @@ def update_all(ftp_dir, sci_dir, checklist, parameters=None): # For each CXS file find a matching SUM file and possible QC filename for qc_file, cxs_file, sum_file in files_to_update(cxs_files): print('Performing quality control for {}'.format(cxs_file)) - frame = prepare_frame(cxs_file, sum_file, sci_dir, mirror_beg) + frame = prepare_frame(cxs_file, sum_file, sci_dir) if parameters is None: parameters = {}