def fill_na_sceneMirrorPosition(sceneMirrorPosition):
    """Fill missing scene mirror positions using the repeating scene pattern.

    The smallest shift at which the series agrees with a shifted copy of
    itself is taken as the period of the scene sequence.  Missing entries are
    then filled from the records whole periods before or after them.

    Parameters
    ----------
    sceneMirrorPosition : pandas.Series
        Numeric series (values are subtracted from each other) that may
        contain missing entries.  Its index may hold any labels; the work is
        done positionally.

    Returns
    -------
    pandas.Series
        The input object itself when nothing is missing.  Otherwise a new
        series carrying the original index, with as many gaps filled as the
        detected period allows.  If no period is found the values are
        returned unfilled, still on the original index.

    Notes
    -----
    A shift that leaves no overlapping pair of non-missing values is accepted
    as a period, because the sum over an empty comparison is zero.
    """
    if not sceneMirrorPosition.isnull().any():
        return sceneMirrorPosition

    original_index = sceneMirrorPosition.index
    # Work on a positionally indexed copy so missing (NaT) index labels
    # cannot interfere with aligning the series against its shifted self.
    filled = sceneMirrorPosition.reset_index(drop=True)
    n_records = len(filled)

    # Keep shifting until every overlapping, non-missing pair matches.
    period = None
    for candidate in range(1, n_records):
        mismatch = (filled - filled.shift(candidate)).abs().dropna().sum()
        if mismatch == 0:
            period = candidate
            break

    if period is not None:
        # Integer arithmetic keeps the shift amounts integral; look one, two,
        # ... whole periods back and then forward until no gaps remain.
        for multiple in range(1, n_records // period + 1):
            offset = multiple * period
            filled = filled.fillna(filled.shift(offset))
            filled = filled.fillna(filled.shift(-offset))
            if not filled.isnull().any():
                break

    # Restore the caller's index on every path, including "no period found".
    filled.index = original_index
    return filled
for future use - cxs['cxs_index'] = np.arange(len(cxs)) - # missing records will appear as rows with NaT index, clear them - cxs = cxs.ix[pd.notnull(cxs.index)] + cxs.index.name = 'datetime' + cxs.reset_index(inplace=True) + cxs.index.name = 'cxs_index' + # Fill in any missing scenesMirrorPositions + cxs['sceneMirrorPosition'] = fill_na_sceneMirrorPosition(cxs.sceneMirrorPosition) + # Find all non-calibration views + non_cal_records = cxs.ix[~cxs.sceneMirrorPosition.isin([ord('H'), ord('A')])].copy() + non_cal_records.reset_index(inplace=True) + non_cal_records.index.name = 'sum_index' # Read SUM as well sum_ = get_all_housekeeping(sum_file) - sum_['sum_index'] = np.arange(len(sum_)) - sum_ = sum_.ix[pd.notnull(sum_.index)] + sum_.index.name = 'datetime' + sum_.reset_index(inplace=True) + sum_.index.name = 'sum_index' - # Combine extra data from SUM into CXS, many columns will have during calibration views - hk = cxs.combine_first(sum_) - hk.index.name = 'datetime' - hk = hk.reset_index() + # Combine extra data from SUM into CXS + non_cal_records = non_cal_records.combine_first(sum_) + hk = cxs.combine_first(non_cal_records.reset_index().set_index('cxs_index')) hk.calibration_graph = compute_calibration_graph(hk.sceneMirrorPosition) return hk @@ -134,18 +164,18 @@ def update_all(ftp_dir, sci_dir, checklist, parameters=None): parameters = {} # check for spikes in interferograms and add that quality column to housekeeping frame # merging by datetimes is not expected to work, will probably interleave with SUM records - frame_with_spikes = frame.merge(spike_check(igms, parameters), on='datetime', how='outer', suffixes=('','_igm'), sort=True) + #frame_with_spikes = frame.merge(spike_check(igms, parameters), on='datetime', how='outer', suffixes=('','_igm')) # Propogate spike data to surrounding records # Only propogate presence of spikes, not abscence - frame_with_spikes.ix[frame_with_spikes.spike_check == False] = pd.np.nan - frame_with_spikes['spike_check'] = 
frame_with_spikes.spike_check.ffill(limit=1).bfill(limit=1) + #frame_with_spikes.ix[frame_with_spikes.spike_check == False] = pd.np.nan + #frame_with_spikes['spike_check'] = frame_with_spikes.spike_check.ffill(limit=1).bfill(limit=1) # Reindex back to housekeeping frame (union of sum and cxs records), removing interleaved spike data - frame_with_spikes = frame_with_spikes.ix[frame.index] - frame_with_spikes.calibration_graph = frame.calibration_graph + #frame_with_spikes = frame_with_spikes.ix[frame.index] + #frame_with_spikes.calibration_graph = frame.calibration_graph # Perform qc on housekeeping frame - frame_with_spikes = check_frame(frame_with_spikes, parameters, checklist) + frame = check_frame(frame, parameters, checklist) - save_quality(frame_with_spikes, qc_file) + save_quality(frame, qc_file) def files_to_update(cxs_files, update_only=True): """ -- GitLab