From 9713d098a5bae3070ac6bf12a5b151a38b7a2d27 Mon Sep 17 00:00:00 2001
From: Coda Phillips <cphillips@sean.ssec.wisc.edu>
Date: Fri, 26 Aug 2016 19:43:36 -0500
Subject: [PATCH] Merge CXS and SUM by imputing scene mirror positions and
 removing calibrated scenes

---
 main.py | 66 +++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/main.py b/main.py
index f2aec36..353075d 100644
--- a/main.py
+++ b/main.py
@@ -52,28 +52,58 @@ def compute_calibration_graph(sceneMirrorPosition):
             affected_records[blackbody_index].update(sky_view_indices)
     return affected_records
 
+def fill_na_sceneMirrorPosition(sceneMirrorPosition):
+    if not sceneMirrorPosition.isnull().any():
+        return sceneMirrorPosition
+    datetime_index = sceneMirrorPosition.index
+    # Work on a plain integer index so NaT labels in the original index don't interfere with the shift/fillna steps below
+    sceneMirrorPosition = sceneMirrorPosition.reset_index(drop=True)
+    # Find the smallest shift at which the series equals its shifted copy wherever both values are present
+    for i in range(1, len(sceneMirrorPosition)):
+        if abs(sceneMirrorPosition - sceneMirrorPosition.shift(i)).dropna().sum() == 0:
+            period = i
+            break
+    else:
+        return sceneMirrorPosition
+    # Fill gaps from values a whole number of periods earlier or later, stopping once no gaps remain
+    for i in np.arange(1, len(sceneMirrorPosition)/period+1):
+        sceneMirrorPosition.fillna(sceneMirrorPosition.shift(i*period), inplace=True)
+        sceneMirrorPosition.fillna(sceneMirrorPosition.shift(-i*period), inplace=True)
+        if not sceneMirrorPosition.isnull().any():
+            break
+
+    sceneMirrorPosition.index = datetime_index
+
+    return sceneMirrorPosition
+    
+
 def read_frame(cxs_file, sum_file):
     """
     Read housekeeping from CXS file and SUM file together
 
     Returns DataFrame with range index, datetime column, and sum_index column, and housekeeping data
     """
-    # Get CXS housekeeping as dataframe
+    # Get CXS housekeeping as dataframe, move the datetime index into a column and replace it with an integer index
     cxs = get_all_housekeeping(cxs_file)
-    # Save the record numbers for future use
-    cxs['cxs_index'] = np.arange(len(cxs))
-    # missing records will appear as rows with NaT index, clear them
-    cxs = cxs.ix[pd.notnull(cxs.index)]
+    cxs.index.name = 'datetime'
+    cxs.reset_index(inplace=True)
+    cxs.index.name = 'cxs_index'
+    # Fill in any missing sceneMirrorPosition values
+    cxs['sceneMirrorPosition'] = fill_na_sceneMirrorPosition(cxs.sceneMirrorPosition)
+    # Find all non-calibration views
+    non_cal_records = cxs.ix[~cxs.sceneMirrorPosition.isin([ord('H'), ord('A')])].copy()
+    non_cal_records.reset_index(inplace=True)
+    non_cal_records.index.name = 'sum_index'
 
     # Read SUM as well
     sum_ = get_all_housekeeping(sum_file)
-    sum_['sum_index'] = np.arange(len(sum_))
-    sum_ = sum_.ix[pd.notnull(sum_.index)]
+    sum_.index.name = 'datetime'
+    sum_.reset_index(inplace=True)
+    sum_.index.name = 'sum_index'
     
-    # Combine extra data from SUM into CXS, many columns will have during calibration views
-    hk = cxs.combine_first(sum_)
-    hk.index.name = 'datetime'
-    hk = hk.reset_index()
+    # Combine extra data from SUM into CXS
+    non_cal_records = non_cal_records.combine_first(sum_)
+    hk = cxs.combine_first(non_cal_records.reset_index().set_index('cxs_index'))
     hk.calibration_graph = compute_calibration_graph(hk.sceneMirrorPosition)
     return hk
 
@@ -134,18 +164,18 @@ def update_all(ftp_dir, sci_dir, checklist, parameters=None):
             parameters = {}
         # check for spikes in interferograms and add that quality column to housekeeping frame
         # merging by datetimes is not expected to work, will probably interleave with SUM records
-        frame_with_spikes = frame.merge(spike_check(igms, parameters), on='datetime', how='outer', suffixes=('','_igm'), sort=True)
+        #frame_with_spikes = frame.merge(spike_check(igms, parameters), on='datetime', how='outer', suffixes=('','_igm'))
         # Propogate spike data to surrounding records
         # Only propogate presence of spikes, not abscence
-        frame_with_spikes.ix[frame_with_spikes.spike_check == False] = pd.np.nan
-        frame_with_spikes['spike_check'] = frame_with_spikes.spike_check.ffill(limit=1).bfill(limit=1)
+        #frame_with_spikes.ix[frame_with_spikes.spike_check == False] = pd.np.nan
+        #frame_with_spikes['spike_check'] = frame_with_spikes.spike_check.ffill(limit=1).bfill(limit=1)
         # Reindex back to housekeeping frame (union of sum and cxs records), removing interleaved spike data
-        frame_with_spikes = frame_with_spikes.ix[frame.index]
-        frame_with_spikes.calibration_graph = frame.calibration_graph
+        #frame_with_spikes = frame_with_spikes.ix[frame.index]
+        #frame_with_spikes.calibration_graph = frame.calibration_graph
         # Perform qc on housekeeping frame
-        frame_with_spikes = check_frame(frame_with_spikes, parameters, checklist)
+        frame = check_frame(frame, parameters, checklist)
 
-        save_quality(frame_with_spikes, qc_file)
+        save_quality(frame, qc_file)
 
 def files_to_update(cxs_files, update_only=True):
     """
-- 
GitLab