From b1cf2235c4c0fc673e46ecedc088851b1a5f91b0 Mon Sep 17 00:00:00 2001
From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01>
Date: Thu, 20 May 2010 16:34:55 +0000
Subject: [PATCH] changing how bin/tuple reordering is done

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@112 8a9318a1-56ba-4d59-b755-99d26321be01
---
 pyglance/glance/delta.py         | 194 +++++++++++++++++++++----------
 pyglance/glance/plotcreatefns.py |  19 ++-
 2 files changed, 145 insertions(+), 68 deletions(-)

diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py
index ae4d85e..f705326 100644
--- a/pyglance/glance/delta.py
+++ b/pyglance/glance/delta.py
@@ -141,80 +141,146 @@ def convert_mag_dir_to_U_V_vector(magnitude_data, direction_data, invalidMask=No
 
 # ------------- bin/tuple related functions --------------------
 
-# a method to make a list of index numbers for reordering a multi-dimensional array
-def _make_new_index_list(numberOfIndexes, firstIndexNumber=0, lastIndexNumber=None) :
+class BinTupleMapping (object) :
     """
-    the first and last index numbers represent the dimensions you want to be first and last (respectively)
-    when the list is reordered; any other indexes will retain their relative ordering
-    
-    newIndexList = _make_new_index_list(numIndexes, binIndex, tupleIndex)
+    This class represents a bin / tuple data remapping.
+    It encapsulates information about the dimensions that are considered the
+    bin and tuple dimensions and is able to transform data into the
+    [bin][case][tuple] form. It also allows for the reverse calculation of
+    indexes so that you can recreate positioning information in the original
+    data set based on the new shape of the case dimension. 
     """
     
-    if lastIndexNumber is None:
-        lastIndexNumber = numberOfIndexes - 1
-    
-    newIndexList = range(numberOfIndexes)
-    maxSpecial   = max(firstIndexNumber, lastIndexNumber)
-    minSpecial   = min(firstIndexNumber, lastIndexNumber)
-    del(newIndexList[maxSpecial])
-    del(newIndexList[minSpecial])
-    newIndexList = [firstIndexNumber] + newIndexList + [lastIndexNumber]
-    
-    return newIndexList
-
-def reorder_for_bin_tuple (data, binIndexNumber, tupleIndexNumber) :
     """
-    reorder the data given so that the bin index is first, the tuple index is last,
-    and any additional dimensions are flattened into a middle "case" index
-    
-    the reordered data and the shape of flattened case indexes will be returned
-    (note if the original data was only 2 dimensional, None will be returned for the
-    shape of the flattened case indexes, since there were no other dimensions to flatten)
-    """
-    
-    # put the bin and tuple dimensions in the correct places
-    newIndexList = _make_new_index_list(len(data.shape), binIndexNumber, tupleIndexNumber)
-    newData = data.transpose(newIndexList)
+    internal instance variables:
     
-    # get the shape information on the internal dimensions we're going to combine
-    caseOriginalShape = newData.shape[1:-1]
+    bin_dimension_index   - the original index of the bin dimension
+    tuple_dimension_index - the original index of the tuple dimension
     
-    # combine the internal dimensions, to figure out what shape things
-    # will be with the flattened cases
-    sizeAfterFlattened = np.multiply.accumulate(caseOriginalShape)[-1]
-    newShape = (newData.shape[0], sizeAfterFlattened, newData.shape[-1])
+    original_data_shape   - the shape the data was before it was reordered
+    new_data_shape        - the data shape after it's been reordered
     
-    # flatten the case dimensions
-    newData = newData.reshape(newShape)
+    new_index_order       - a mapping that lists the order of the new dimension indexes
+    original_case_shape   - the shape of the case dimension(s) before being flattened
+    reverse_case_index    - a reverse index for finding the original positions of
+                            flattened case indexes
     
-    # TODO, remove once this is tested
-    #print ('original data shape: ' + str(data.shape))
-    #print ('original case shape: ' + str(caseOriginalShape))
-    #print ('new data shape:      ' + str(newData.shape))
-    
-    return newData, caseOriginalShape
-
-def determine_case_indecies (flatIndex, originalCaseShape) :
-    """
-    determine the original indexes of the case
-    given the flat index number and the original shape
-    
-    Note: this method is very memory inefficent
-    TODO, find a better way of doing this? does numpy guarantee reshaping strategy?
+    TODO, in the long run, find a way to get rid of the reverse_case_index
     """
     
-    # create a long flat array with the contents being the index number
-    numCases = np.multiply.accumulate(originalCaseShape)[-1]
-    temp = np.array(range(numCases))
-    
-    # reshape the flat array back to the original shape
-    # then figure out where our index went
-    temp = temp.reshape(originalCaseShape)
-    positionOfIndex = np.where(temp == flatIndex)
-    
-    del temp
-    
-    return positionOfIndex
+    def __init__ (self, dataShape, binIndexNumber=0, tupleIndexNumber=None) :
+        """
+        Given information on the original data and the desired bin/tuple,
+        build the mapping object
+        """
+        
+        # minimally, we need to have a shape
+        assert(dataShape is not None)
+        
+        # get the number of dimensions present in our data
+        numberOfDimensions = len(dataShape)
+        
+        # is our shape ok?
+        assert(numberOfDimensions >=2)
+        
+        self.original_data_shape = dataShape
+        
+        # set up our tuple if it wasn't selected
+        if (tupleIndexNumber is None) :
+            tupleIndexNumber = numberOfDimensions - 1
+        
+        # are the bin and tuple ok?
+        assert(binIndexNumber is not None)
+        assert(binIndexNumber   >= 0)
+        assert(binIndexNumber   < numberOfDimensions)
+        assert(tupleIndexNumber >= 0)
+        assert(tupleIndexNumber < numberOfDimensions)
+        
+        self.bin_dimension_index   = binIndexNumber
+        self.tuple_dimension_index = tupleIndexNumber
+        
+        # get the new index ordering for the data # TODO, bring call into class
+        self.new_index_order     = BinTupleMapping._make_new_index_list(numberOfDimensions,
+                                                                        self.bin_dimension_index,
+                                                                        self.tuple_dimension_index)
+        temp_data_shape = [ ]
+        for index in self.new_index_order:
+            temp_data_shape = temp_data_shape + [dataShape[index]]
+        temp_data_shape = tuple(temp_data_shape)
+        """
+        temp_data_shape          = np.array(dataShape).transpose(self.new_index_order)
+        """
+        self.original_case_shape = temp_data_shape[1:-1]
+        
+        # figure out the new size with the flattened cases
+        number_of_cases     = np.multiply.accumulate(self.original_case_shape)[-1]
+        self.new_data_shape = (temp_data_shape[0], number_of_cases, temp_data_shape[-1])
+        
+        # build the reverse index for looking up flat case indexes
+        self.reverse_case_index = np.arange(number_of_cases).reshape(self.original_case_shape)
+    
+    @staticmethod
+    def _make_new_index_list(numberOfIndexes, firstIndexNumber, lastIndexNumber) :
+        """
+        a utility method to make a list of index numbers for reordering a
+        multi-dimensional array
+        
+        the first and last index numbers represent the dimensions you want to be
+        first and last (respectively) when the list is reordered; any other indexes
+        will retain their relative ordering
+        
+        Note: This is a private method of the BinTupleMapping class and assumes
+        that the index numbers passed to it will have been preverified to be
+        acceptable.
+        """
+        
+        # make the new list
+        newIndexList = range(numberOfIndexes)
+        
+        # remove our two "important" indexes, in the correct order
+        maxSpecial   = max(firstIndexNumber, lastIndexNumber)
+        minSpecial   = min(firstIndexNumber, lastIndexNumber)
+        del(newIndexList[maxSpecial])
+        del(newIndexList[minSpecial])
+        
+        # add our two important indexes back into the list in their new places
+        newIndexList = [firstIndexNumber] + newIndexList + [lastIndexNumber]
+        
+        return newIndexList
+    
+    def reorder_for_bin_tuple (self, data) :
+        """
+        reorder the data so that the bin index is first, the tuple index is last,
+        and any additional dimensions are flattened into a middle "case" index
+        
+        only the reordered data is returned; the case shape is kept in self.original_case_shape
+        (note: the shape of the data must match the shape with which the BinTupleMapping
+        object was originally constructed)
+        """
+        
+        assert(data.shape == self.original_data_shape)
+        
+        # put the bin and tuple dimensions in the correct places
+        newData = data.transpose(self.new_index_order)
+        
+        # flatten the case dimensions
+        newData = newData.reshape(self.new_data_shape)
+        
+        return newData
+    
+    def determine_case_indecies (self, flatIndex) :
+        """
+        determine the original indexes of the case from the flat case index number
+        
+        Note: this method requires the object to hold a large data structure
+        TODO, find a better way of doing this? does numpy guarantee reshaping strategy?
+        TODO, can I find information on reshape and do this with pure math?
+        """
+        
+        # find the flat index in our reverse case index
+        positionOfIndex = np.where(self.reverse_case_index == flatIndex)
+        
+        return positionOfIndex
 
 if __name__=='__main__':
     import doctest
diff --git a/pyglance/glance/plotcreatefns.py b/pyglance/glance/plotcreatefns.py
index 465b158..6cfd744 100644
--- a/pyglance/glance/plotcreatefns.py
+++ b/pyglance/glance/plotcreatefns.py
@@ -752,6 +752,17 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) :
         assert(tupleIndex < len(aData.shape))
         
         # reorder and reshape our data into the [bin][case][tuple] form
+        reorderMapObject = delta.BinTupleMapping(aData.shape, binIndexNumber=binIndex, tupleIndexNumber=tupleIndex)
+        aData = reorderMapObject.reorder_for_bin_tuple(aData)
+        bData = reorderMapObject.reorder_for_bin_tuple(bData)
+        goodInAMask        = reorderMapObject.reorder_for_bin_tuple(goodInAMask)
+        goodInBMask        = reorderMapObject.reorder_for_bin_tuple(goodInBMask)
+        absDiffData        = reorderMapObject.reorder_for_bin_tuple(absDiffData)
+        rawDiffData        = reorderMapObject.reorder_for_bin_tuple(rawDiffData)
+        goodInBothMask     = reorderMapObject.reorder_for_bin_tuple(goodInBothMask)
+        troubleMask        = reorderMapObject.reorder_for_bin_tuple(troubleMask)
+        outsideEpsilonMask = reorderMapObject.reorder_for_bin_tuple(outsideEpsilonMask)
+        """
         aData,              caseInfo1 = delta.reorder_for_bin_tuple(aData,              binIndex, tupleIndex)
         bData,              caseInfo2 = delta.reorder_for_bin_tuple(bData,              binIndex, tupleIndex)
         goodInAMask,        caseInfo3 = delta.reorder_for_bin_tuple(goodInAMask,        binIndex, tupleIndex)
@@ -772,7 +783,7 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) :
         assert(caseInfo6 == caseInfo7)
         assert(caseInfo7 == caseInfo8)
         assert(caseInfo8 == caseInfo9)
-        
+        """
         # our list of functions that will later create the plots
         functionsToReturn = { }
         
@@ -807,8 +818,8 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) :
             tempFiniteMap = np.isfinite(rmsDiffValues)
             
             # figure out the min/max rms diff values
-            minRMSDiff = min(rmsDiffValues[tempFiniteMap])
-            maxRMSDiff = max(rmsDiffValues[tempFiniteMap])
+            minRMSDiff = np.min(rmsDiffValues[tempFiniteMap])
+            maxRMSDiff = np.max(rmsDiffValues[tempFiniteMap])
             
             # sort the cases by their rms diff values
             counts = np.zeros(numHistogramSections)
@@ -836,7 +847,7 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) :
                 caseNumber  = listOfCases[random.randint(0, len(listOfCases) - 1)]
                 
                 # make lineplot functions for the example cases
-                caseIndexes = delta.determine_case_indecies(caseNumber, caseInfo1)
+                caseIndexes = reorderMapObject.determine_case_indecies(caseNumber)
                 caseNumText = ''
                 for caseIndex in caseIndexes :
                     caseNumText = caseNumText + str(caseIndex)
-- 
GitLab