From b1cf2235c4c0fc673e46ecedc088851b1a5f91b0 Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01> Date: Thu, 20 May 2010 16:34:55 +0000 Subject: [PATCH] changing how bin/tuple reordering is done git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@112 8a9318a1-56ba-4d59-b755-99d26321be01 --- pyglance/glance/delta.py | 194 +++++++++++++++++++++---------- pyglance/glance/plotcreatefns.py | 19 ++- 2 files changed, 145 insertions(+), 68 deletions(-) diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py index ae4d85e..f705326 100644 --- a/pyglance/glance/delta.py +++ b/pyglance/glance/delta.py @@ -141,80 +141,146 @@ def convert_mag_dir_to_U_V_vector(magnitude_data, direction_data, invalidMask=No # ------------- bin/tuple related functions -------------------- -# a method to make a list of index numbers for reordering a multi-dimensional array -def _make_new_index_list(numberOfIndexes, firstIndexNumber=0, lastIndexNumber=None) : +class BinTupleMapping (object) : """ - the first and last index numbers represent the dimensions you want to be first and last (respectively) - when the list is reordered; any other indexes will retain their relative ordering - - newIndexList = _make_new_index_list(numIndexes, binIndex, tupleIndex) + This class represents a bin / tuple data remapping. + It encapsulates information about the dimensions that are considered the + bin and tuple dimensions and is able to transform data into the + [bin][case][tuple] form. It also allows for the reverse calculation of + indexes so that you can recreate positioning information in the original + data set based on the new shape of the case dimension. """ - if lastIndexNumber is None: - lastIndexNumber = numberOfIndexes - 1 - - newIndexList = range(numberOfIndexes) - maxSpecial = max(firstIndexNumber, lastIndexNumber) - minSpecial = min(firstIndexNumber, lastIndexNumber) - del(newIndexList[maxSpecial]) - del(newIndexList[minSpecial]) - newIndexList = [firstIndexNumber] + newIndexList + [lastIndexNumber] - - return newIndexList - -def reorder_for_bin_tuple (data, binIndexNumber, tupleIndexNumber) : """ - reorder the data given so that the bin index is first, the tuple index is last, - and any additional dimensions are flattened into a middle "case" index - - the reordered data and the shape of flattened case indexes will be returned - (note if the original data was only 2 dimensional, None will be returned for the - shape of the flattened case indexes, since there were no other dimensions to flatten) - """ - - # put the bin and tuple dimensions in the correct places - newIndexList = _make_new_index_list(len(data.shape), binIndexNumber, tupleIndexNumber) - newData = data.transpose(newIndexList) + internal instance variables: - # get the shape information on the internal dimensions we're going to combine - caseOriginalShape = newData.shape[1:-1] + bin_dimension_index - the original index of the bin dimension + tuple_dimension_index - the original index of the tuple dimension - # combine the internal dimensions, to figure out what shape things - # will be with the flattened cases - sizeAfterFlattened = np.multiply.accumulate(caseOriginalShape)[-1] - newShape = (newData.shape[0], sizeAfterFlattened, newData.shape[-1]) + original_data_shape - the shape the data was before it was reordered + new_data_shape - the data shape after it's been reordered - # flatten the case dimensions - newData = newData.reshape(newShape) + new_index_order - a mapping that lists the order of the new dimension indexes + original_case_shape - the shape of the case dimension(s) before being flattened + reverse_case_index - a reverse index for finding the original positions of + flattened case indexes - # TODO, remove once this is tested - #print ('original data shape: ' + str(data.shape)) - #print ('original case shape: ' + str(caseOriginalShape)) - #print ('new data shape: ' + str(newData.shape)) - - return newData, caseOriginalShape - -def determine_case_indecies (flatIndex, originalCaseShape) : - """ - determine the original indexes of the case - given the flat index number and the original shape - - Note: this method is very memory inefficent - TODO, find a better way of doing this? does numpy guarantee reshaping strategy? + TODO, in the long run, find a way to get rid of the reverse_case_index """ - # create a long flat array with the contents being the index number - numCases = np.multiply.accumulate(originalCaseShape)[-1] - temp = np.array(range(numCases)) - - # reshape the flat array back to the original shape - # then figure out where our index went - temp = temp.reshape(originalCaseShape) - positionOfIndex = np.where(temp == flatIndex) - - del temp - - return positionOfIndex + def __init__ (self, dataShape, binIndexNumber=0, tupleIndexNumber=None) : + """ + Given information on the original data and the desired bin/tuple, + build the mapping object + """ + + # minimally, we need to have a shape + assert(dataShape is not None) + + # get the number of dimensions present in our data + numberOfDimensions = len(dataShape) + + # is our shape ok? + assert(numberOfDimensions >=2) + + self.original_data_shape = dataShape + + # set up our tuple if it wasn't selected + if (tupleIndexNumber is None) : + tupleIndexNumber = numberOfDimensions - 1 + + # are the bin and tuple ok? + assert(binIndexNumber is not None) + assert(binIndexNumber >= 0) + assert(binIndexNumber < numberOfDimensions) + assert(tupleIndexNumber >= 0) + assert(tupleIndexNumber < numberOfDimensions) + + self.bin_dimension_index = binIndexNumber + self.tuple_dimension_index = tupleIndexNumber + + # get the new index ordering for the data # TODO, bring call into class + self.new_index_order = BinTupleMapping._make_new_index_list(numberOfDimensions, + self.bin_dimension_index, + self.tuple_dimension_index) + temp_data_shape = [ ] + for index in self.new_index_order: + temp_data_shape = temp_data_shape + [dataShape[index]] + temp_data_shape = tuple(temp_data_shape) + """ + temp_data_shape = np.array(dataShape).transpose(self.new_index_order) + """ + self.original_case_shape = temp_data_shape[1:-1] + + # figure out the new size with the flattened cases + number_of_cases = np.multiply.accumulate(self.original_case_shape)[-1] + self.new_data_shape = (temp_data_shape[0], number_of_cases, temp_data_shape[-1]) + + # build the reverse index for looking up flat case indexes + self.reverse_case_index = np.arange(number_of_cases).reshape(self.original_case_shape) + + @staticmethod + def _make_new_index_list(numberOfIndexes, firstIndexNumber, lastIndexNumber) : + """ + a utility method to make a list of index numbers for reordering a + multi-dimensional array + + the first and last index numbers represent the dimensions you want to be + first and last (respectively) when the list is reordered; any other indexes + will retain their relative ordering + + Note: This is a private method of the BinTupleMapping class and assumes + that the index numbers passed to it will have been preverified to be + acceptable. + """ + + # make the new list + newIndexList = range(numberOfIndexes) + + # remove our two "important" indexes, in the correct order + maxSpecial = max(firstIndexNumber, lastIndexNumber) + minSpecial = min(firstIndexNumber, lastIndexNumber) + del(newIndexList[maxSpecial]) + del(newIndexList[minSpecial]) + + # add our two important indexes back into the list in their new places + newIndexList = [firstIndexNumber] + newIndexList + [lastIndexNumber] + + return newIndexList + + def reorder_for_bin_tuple (self, data) : + """ + reorder the data so that the bin index is first, the tuple index is last, + and any additional dimensions are flattened into a middle "case" index + + the reordered data and the shape of flattened case indexes will be returned + (note: the shape of the data must match the shape with which the BinTupleMatching + object was originally constructed) + """ + + assert(data.shape == self.original_data_shape) + + # put the bin and tuple dimensions in the correct places + newData = data.transpose(self.new_index_order) + + # flatten the case dimensions + newData = newData.reshape(self.new_data_shape) + + return newData + + def determine_case_indecies (self, flatIndex) : + """ + determine the original indexes of the case from the flat case index number + + Note: this method requires the object to hold a large data structure + TODO, find a better way of doing this? does numpy guarantee reshaping strategy? + TODO, can I find information on reshape and do this with pure math? + """ + + # find the flat index in our reverse case index + positionOfIndex = np.where(self.reverse_case_index == flatIndex) + + return positionOfIndex if __name__=='__main__': import doctest diff --git a/pyglance/glance/plotcreatefns.py b/pyglance/glance/plotcreatefns.py index 465b158..6cfd744 100644 --- a/pyglance/glance/plotcreatefns.py +++ b/pyglance/glance/plotcreatefns.py @@ -752,6 +752,17 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) : assert(tupleIndex < len(aData.shape)) # reorder and reshape our data into the [bin][case][tuple] form + reorderMapObject = delta.BinTupleMapping(aData.shape, binIndexNumber=binIndex, tupleIndexNumber=tupleIndex) + aData = reorderMapObject.reorder_for_bin_tuple(aData) + bData = reorderMapObject.reorder_for_bin_tuple(bData) + goodInAMask = reorderMapObject.reorder_for_bin_tuple(goodInAMask) + goodInBMask = reorderMapObject.reorder_for_bin_tuple(goodInBMask) + absDiffData = reorderMapObject.reorder_for_bin_tuple(absDiffData) + rawDiffData = reorderMapObject.reorder_for_bin_tuple(rawDiffData) + goodInBothMask = reorderMapObject.reorder_for_bin_tuple(goodInBothMask) + troubleMask = reorderMapObject.reorder_for_bin_tuple(troubleMask) + outsideEpsilonMask = reorderMapObject.reorder_for_bin_tuple(outsideEpsilonMask) + """ aData, caseInfo1 = delta.reorder_for_bin_tuple(aData, binIndex, tupleIndex) bData, caseInfo2 = delta.reorder_for_bin_tuple(bData, binIndex, tupleIndex) goodInAMask, caseInfo3 = delta.reorder_for_bin_tuple(goodInAMask, binIndex, tupleIndex) @@ -772,7 +783,7 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) : assert(caseInfo6 == caseInfo7) assert(caseInfo7 == caseInfo8) assert(caseInfo8 == caseInfo9) - + """ # our list of functions that will later create the plots functionsToReturn = { } @@ -807,8 +818,8 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) : tempFiniteMap = np.isfinite(rmsDiffValues) # figure out the min/max rms diff values - minRMSDiff = min(rmsDiffValues[tempFiniteMap]) - maxRMSDiff = max(rmsDiffValues[tempFiniteMap]) + minRMSDiff = np.min(rmsDiffValues[tempFiniteMap]) + maxRMSDiff = np.max(rmsDiffValues[tempFiniteMap]) # sort the cases by their rms diff values counts = np.zeros(numHistogramSections) @@ -836,7 +847,7 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) : caseNumber = listOfCases[random.randint(0, len(listOfCases) - 1)] # make lineplot functions for the example cases - caseIndexes = delta.determine_case_indecies(caseNumber, caseInfo1) + caseIndexes = reorderMapObject.determine_case_indecies(caseNumber) caseNumText = '' for caseIndex in caseIndexes : caseNumText = caseNumText + str(caseIndex) -- GitLab