diff --git a/pyglance/glance/collocation.py b/pyglance/glance/collocation.py
index 35b1401a557c84901418a05f9af1c5cc353cb2ef..4c5ad9475a57bd130a7bfa23ec0dc26d71c639ae 100644
--- a/pyglance/glance/collocation.py
+++ b/pyglance/glance/collocation.py
@@ -34,20 +34,20 @@ class CollocationMapping :
     """
     
     """
-    number_of_matches       = -1
-    number_unmatchable_in_a = -1
-    number_unmatchable_in_b = -1
+    raw_lon_lat_matches  - the number of points matched, based on the longitude and latitude alone
+    raw_unmatchable_in_a - the number of unmacthed pointes in a, based on longitude and latitude alone
+    raw_unmatchable_in_b - the number of unmacthed pointes in b, based on longitude and latitude alone
     
-    a_point_mapping = None
-    b_point_mapping = None
+    a_point_mapping      - mapping of points in a to points in b; TODO give form
+    b_point_mapping      - mapping of points in b to points in a; TODO give form
     
-    matched_longitude = None
-    matched_latitude  = None
+    matched_longitude    - the list of matched longitude values
+    matched_latitude     - the list of matched latitude  values
     
-    unmatchable_longitude_a = None
-    unmatchable_latitude_a  = None
-    unmatchable_longitude_b = None
-    unmatchable_latitude_b  = None
+    unmatchable_longitude_a - the list of longitude values in a that could not be matched
+    unmatchable_latitude_a  - the list of latitude  values in a that could not be matched
+    unmatchable_longitude_b - the list of longitude values in b that could not be matched
+    unmatchable_latitude_b  - the list of latitude  values in b that could not be matched
     """
     
     def __init__(self,
@@ -71,7 +71,31 @@ class CollocationMapping :
         the match should be accepted
         """
         pass
+    
+    def _create_basic_mapping_from_lon_lat((alongitude, alatitude),
+                                           (blongitude, blatitude),
+                                           lonlatEpsilon,
+                                           invalidAMask=None, invalidBMask=None) :
+        """
+        match points together based on their longitude and latitude values
+        to match points must be within lonlatEpsilon degrees in both longitude and latitude
+        
+        if the longitude and latitude variables contain invalid data the invalidAMask and
+        invalidBMask should be passed with the appropriate masking to remove the invalid values
         
+        the return will be in the form of two dictionaries of points, one from a and one from b,
+        indexed on the index number in the A or B data where they can be found. Each entry will
+        consist of a list of:
+            [longitudeValue, latitudeValue, indexNumber, [list of matching indexes in the other set]]
+        
+        Note: the return will include all pairs of points that match,
+        this means an individual a or b point may be repeated if it matches
+        multiple points within the lonlatEpsilon provided
+        
+        Warning: This algorithm will fail to find all matching points if the lonlatEpsilon is set to a
+        value greater than or equal to 1.0 degrees. This is related to the bin size used for searching
+        thoretically the bin size could be corrected to scale with the lonlatEpsilon in the future. TODO
+        """
 
 def create_colocation_mapping_within_epsilon((alongitude, alatitude),
                                              (blongitude, blatitude),
diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index a91d8fef3d67eee26e8e8106d4dcedd0011d3dfa..1783e571dc4f12036edf7e8afb5112c42725711f 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -460,12 +460,7 @@ def _check_lon_lat_equality(longitudeA, latitudeA,
     longitudeDiff = diffInfo.diff_data_object.data
     finiteLongitudeMask = diffInfo.diff_data_object.masks.valid_mask
     lon_not_equal_mask  = diffInfo.diff_data_object.masks.trouble_mask
-    """
-    longitudeDiff, finiteLongitudeMask, _, _, lon_not_equal_mask, _, _, _ = delta.diff(longitudeA, longitudeB,
-                                                                                       llepsilon,
-                                                                                       (None, None),
-                                                                                       (ignoreMaskA, ignoreMaskB))
-    """
+    
     aDataObject = dataobj.DataObject(latitudeA, ignoreMask=ignoreMaskA)
     bDataObject = dataobj.DataObject(latitudeB, ignoreMask=ignoreMaskB)
     diffInfo = dataobj.DiffInfoObject(aDataObject, bDataObject, epsilonValue=llepsilon) #TODO, needs epsilon percent
@@ -473,12 +468,6 @@ def _check_lon_lat_equality(longitudeA, latitudeA,
     latitudeDiff = diffInfo.diff_data_object.data
     finiteLatitudeMask = diffInfo.diff_data_object.masks.valid_mask
     lat_not_equal_mask = diffInfo.diff_data_object.masks.trouble_mask
-    """
-    latitudeDiff,  finiteLatitudeMask,  _, _, lat_not_equal_mask, _, _, _ = delta.diff(latitudeA,  latitudeB,
-                                                                                       llepsilon,
-                                                                                       (None, None),
-                                                                                       (ignoreMaskA, ignoreMaskB))
-    """
     
     lon_lat_not_equal_mask = lon_not_equal_mask | lat_not_equal_mask
     lon_lat_not_equal_points_count = sum(lon_lat_not_equal_mask)
diff --git a/pyglance/glance/data.py b/pyglance/glance/data.py
index 879871194a80771ff76a477ed30a197654cb1468..885319af18f564a9050c6a2a4a9b14802a45648d 100644
--- a/pyglance/glance/data.py
+++ b/pyglance/glance/data.py
@@ -105,7 +105,35 @@ class DataObject (object) :
         self.fill_value = fillValue
         self.masks      = BasicMaskSetObject(ignoreMask)
     
-    # TODO, analyze in issolation?
+    def self_analysis(self) :
+        """
+        Gather some basic information about a data set
+        """
+        
+        # hang onto the shape for convenience
+        shape = self.data.shape
+        
+        # if there isn't an ignore mask, make an empty one
+        if self.masks.ignore_mask is None :
+            self.masks.ignore_mask = np.zeros(shape, dtype=np.bool)
+        
+        # find the non-finite values
+        non_finite_mask = ~np.isfinite(self.data) & ~self.masks.ignore_mask
+        
+        # find and mark the missing values
+        missing_mask    = np.zeros(shape, dtype=np.bool)
+        # if the data has a fill value, mark where the missing data is
+        if self.fill_value is not None :
+            missing_mask[self.data == self.fill_value] = True
+            missing_mask[self.masks.ignore_mask]       = False
+        
+        # define the valid mask as places where the data is not missing,
+        # nonfinite, or ignored
+        valid_mask = ~ (missing_mask | non_finite_mask | self.masks.ignore_mask)
+        
+        # set our masks
+        self.masks = BasicMaskSetObject(self.masks.ignore_mask, valid_mask,
+                                        non_finite_mask, missing_mask)
 
 class DiffInfoObject (object) :
     """
@@ -133,126 +161,106 @@ class DiffInfoObject (object) :
         self.epsilon_value   = epsilonValue
         self.epsilon_percent = epsilonPercent
         
-        # diff the two data sets TODO, this doesn't use epsilon percent yet
-        raw_diff, valid_in_both, (valid_in_a_mask, valid_in_b_mask), trouble_pt_mask, outside_epsilon_mask,  \
-           (a_not_finite_mask, b_not_finite_mask), (a_missing_mask, b_missing_mask), (ignore_mask_a, ignore_mask_b) = \
-                            diff(aDataObject.data, bDataObject.data, epsilonValue,
-                                 (aDataObject.fill_value, bDataObject.fill_value),
-                                 (aDataObject.masks.ignore_mask, bDataObject.masks.ignore_mask))
-        
-        # set the various data in our two basic data objects
-        aDataObject.masks = BasicMaskSetObject(ignore_mask_a, valid_in_a_mask, a_not_finite_mask, a_missing_mask)
-        bDataObject.masks = BasicMaskSetObject(ignore_mask_b, valid_in_b_mask, b_not_finite_mask, b_missing_mask)
-        
-        # create our diff info object
-        self.diff_data_object = DataObject(raw_diff)
-        self.diff_data_object.masks = DiffMaskSetObject(ignore_mask_a | ignore_mask_b,
-                                                        valid_in_both, trouble_pt_mask, outside_epsilon_mask)
-
-# Upcasts to be used in difference computation to avoid overflow. Currently only unsigned
-# ints are upcast.
-# FUTURE: handle uint64s as well (there is no int128, so might have to detect overflow)
-datatype_upcasts = {
-    np.uint8:  np.int16,
-    np.uint16: np.int32,
-    np.uint32: np.int64
-    }
-
-# TODO, rethink how this works
-def _select_fill_data(dTypeValue) :
-    """
-    select a fill data value based on the type of data that is being
-    inspected/changed
-    """
-    
-    fill_value_to_return = None
-    
-    if np.issubdtype(dTypeValue, np.float) or np.issubdtype(dTypeValue, np.complex) :
-        fill_value_to_return = np.nan
-    elif np.issubdtype(dTypeValue, np.int) :
-        fill_value_to_return = np.iinfo(dTypeValue).min
-    elif np.issubdtype(dTypeValue, np.bool) :
-        fill_value_to_return = True
-    elif ((dTypeValue is np.uint8)  or
-          (dTypeValue is np.uint16) or
-          (dTypeValue is np.uint32) or
-          (dTypeValue is np.uint64)) :
-        fill_value_to_return = np.iinfo(dTypeValue).max
-    
-    return fill_value_to_return
-
-def diff(aData, bData, epsilon=0.,
-         (a_missing_value, b_missing_value)=(None, None),
-         (ignore_mask_a, ignore_mask_b)=(None, None)):
-    """
-    take two arrays of similar size and composition
-    if an ignoreMask is passed in values in the mask will not be analysed to
-    form the various return masks and the corresponding spots in the
-    "difference" return data array will contain fill values (selected
-    based on data type).
-    
-    return difference array filled with fill data where differences aren't valid,
-    good mask where values are finite in both a and b
-    trouble mask where missing values or nans don't match or delta > epsilon
-    (a-notfinite-mask, b-notfinite-mask)
-    (a-missing-mask, b-missing-mask)
-    """
-    shape = aData.shape
-    assert(bData.shape==shape)
-    assert(np.can_cast(aData.dtype, bData.dtype) or np.can_cast(bData.dtype, aData.dtype))
+        # analyze our data and get the difference object
+        self.diff_data_object = DiffInfoObject.analyze(aDataObject, bDataObject,
+                                                       epsilonValue, epsilonPercent)
     
-    # if the ignore masks do not exist, set them to include none of the data
-    if (ignore_mask_a is None) :
-        ignore_mask_a = np.zeros(shape,dtype=bool)
-    if (ignore_mask_b is None) :
-        ignore_mask_b = np.zeros(shape,dtype=bool)
+    # Upcasts to be used in difference computation to avoid overflow. Currently only unsigned
+    # ints are upcast.
+    # FUTURE: handle uint64s as well (there is no int128, so might have to detect overflow)
+    DATATYPE_UPCASTS = {
+        np.uint8:  np.int16,
+        np.uint16: np.int32,
+        np.uint32: np.int64
+        }
     
-    # deal with the basic masks
-    a_not_finite_mask, b_not_finite_mask = ~np.isfinite(aData) & ~ignore_mask_a, ~np.isfinite(bData) & ~ignore_mask_b
-    a_missing_mask, b_missing_mask = np.zeros(shape,dtype=bool), np.zeros(shape,dtype=bool)
-    # if we were given missing values, mark where they are in the data
-    if a_missing_value is not None:
-        a_missing_mask[aData == a_missing_value] = True
-        a_missing_mask[ignore_mask_a] = False # don't analyse the ignored values
-    if b_missing_value is not None:
-        b_missing_mask[bData == b_missing_value] = True
-        b_missing_mask[ignore_mask_b] = False # don't analyse the ignored values
-    
-    # build the comparison data that includes the "good" values
-    valid_in_a_mask = ~(a_not_finite_mask | a_missing_mask | ignore_mask_a)
-    valid_in_b_mask = ~(b_not_finite_mask | b_missing_mask | ignore_mask_b)
-    valid_in_both = valid_in_a_mask & valid_in_b_mask
-    
-    # figure out our shared data type
-    sharedType = aData.dtype
-    if (aData.dtype is not bData.dtype) :
-        sharedType = np.common_type(aData, bData)
-
-    # upcast if needed to avoid overflow in difference operation
-    if sharedType in datatype_upcasts:
-        sharedType = datatype_upcasts[sharedType]
-
-    LOG.debug('Shared data type that will be used for diff comparison: ' + str(sharedType))
-    
-    # construct our diff'ed array
-    raw_diff = np.zeros(shape, dtype=sharedType) #empty_like(aData)
-    
-    fill_data_value = _select_fill_data(sharedType)
-    
-    LOG.debug('current fill data value: ' + str(fill_data_value))
-    
-    raw_diff[~valid_in_both] = fill_data_value # throw away invalid data
-
-    # compute difference, using shared type in computation
-    raw_diff[valid_in_both] = bData[valid_in_both].astype(sharedType) - aData[valid_in_both].astype(sharedType)
+    @staticmethod
+    def _get_shared_type_and_fill_value(data1, data2, fill1=None, fill2=None) :
+        """
+        Figure out a shared type that can be used when adding or subtracting
+        the two data sets given (accounting for possible overflow)
+        Also returns a fill value that can be used.
+        """
+        
+        # figure out the shared type
+        type_to_return = data1.dtype
+        if data1.dtype is not data2.dtype:
+            type_to_return = np.common_type(data1, data2)
+        
+        # upcast the type if we need to
+        if type_to_return in DiffInfoObject.DATATYPE_UPCASTS :
+            type_to_return = DiffInfoObject.DATATYPE_UPCASTS[type_to_return]
+            LOG.debug('To prevent overflow, difference data will be upcast from ('
+                      + str(data1.dtype) + '/' + str(data2.dtype) + ') to: ' + str(type_to_return))
+        
+        # figure out the fill value
+        fill_value_to_return = None
+        
+        # if we're looking at float or complex data, use a nan
+        if (np.issubdtype(type_to_return, np.float) or
+            np.issubdtype(type_to_return, np.complex)) :
+            fill_value_to_return = np.nan
         
-    # the valid data which is too different between the two sets according to the given epsilon
-    outside_epsilon_mask = (abs(raw_diff) > epsilon) & valid_in_both
-    # trouble points = mismatched nans, mismatched missing-values, differences that are too large 
-    trouble_pt_mask = (a_not_finite_mask ^ b_not_finite_mask) | (a_missing_mask ^ b_missing_mask) | outside_epsilon_mask
+        # if we're looking at int data, use the minimum value
+        elif np.issubdtype(type_to_return, np.int) :
+            fill_value_to_return = np.iinfo(type_to_return).min
+        
+        # if we're looking at unsigned data, use the maximum value
+        elif ((type_to_return is np.uint8)  or
+              (type_to_return is np.uint16) or
+              (type_to_return is np.uint32) or
+              (type_to_return is np.uint64)) :
+            fill_value_to_return = np.iinfo(type_to_return).max
+        
+        return type_to_return, fill_value_to_return
     
-    return raw_diff, valid_in_both, (valid_in_a_mask, valid_in_b_mask), trouble_pt_mask, outside_epsilon_mask,  \
-           (a_not_finite_mask, b_not_finite_mask), (a_missing_mask, b_missing_mask), (ignore_mask_a, ignore_mask_b)
+    @staticmethod
+    def analyze(aDataObject, bDataObject,
+                epsilonValue=0.0, epsilonPercent=None):
+        """
+        analyze the differences between the two data sets
+        updates the two data objects with additional masks
+        and returns data object containing diff data and masks
+        """
+        shape = aDataObject.data.shape
+        assert(bDataObject.data.shape == shape)
+        assert(np.can_cast(aDataObject.data.dtype, bDataObject.data.dtype) or
+               np.can_cast(bDataObject.data.dtype, aDataObject.data.dtype))
+        
+        # do some basic analysis on the individual data sets
+        aDataObject.self_analysis()
+        bDataObject.self_analysis()
+        
+        # where is the shared valid data?
+        valid_in_both  = aDataObject.masks.valid_mask  & bDataObject.masks.valid_mask
+        ignore_in_both = aDataObject.masks.ignore_mask | bDataObject.masks.ignore_mask
+        
+        # get our shared data type and fill value
+        sharedType, fill_data_value = DiffInfoObject._get_shared_type_and_fill_value(aDataObject.data,
+                                                                                     bDataObject.data,
+                                                                                     aDataObject.fill_value,
+                                                                                     bDataObject.fill_value)
+        
+        # construct our diff'ed data set
+        raw_diff = np.zeros(shape, dtype=sharedType)
+        raw_diff[~valid_in_both] = fill_data_value # throw away invalid data
+        # compute difference, using shared type in computation
+        raw_diff[valid_in_both] = bDataObject.data[valid_in_both].astype(sharedType) -  \
+                                  aDataObject.data[valid_in_both].astype(sharedType)
+        
+        # the valid data which is too different between the two sets according to the given epsilon
+        outside_epsilon_mask = (abs(raw_diff) > epsilonValue) & valid_in_both
+        # trouble points = mismatched nans, mismatched missing-values, differences that are too large 
+        trouble_pt_mask = ( (aDataObject.masks.non_finite_mask ^ bDataObject.masks.non_finite_mask) |
+                            (aDataObject.masks.missing_mask    ^ bDataObject.masks.missing_mask)    |
+                            outside_epsilon_mask )
+        
+        # make our diff data object
+        diff_data_object = DataObject(raw_diff, fillValue=fill_data_value)
+        diff_data_object.masks = DiffMaskSetObject(ignore_in_both, valid_in_both,
+                                                   trouble_pt_mask, outside_epsilon_mask)
+        
+        return diff_data_object
 
 if __name__=='__main__':
     import doctest
diff --git a/pyglance/glance/plot.py b/pyglance/glance/plot.py
index 71f232edba6faabe312aa7bee103331d6c10a99b..12ed1bb89ae591b11e5ac1c38dd10a4b19be2556 100644
--- a/pyglance/glance/plot.py
+++ b/pyglance/glance/plot.py
@@ -249,12 +249,7 @@ def plot_and_save_comparison_figures (aData, bData,
     bMissingMask   = diffInfo.b_data_object.masks.missing_mask
     spaciallyInvalidMaskA = diffInfo.a_data_object.masks.ignore_mask
     spaciallyInvalidMaskB = diffInfo.b_data_object.masks.ignore_mask
-    """
-    rawDiffData, goodMask, (goodInAMask, goodInBMask), troubleMask, outsideEpsilonMask, \
-    (aNotFiniteMask, bNotFiniteMask), (aMissingMask, bMissingMask), \
-    (spaciallyInvalidMaskA, spaciallyInvalidMaskB) = delta.diff(aData, bData, epsilon, (missingValue, missingValueAltInB),
-                                                                (spaciallyInvalidMaskA, spaciallyInvalidMaskB))
-    """
+    
     absDiffData = np.abs(rawDiffData) # we also want to show the distance between our two, not just which one's bigger/smaller
     
     # from this point on, we will be forking to create child processes so we can parallelize our image and
diff --git a/pyglance/glance/stats.py b/pyglance/glance/stats.py
index 48ef9ca8c9b31f872a7a00563c67d1b1ae768a5a..f5df9f5cee2fd559a50807119cc072a0ec6400d2 100644
--- a/pyglance/glance/stats.py
+++ b/pyglance/glance/stats.py
@@ -36,13 +36,6 @@ def summarize(a, b, epsilon=0., (a_missing_value, b_missing_value)=(None,None),
     bmis   = diffInfo.b_data_object.masks.missing_mask
     ignoreInAMask = diffInfo.a_data_object.masks.ignore_mask
     ignoreInBMask = diffInfo.b_data_object.masks.ignore_mask
-    """
-    diffData, finite_mask, (finite_a_mask, finite_b_mask), \
-    trouble, outside_epsilon, (anfin, bnfin), \
-    (amis, bmis), (ignoreInAMask, ignoreInBMask) = nfo = delta.diff(a, b, epsilon,
-                                                                    (a_missing_value, b_missing_value),
-                                                                    (ignoreInAMask, ignoreInBMask))
-    """
     
     general_stats = _get_general_data_stats(a, b, a_missing_value, b_missing_value, epsilon, 
                                             ignoreInAMask, ignoreInBMask, ~finite_a_mask, ~finite_b_mask)