From 78e541f58fda7ac290374bf732ddb2594bef2d82 Mon Sep 17 00:00:00 2001
From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01>
Date: Tue, 20 Apr 2010 16:53:27 +0000
Subject: [PATCH] Unsigned ints are upcast to higher precision signed ints
 before computing diff, to avoid possible overflow

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@107 8a9318a1-56ba-4d59-b755-99d26321be01
---
 pyglance/glance/delta.py | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py
index a96f690..bdd998f 100644
--- a/pyglance/glance/delta.py
+++ b/pyglance/glance/delta.py
@@ -16,6 +16,14 @@ compute_r = pearsonr #spearmanr
 
 LOG = logging.getLogger(__name__)
 
+# Upcasts applied before a difference is computed, to avoid overflow when subtracting
+# unsigned values. Currently only unsigned ints are upcast (to the next wider signed int).
+# FUTURE: handle uint64 as well (there is no int128, so overflow may have to be detected)
+datatype_upcasts = {
+    uint8: int16,
+    uint16: int32,
+    uint32: int64
+    }
 
 def _missing(x,missing_value=None):
     if missing_value is not None:
@@ -68,6 +76,11 @@ def diff(aData, bData, epsilon=0.,
     sharedType = aData.dtype
     if (aData.dtype is not bData.dtype) :
         sharedType = common_type(aData, bData)
+
+    # upcast if needed to avoid overflow in difference operation
+    if sharedType in datatype_upcasts:
+        sharedType = datatype_upcasts[sharedType]
+
     LOG.debug('Shared data type that will be used for diff comparison: ' + str(sharedType))
     
     # construct our diff'ed array
@@ -78,8 +91,10 @@ def diff(aData, bData, epsilon=0.,
     LOG.debug('current fill data value: ' + str(fill_data_value))
     
     raw_diff[~valid_in_both] = fill_data_value # throw away invalid data
-    raw_diff[valid_in_both] = bData[valid_in_both] - aData[valid_in_both]
-    
+
+    # compute the difference, casting both operands to the shared type before subtracting
+    raw_diff[valid_in_both] = bData[valid_in_both].astype(sharedType) - aData[valid_in_both].astype(sharedType)
+        
     # the valid data which is too different between the two sets according to the given epsilon
     outside_epsilon_mask = (abs(raw_diff) > epsilon) & valid_in_both
     # trouble points = mismatched nans, mismatched missing-values, differences that are too large 
-- 
GitLab