From 78e541f58fda7ac290374bf732ddb2594bef2d82 Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01> Date: Tue, 20 Apr 2010 16:53:27 +0000 Subject: [PATCH] Unsigned ints are upcast to higher precision signed ints before computing diff, to avoid possible overflow git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@107 8a9318a1-56ba-4d59-b755-99d26321be01 --- pyglance/glance/delta.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py index a96f690..bdd998f 100644 --- a/pyglance/glance/delta.py +++ b/pyglance/glance/delta.py @@ -16,6 +16,14 @@ compute_r = pearsonr #spearmanr LOG = logging.getLogger(__name__) +# Upcasts to be used in difference computation to avoid overflow. Currently only unsigned +# ints are upcast. +# FUTURE: handle uint64s as well (there is no int128, so might have to detect overflow) +datatype_upcasts = { + uint8: int16, + uint16: int32, + uint32: int64 + } def _missing(x,missing_value=None): if missing_value is not None: @@ -68,6 +76,11 @@ def diff(aData, bData, epsilon=0., sharedType = aData.dtype if (aData.dtype is not bData.dtype) : sharedType = common_type(aData, bData) + + # upcast if needed to avoid overflow in difference operation + if sharedType in datatype_upcasts: + sharedType = datatype_upcasts[sharedType] + LOG.debug('Shared data type that will be used for diff comparison: ' + str(sharedType)) # construct our diff'ed array @@ -78,8 +91,10 @@ def diff(aData, bData, epsilon=0., LOG.debug('current fill data value: ' + str(fill_data_value)) raw_diff[~valid_in_both] = fill_data_value # throw away invalid data - raw_diff[valid_in_both] = bData[valid_in_both] - aData[valid_in_both] - + + # compute difference, using shared type in computation + raw_diff[valid_in_both] = bData[valid_in_both].astype(sharedType) - aData[valid_in_both].astype(sharedType) + # the valid data which is too different between the two sets according to the given epsilon outside_epsilon_mask = (abs(raw_diff) > epsilon) & valid_in_both # trouble points = mismatched nans, mismatched missing-values, differences that are too large -- GitLab