diff --git a/pyglance/glance/io.py b/pyglance/glance/io.py
index 2156385cb3e2b5315fef57ecc5f1b3cbef19569b..b8a9118bfd2f1f724fc5cec43ec5c5a5c59b7ab6 100644
--- a/pyglance/glance/io.py
+++ b/pyglance/glance/io.py
@@ -8,7 +8,7 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
 """
 
 import os, logging
-import numpy as np
+import numpy
 
 from functools import reduce
 LOG = logging.getLogger(__name__)
@@ -69,10 +69,10 @@ SCALE_METHOD_STR = 'scaling_method'
 UNSIGNED_ATTR_STR = "_unsigned"
 
 SIGNED_TO_UNSIGNED_DTYPES = {
-    np.dtype(np.int8): np.dtype(np.uint8),
-    np.dtype(np.int16): np.dtype(np.uint16),
-    np.dtype(np.int32): np.dtype(np.uint32),
-    np.dtype(np.int64): np.dtype(np.uint64),
+    numpy.dtype(numpy.int8): numpy.dtype(numpy.uint8),
+    numpy.dtype(numpy.int16): numpy.dtype(numpy.uint16),
+    numpy.dtype(numpy.int32): numpy.dtype(numpy.uint32),
+    numpy.dtype(numpy.int64): numpy.dtype(numpy.uint64),
 }
 
 class IOUnimplimentedError(Exception):
@@ -193,6 +193,27 @@ class CaseInsensitiveAttributeCache (object) :
         # TODO, are there any bad types for these files?
         return True
 
+def _get_data_uptype (input_dtype) :
+    """
+    Given an input data type, figure out what type we need to upcast it to.
+    
+    Note: Glance expects all its data to get upcast into floats for the purposes of its
+    later math manipulations.
+    """
+    
+    default_uptype = numpy.float32
+    default_finfo = numpy.finfo(default_uptype)
+    input_info = numpy.finfo(input_dtype) if numpy.issubdtype(input_dtype, numpy.floating,) else numpy.iinfo(input_dtype)
+    
+    # if our input won't fit into the default, pick a bigger type
+    if ( (default_finfo.min > input_info.min) or (default_finfo.max < input_info.max) ) :
+        LOG.debug("Input data will not fit in default float32 data type, using larger type.")
+        default_uptype = numpy.float64
+    
+    # FUTURE, if we reach a point where a float64 isn't big enough, this will need to be revisited
+    
+    return default_uptype
+
 class hdf (object):
     """wrapper for HDF4 dataset for comparison
     __call__ yields sequence of variable names
@@ -240,10 +261,10 @@ class hdf (object):
         temp = self.attributeCache.get_variable_attributes(name)
         if ADD_OFFSET_STR in temp :
             add_offset = temp[ADD_OFFSET_STR]
-            data_type = np.dtype(type(add_offset))
+            data_type = numpy.dtype(type(add_offset))
         if SCALE_FACTOR_STR in temp :
             scale_factor = temp[SCALE_FACTOR_STR]
-            data_type = np.dtype(type(scale_factor))
+            data_type = numpy.dtype(type(scale_factor))
         if SCALE_METHOD_STR in temp :
             scaling_method = temp[SCALE_METHOD_STR]
         SDS.endaccess(variable_object)
@@ -271,12 +292,12 @@ class hdf (object):
         
         # get information about where the data is the missing value
         missing_val = self.missing_value(name)
-        missing_mask = np.zeros(raw_data_copy.shape, dtype=np.bool)
+        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=numpy.bool)
         if missing_val is not None :
             missing_mask[raw_data_copy == missing_val] = True
         
         # create the scaled version of the data
-        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
+        scaled_data_copy = numpy.array(raw_data_copy, dtype=data_type)
         scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
         
         return scaled_data_copy
@@ -429,23 +450,26 @@ class nc (object):
         type found in the original file)
         """
         
-        # defaults
-        data_type = np.float32 # TODO temporary this avoids type truncation issues, but is not a general solution
-        
+        LOG.debug("loading variable data for: " + name)
+        
         # get the variable object and use it to
         # get our raw data and scaling info
         variable_object = self.get_variable_object(name)
         
         # get our data, save the dtype, and make sure it's a more flexible dtype for now
         variable_object.set_auto_maskandscale(False) # for now just do the darn calculations ourselves
-        scaled_data_copy = np.array(variable_object[:], dtype=data_type)
+        temp_input_data = variable_object[:]
+        LOG.debug("Native input dtype: " + str(temp_input_data.dtype))
+        dtype_to_use = _get_data_uptype(temp_input_data.dtype)
+        LOG.debug("Choosing dtype " + str(dtype_to_use) + " for our internal representation of this data.")
+        scaled_data_copy = numpy.array(temp_input_data, dtype=dtype_to_use,)
         
         # get the attribute cache so we can check on loading related attributes
         temp = self.attributeCache.get_variable_attributes(name)
         
         # get information about where the data is the missing value
         missing_val = self.missing_value(name)
-        missing_mask = np.zeros(scaled_data_copy.shape, dtype=np.bool)
+        missing_mask = numpy.zeros(scaled_data_copy.shape, dtype=numpy.bool)
         if missing_val is not None:
             missing_mask[scaled_data_copy == missing_val] = True
         
@@ -455,7 +479,7 @@ class nc (object):
         if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == ("true"):
             LOG.debug("Correcting for unsigned values in variable data.")
             where_temp = (scaled_data_copy < 0.0) & ~missing_mask # where we have negative but not missing data
-            scaled_data_copy[where_temp] += (np.iinfo(np.uint16).max + 1.0) # add the 2's complement
+            scaled_data_copy[where_temp] += (numpy.iinfo(numpy.uint16).max + 1.0) # add the 2's complement
         #***** end of handling the unsigned attribute
         
@@ -497,14 +521,14 @@ class nc (object):
        # get the missing value and figure out the dtype of the original data
        missing_val = self.missing_value(name)
-       orig_dtype = np.array([missing_val,]).dtype
+       orig_dtype = numpy.array([missing_val,]).dtype
        needed_dtype = SIGNED_TO_UNSIGNED_DTYPES[orig_dtype] if orig_dtype in SIGNED_TO_UNSIGNED_DTYPES else None
        
        if needed_dtype is not None :
            # now figure out where all the corrupted values are, and shift them up to be positive
            needs_fix_mask = (scaled_data_copy < add_offset) & (scaled_data_copy != missing_val)
            
            # we are adding the 2's complement, but first we're scaling it appropriately
-           scaled_data_copy[needs_fix_mask] += ((np.iinfo(np.uint16).max + 1.0) * scale_factor)
+           scaled_data_copy[needs_fix_mask] += ((numpy.iinfo(numpy.uint16).max + 1.0) * scale_factor)
        """
        
        return scaled_data_copy
@@ -557,15 +581,15 @@ class nc (object):
            return None
        
        dataType = None
-       if np.issubdtype(data.dtype, int) :
-           dataType = np.int
+       if numpy.issubdtype(data.dtype, int) :
+           dataType = numpy.int
            #print("Picked INT")
        # TODO, at the moment the fill type is forcing me to use a double, when sometimes I want a float
-       #elif np.issubdtype(data.dtype, np.float32) :
-       #    dataType = np.float
+       #elif numpy.issubdtype(data.dtype, numpy.float32) :
+       #    dataType = numpy.float
        #    print("Picked FLOAT")
-       elif np.issubdtype(data.dtype, float) :
-           dataType = np.float64
+       elif numpy.issubdtype(data.dtype, float) :
+           dataType = numpy.float64
            #print("Picked DOUBLE")
        
        # what do we do if it's some other type?
@@ -753,13 +777,15 @@ class h5(object):
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
-       data_type = np.float32 # TODO temporary
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
-       
+       
+       # pick a data type to use internally
+       data_type = _get_data_uptype(raw_data_copy.dtype)
+       
        #print ('*************************')
        #print (dir (variable_object.id)) # TODO, is there a way to get the scale and offset through this?
        #print ('*************************')
@@ -778,12 +804,12 @@ class h5(object):
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
-       missing_mask = np.zeros(raw_data_copy.shape, dtype=np.bool)
+       missing_mask = numpy.zeros(raw_data_copy.shape, dtype=numpy.bool)
        if missing_val is not None:
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
-       scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
+       scaled_data_copy = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
@@ -800,7 +826,7 @@ class h5(object):
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        if (h5d.FILL_VALUE_DEFAULT is fillValueStatus) or (h5d.FILL_VALUE_USER_DEFINED is fillValueStatus) :
-           temp = np.array((1), dtype=variableObject.dtype)
+           temp = numpy.array((1), dtype=variableObject.dtype)
            pListObj.get_fill_value(temp)
            toReturn = temp
        
@@ -946,10 +972,10 @@ class aeri(object):
        if name in self._vectors:
            vid = self._vectors[name]
            vdata = [ fp.vectorDepValues(rec, vid) for rec in recrange ]
-           return np.array(vdata)
+           return numpy.array(vdata)
        elif name in self._scalars:
            vdata = fp.metaValueMatrix(recrange, [self._scalars[name]])
-           return np.array(vdata)
+           return numpy.array(vdata)
        else:
            raise LookupError('cannot find variable %s' % name)
        
@@ -1299,8 +1325,8 @@ class jpss_adl(object):
        field = getattr(self._blob, name)
        if not hasattr(field,'_length_'): # FUTURE: is this rigorous?
            LOG.info('creating numpy array out of singleton value for %s' % name)
-           return np.array([field])
-       return np.array(field)
 
    def get_variable_object(self,name):
        return None
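
Not part of the patch: a minimal usage sketch of the new _get_data_uptype helper, assuming it imports as glance.io._get_data_uptype (inferred from the file path pyglance/glance/io.py). It only exercises the range check described in the helper's docstring.

    import numpy
    from glance.io import _get_data_uptype  # assumed import path

    # int16 and float32 values fit inside float32's range, so the default holds
    assert _get_data_uptype(numpy.dtype(numpy.int16)) == numpy.float32
    assert _get_data_uptype(numpy.dtype(numpy.float32)) == numpy.float32

    # float64's range exceeds float32's, so the helper picks the larger type
    assert _get_data_uptype(numpy.dtype(numpy.float64)) == numpy.float64

    # the check is range-based, not precision-based: int64 fits in float32's
    # range and so still maps to float32, despite possible precision loss
    assert _get_data_uptype(numpy.dtype(numpy.int64)) == numpy.float32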