diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index 86e95dbffd87cb4ce4b0b5ff9db6e515b8e86960..50235d40e730a3ac59bf5f52dbddb3ce09a20678 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -259,12 +259,12 @@ def _get_and_analyze_lon_lat (fileObject, latitudeVariableName, longitudeVariabl
     and analyze them to identify spacially invalid data (ie. data that would fall off the earth)
     """
     # get the data from the file
-    longitudeData = array(fileObject[longitudeVariableName][:], dtype=float)
-    latitudeData = array(fileObject[latitudeVariableName][:], dtype=float)
+    longitudeData = array(fileObject[longitudeVariableName], dtype=float)
+    latitudeData = array(fileObject[latitudeVariableName], dtype=float)
     
     # build a mask of our spacially invalid data
     invalidLatitude = (latitudeData < -90) | (latitudeData > 90)
-    invalidLongitude = (longitudeData < -180) | (longitudeData > 180)
+    invalidLongitude = (longitudeData < -180) | (longitudeData > 360)
     spaciallyInvalidMask = invalidLatitude | invalidLongitude
     
     # analyze our spacially invalid data
@@ -506,17 +506,17 @@ python -m glance.compare plotDiffs A.hdf B.hdf [optional output path]
     pats = args[3:] or ['.*']
     names = _parse_varnames( cnames, pats, options.epsilon, options.missing )
     for name,epsilon,missing in names:
-        avar = a[name]
-        bvar = b[name]
-        nvar = noiz[name]
+        aData = a[name]
+        bData = b[name]
+        nData = noiz[name]
         if missing is None:
             amiss = a.missing_value(name)
             bmiss = b.missing_value(name)
         else:
             amiss,bmiss = missing,missing
-        x = avar[:]
-        y = bvar[:]
-        z = nvar[:]
+        x = aData
+        y = bData
+        z = nData
         def scat(x,xn,y):
             from pylab import plot,show,scatter
             scatter(x,y)
@@ -555,16 +555,16 @@ python -m glance.compare plotDiffs A.hdf B.hdf [optional output path]
     doc_each = (options.verbose or options.debug) and len(names)==1
     doc_atend = (options.verbose or options.debug) and len(names)!=1
     for name,epsilon,missing in names:
-        avar = a[name]
-        bvar = b[name]
+        aData = a[name]
+        bData = b[name]
         if missing is None:
             amiss = a.missing_value(name)
             bmiss = b.missing_value(name)
         else:
             amiss,bmiss = missing,missing
         LOG.debug('comparing %s with epsilon %s and missing %s,%s' % (name,epsilon,amiss,bmiss))
-        aval = avar[:]
-        bval = bvar[:]
+        aval = aData
+        bval = bData
         print '-'*32
         print name
         print
@@ -731,6 +731,7 @@ python -m glance.compare plotDiffs A.hdf B.hdf [optional output path]
         explanationName = name
         if (varRunInfo.has_key('alternate_name_in_B')) :
             explanationName = explanationName + " / " + varRunInfo['alternate_name_in_B']
+        print('analyzing variable: ' + explanationName)
         
         # if B has an alternate variable name, figure that out
         has_alt_B_variable = False
@@ -740,8 +741,8 @@ python -m glance.compare plotDiffs A.hdf B.hdf [optional output path]
             b_variable = varRunInfo['alternate_name_in_B']
         
         # get the data for the variable
-        aData = aFile[varRunInfo['variable_name']][:]
-        bData = bFile[b_variable][:]
+        aData = aFile[varRunInfo['variable_name']]
+        bData = bFile[b_variable]
         
         # check if this data can be displayed
         if ((aData.shape == bData.shape) and
@@ -857,7 +858,7 @@ python -m glance.compare plotDiffs A.hdf B.hdf [optional output path]
         LOG.info("\topening " + file_path)
         file_object = io.open(file_path)
         LOG.info("\tgetting " + old_var_name)
-        variable_object_old = file_object[old_var_name]
+        variable_object_old = file_object.get_variable_object(old_var_name)
         temp, old_rank, old_shape, old_type, old_num_attributes = SDS.info(variable_object_old)
         old_attributes = SDS.attributes(variable_object_old)
diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py
index 299809d52d8fb0bb38d9b26d8a4d985ea3d93eaf..a38ab5c2e27124c3a12dbec93420f0d1bc10a339 100644
--- a/pyglance/glance/delta.py
+++ b/pyglance/glance/delta.py
@@ -258,6 +258,9 @@ def summarize(a, b, epsilon=0., (a_missing_value, b_missing_value)=(None,None),
     """return dictionary of statistics dictionaries
     stats not including 'nan' in name exclude nans in either arrays
     """
+    #print('a type: ' + str(a.dtype))
+    #print('b type: ' + str(b.dtype))
+    
     # select/build our ignore masks
     # if the user didn't send us any, don't ignore anything
     if (ignoreInAMask is None) :
diff --git a/pyglance/glance/io.py b/pyglance/glance/io.py
index 139d66099e54175701017e17ff919956c12559a1..35d02a10431af16d2854184b56a62d6c219e0003 100644
--- a/pyglance/glance/io.py
+++ b/pyglance/glance/io.py
@@ -9,13 +9,15 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
 
 import os, sys, logging
 
-from pyhdf.SD import SD,SDC
+from pyhdf.SD import SD,SDC, SDS, HDF4Error
 try:
     import h5py
 except ImportError:
     pass
 from pycdf import CDF, NC
 
+import numpy as np
+
 LOG = logging.getLogger(__name__)
 
 class hdf(SD):
@@ -31,9 +33,45 @@ class hdf(SD):
         "yield names of variables to be compared"
         return self.datasets().keys()
     
+    # this returns a numpy array with a copy of the full, scaled
+    # data for this variable, if the data type must be changed to allow
+    # for scaling it will be (so the return type may not reflect the
+    # type found in the original file)
    def __getitem__(self, name):
-        return self.select(name)
+        # defaults
+        scale_factor = 1.0
+        add_offset = 0.0
+        data_type = np.float32 # TODO temporary
+        
+        # get the variable object and use it to
+        # get our raw data and scaling info
+        variable_object = self.get_variable_object(name)
+        raw_data_copy = variable_object[:]
+        try :
+            # TODO, this currently won't work with geocat data, work around it for now
+            scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
+        except HDF4Error:
+            # load just the scale factor and add offset
+            temp_attributes = variable_object.attributes()
+            if ('scale_factor' in temp_attributes) :
+                scale_factor = temp_attributes['scale_factor']
+            if ('add_offset' in temp_attributes) :
+                add_offset = temp_attributes['add_offset']
+        SDS.endaccess(variable_object)
+        
+        # don't do lots of work if we don't need to scale things
+        if (scale_factor == 1.0) and (add_offset == 0.0) :
+            return raw_data_copy
+        
+        # create the scaled version of the data
+        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
+        scaled_data_copy = (scaled_data_copy - add_offset) * scale_factor #TODO, type truncation issues?
+        
+        return scaled_data_copy
+    
+    def get_variable_object(self, name):
+        return self.select(name)
+    
    def missing_value(self, name):
        return getattr(self.select(name),'_FillValue',None)
@@ -51,9 +89,41 @@
         "yield names of variables to be compared"
         return self.variables().keys()
     
+    # this returns a numpy array with a copy of the full, scaled
+    # data for this variable, if the data type must be changed to allow
+    # for scaling it will be (so the return type may not reflect the
+    # type found in the original file)
    def __getitem__(self, name):
-        return self.var(name)
+        # defaults
+        scale_factor = 1.0
+        add_offset = 0.0
+        data_type = np.float32 # TODO temporary
+        
+        # get the variable object and use it to
+        # get our raw data and scaling info
+        variable_object = self.get_variable_object(name)
+        raw_data_copy = variable_object[:]
+        # load the scale factor and add offset
+        temp_attributes = variable_object.attributes()
+        if ('scale_factor' in temp_attributes) :
+            scale_factor = temp_attributes['scale_factor']
+        if ('add_offset' in temp_attributes) :
+            add_offset = temp_attributes['add_offset']
+        # todo, does cdf have an equivalent of endaccess to close the variable?
+        
+        # don't do lots of work if we don't need to scale things
+        if (scale_factor == 1.0) and (add_offset == 0.0) :
+            return raw_data_copy
+        
+        # create the scaled version of the data
+        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
+        scaled_data_copy = (scaled_data_copy - add_offset) * scale_factor #TODO, type truncation issues?
+        
+        return scaled_data_copy
+    
+    def get_variable_object(self, name):
+        return self.var(name)
+    
    def missing_value(self, name):
        return getattr(self.var(name),'_FillValue',getattr(self.var(name),'missing_value',None))
 
 nc4 = nc
@@ -78,7 +148,39 @@ class h5(object):
    def trav(h5,pth):
        return reduce( lambda x,a: x[a] if a else x, pth.split('/'), h5)
     
-    def __getitem__(self,name):
+    # this returns a numpy array with a copy of the full, scaled
+    # data for this variable, if the data type must be changed to allow
+    # for scaling it will be (so the return type may not reflect the
+    # type found in the original file)
+    def __getitem__(self, name):
+        # defaults
+        scale_factor = 1.0
+        add_offset = 0.0
+        data_type = np.float32 # TODO temporary
+        
+        # get the variable object and use it to
+        # get our raw data and scaling info
+        variable_object = self.get_variable_object(name)
+        raw_data_copy = variable_object[:]
+        # load the scale factor and add offset
+        temp_attributes = variable_object.attributes()
+        if ('scale_factor' in temp_attributes) :
+            scale_factor = temp_attributes['scale_factor']
+        if ('add_offset' in temp_attributes) :
+            add_offset = temp_attributes['add_offset']
+        # todo, does cdf have an equivalent of endaccess to close the variable?
+        
+        # don't do lots of work if we don't need to scale things
+        if (scale_factor == 1.0) and (add_offset == 0.0) :
+            return raw_data_copy
+        
+        # create the scaled version of the data
+        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
+        scaled_data_copy = (scaled_data_copy - add_offset) * scale_factor #TODO, type truncation issues?
+        
+        return scaled_data_copy
+    
+    def get_variable_object(self,name):
        return h5.trav(self._h5, name)
     
    def missing_value(self, name):
diff --git a/pyglance/glance/plot.py b/pyglance/glance/plot.py
index 2830332196137881adf22e7e4545ba17212fc0b9..3c81aff9ffb046074cf50a763175b563426ea4e2 100644
--- a/pyglance/glance/plot.py
+++ b/pyglance/glance/plot.py
@@ -200,7 +200,8 @@ def _create_mapped_figure(data, latitude, longitude, boundingAxes, title,
         kwargs['cmap'] = colorMap
     
     # draw our data placed on a map
-    bMap, x, y = maps.mapshow(longitudeCleaned, latitudeCleaned, data, boundingAxes, **kwargs)
+    bMap, x, y = maps.mapshow(longitudeCleaned, latitudeCleaned, data, boundingAxes, #projection='sinu',
+                              **kwargs)
     
     # and some informational stuff
     axes.set_title(title)
@@ -399,6 +400,9 @@ def plot_and_save_figure_comparison(aData, bData,
     visibleAxesA = _get_visible_axes (longitudeAData, latitudeAData, spaciallyInvalidMaskA)
     visibleAxesB = _get_visible_axes (longitudeBData, latitudeBData, spaciallyInvalidMaskB)
     visibleAxesBoth = _get_visible_axes (longitudeCommonData, latitudeCommonData, spaciallyInvalidMaskBoth)
+    LOG.debug ("visible axes in A: " + str(visibleAxesA))
+    LOG.debug ("visible axes in B: " + str(visibleAxesB))
+    LOG.debug ("visible axes in Both: " + str(visibleAxesBoth))
     
     # make the original data figures
     print("\tcreating image of file a")
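Below is a brief usage sketch (not part of the patch) of how the glance.io wrappers behave after this change. The file name 'A.hdf' and the variable loop are illustrative only, and it assumes io.open() still picks the appropriate hdf/nc/h5 wrapper for the file being opened:

from glance import io

file_object = io.open('A.hdf')                    # hypothetical input file
for name in file_object():                        # __call__ still yields the variable names
    data = file_object[name]                      # now returns a full, scaled numpy copy of the data
    fill = file_object.missing_value(name)        # fill value, if the variable defines one
    raw  = file_object.get_variable_object(name)  # new accessor for the unscaled variable object
    print('%s: shape %s, fill %s' % (name, str(data.shape), str(fill)))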