Commit 13810a45 authored by Eva Schiffer

moving to using the netcdf library to load hdf4 files

parent a32f4a28
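As context for this change: recent builds of the netCDF4 python module, when the underlying netCDF-C library is compiled with HDF4 support, can open SD-style HDF4 files directly, which is why the "hdf" extension can now be handled by the nc wrapper below. A minimal sketch of that usage, assuming such a build and a hypothetical file example.hdf:

# Read an HDF4 file through the netCDF4 python library.
# Assumes netCDF-C was built with HDF4 support; "example.hdf" is a made-up filename.
import netCDF4

with netCDF4.Dataset("example.hdf", "r") as ds:
    print(list(ds.variables.keys()))    # variable names, like the nc wrapper's __call__
    for name, var in ds.variables.items():
        data = var[:]                   # by default netCDF4 auto-applies scale_factor/add_offset
        print(name, data.shape, data.dtype)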
@@ -14,16 +14,6 @@ from functools import reduce
LOG = logging.getLogger(__name__)
Loadable_Types = set()
try:
import pyhdf
from pyhdf.SD import SD,SDC, SDS, HDF4Error
Loadable_Types.add("hdf")
except:
LOG.info('no pyhdf module available for HDF4')
pyhdf = None
SD = SDC = SDS = object
HDF4Error = EnvironmentError
try:
import h5py
@@ -36,7 +26,7 @@ except ImportError:
# the newer netCDF library that replaced pycdf
try:
import netCDF4
Loadable_Types.update(["nc", "nc4", "cdf", ])
Loadable_Types.update(["nc", "nc4", "cdf", "hdf", ])
except:
LOG.info("unable to import netcdf4 library")
netCDF4 = None
@@ -231,192 +221,6 @@ def _get_data_uptype (input_dtype) :
return default_uptype
class hdf (object):
"""wrapper for HDF4 dataset for comparison
__call__ yields sequence of variable names
__getitem__ returns individual variables ready for slicing to numpy arrays
"""
_hdf = None
def __init__(self, filename, allowWrite=False):
if pyhdf is None:
LOG.error('pyhdf is not installed and is needed in order to read hdf4 files')
assert(pyhdf is not None)
mode = SDC.READ
if allowWrite:
mode = mode | SDC.WRITE
self._hdf = SD(filename, mode)
self.attributeCache = CaseInsensitiveAttributeCache(self)
def __call__(self):
"""
yield names of variables to be compared
"""
return list(self._hdf.datasets())
# this returns a numpy array with a copy of the full, scaled
# data for this variable, if the data type must be changed to allow
# for scaling it will be (so the return type may not reflect the
# type found in the original file)
def __getitem__(self, name):
# defaults
scale_factor = 1.0
add_offset = 0.0
data_type = None
scaling_method = None
# get the variable object and use it to
# get our raw data and scaling info
variable_object = self.get_variable_object(name)
raw_data_copy = variable_object[:]
try :
# TODO, this currently won't work with geocat data, work around it for now
scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
except HDF4Error:
# load just the scale factor and add offset information by hand
temp = self.attributeCache.get_variable_attributes(name)
if ADD_OFFSET_STR in temp :
add_offset = temp[ADD_OFFSET_STR]
data_type = numpy.dtype(type(add_offset))
if SCALE_FACTOR_STR in temp :
scale_factor = temp[SCALE_FACTOR_STR]
data_type = numpy.dtype(type(scale_factor))
if SCALE_METHOD_STR in temp :
scaling_method = temp[SCALE_METHOD_STR]
SDS.endaccess(variable_object)
# don't do lots of work if we don't need to scale things
if (scale_factor == 1.0) and (add_offset == 0.0) :
return raw_data_copy
# at the moment geocat has several scaling methods that don't match the normal standards for hdf
"""
please see constant.f90 for a more up to date version of this information:
INTEGER(kind=int1) :: NO_SCALE ! 0
INTEGER(kind=int1) :: LINEAR_SCALE ! 1
INTEGER(kind=int1) :: LOG_SCALE ! 2
INTEGER(kind=int1) :: SQRT_SCALE ! 3
"""
if scaling_method == 0 :
return raw_data_copy
if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
LOG.warning ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of hdf standard method. '
+ 'This may cause problems with data consistency')
# if we don't have a data type something strange has gone wrong
assert(not (data_type is None))
# get information about where the data is the missing value
missing_val = self.missing_value(name)
missing_mask = numpy.zeros(raw_data_copy.shape, dtype=numpy.bool)
if missing_val is not None :
missing_mask[raw_data_copy == missing_val] = True
# create the scaled version of the data
scaled_data_copy = numpy.array(raw_data_copy, dtype=data_type)
scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
return scaled_data_copy
def get_variable_object(self, name):
return self._hdf.select(name)
def missing_value(self, name):
return self.get_attribute(name, fillValConst1)
def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
"""
create a new variable with the given name
optionally set the missing value (fill value) and data to those given
the created variable will be returned, or None if a variable could not
be created
"""
raise IOUnimplimentedError('Unable to create variable in hdf file, this functionality is not yet available.')
#return None
def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
"""
if the attribute exists for the given variable, set it to the new value
if the attribute does not exist for the given variable, create it and set it to the new value
"""
raise IOUnimplimentedError('Unable to add attribute to hdf file, this functionality is not yet available.')
#return
def get_variable_attributes (self, variableName, caseInsensitive=True) :
"""
returns all the attributes associated with a variable name
"""
#toReturn = None
if caseInsensitive :
toReturn = self.attributeCache.get_variable_attributes(variableName)
else :
toReturn = self.get_variable_object(variableName).attributes()
return toReturn
def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
"""
returns the value of the attribute if it is available for this variable, or None
"""
toReturn = None
if caseInsensitive :
toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
else :
temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
if attributeName in temp_attributes :
toReturn = temp_attributes[attributeName]
return toReturn
def get_global_attributes(self, caseInsensitive=True) :
"""
get a list of all the global attributes for this file or None
"""
#toReturn = None
if caseInsensitive :
toReturn = self.attributeCache.get_global_attributes()
else :
toReturn = self._hdf.attributes()
return toReturn
def get_global_attribute(self, attributeName, caseInsensitive=True) :
"""
returns the value of a global attribute if it is available or None
"""
toReturn = None
if caseInsensitive :
toReturn = self.attributeCache.get_global_attribute(attributeName)
else :
if attributeName in self._hdf.attributes() :
toReturn = self._hdf.attributes()[attributeName]
return toReturn
def is_loadable_type (self, name) :
"""
check to see if the indicated variable is a type that can be loaded
"""
# TODO, are there any bad types for these files?
return True
class nc (object):
"""wrapper for netcdf4-python data access for comparison
__call__ yields sequence of variable names
@@ -518,12 +322,29 @@ class nc (object):
# Note, I had to turn this back on because the netcdf4 library is behaving erratically when unsigned is set
# get the scale factor and add offset from the attributes
scale_factor = 1.0
add_offset = 0.0
if SCALE_FACTOR_STR in temp :
scale_factor = temp[SCALE_FACTOR_STR]
if ADD_OFFSET_STR in temp :
add_offset = temp[ADD_OFFSET_STR]
scale_factor = 1.0 if SCALE_FACTOR_STR not in temp else temp[SCALE_FACTOR_STR]
add_offset = 0.0 if ADD_OFFSET_STR not in temp else temp[ADD_OFFSET_STR]
scaling_method = None if SCALE_METHOD_STR not in temp else temp[SCALE_METHOD_STR]
# at the moment geocat has several scaling methods that don't match the normal standards for hdf
# we don't ever expect to see this for netcdf files, but we are using the netcdf library for hdf 4 now
"""
please see constant.f90 for a more up to date version of this information:
INTEGER(kind=int1) :: NO_SCALE ! 0
INTEGER(kind=int1) :: LINEAR_SCALE ! 1
INTEGER(kind=int1) :: LOG_SCALE ! 2
INTEGER(kind=int1) :: SQRT_SCALE ! 3
"""
if scaling_method == 0 :
if scale_factor != 1.0 or add_offset != 0.0 :
LOG.warning(SCALE_METHOD_STR + " attribute indicates no scaling, but " + SCALE_FACTOR_STR +
" and " + ADD_OFFSET_STR +
" attributes will result in scaling. Defaulting to ignoring " +
SCALE_METHOD_STR + " attribute.")
if (scaling_method is not None) and (int(scaling_method) > 1) :
LOG.warning('Scaling method of \"' + str(
scaling_method) + '\" will be ignored in favor of netCDF standard linear scaling. '
+ 'This may cause problems with data consistency')
# don't do work if we don't need to unpack things
if (scale_factor != 1.0) or (add_offset != 0.0) :
@@ -757,9 +578,10 @@ class nc (object):
"""
return True
# some other aliases for different valid netcdf file extensions
nc4 = nc
cdf = nc
hdf = nc # we are now using the netcdf library to load hdf4 files
# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ]
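For reference, the unpacking that both the removed pyhdf path above and the netCDF path apply is the standard linear form, scaled = raw * scale_factor + add_offset, with fill values left untouched. A minimal numpy sketch with made-up values:

# Linear unpacking as described above; the array, scale_factor, add_offset, and
# fill value here are invented purely for illustration.
import numpy

raw = numpy.array([0, 5, 10, -999], dtype=numpy.int16)
scale_factor, add_offset, fill_value = 0.01, 273.15, -999.0

missing_mask = raw == fill_value
scaled = raw.astype(numpy.float64)
scaled[~missing_mask] = scaled[~missing_mask] * scale_factor + add_offset
print(scaled)   # [ 273.15  273.2  273.25  -999. ]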
@@ -961,7 +783,6 @@ class h5(object):
# TODO, are there any bad types for these files?
return True
class aeri(object):
"""wrapper for AERI RNC/SUM/CXS/etc datasets
"""
@@ -987,7 +808,7 @@ class aeri(object):
assert(allowWrite==False)
if dmvlib is None:
LOG.error('cannot open AERI files without dmv module being available')
return
assert (dmvlib is not None)
self._dmv = dmvlib.dmv()
rc = self._dmv.openFile(filename)
if rc!=0:
@@ -1328,6 +1149,7 @@ def _search_xml(pathname):
yield pathname.replace('-', '_') + xs
yield os.path.splitext(pathname)[0].replace('-', '_') + xs
# DEPRECATED: loading this type of file is deprecated and will be removed in the future
class jpss_adl(object):
"""wrapper for JPSS ADL BLOBs
This is a somewhat unique case in that the BLOB loader requires both an XML path and a BLOB path.