#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.

Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

import os
import logging
import numpy
from functools import reduce

LOG = logging.getLogger(__name__)

Loadable_Types = set()
try:
    import h5py
    from h5py import h5d
    Loadable_Types.add("h5")
except ImportError:
    LOG.info('no h5py module available for reading HDF5')
    h5py = None
    h5d = None # make sure the h5d name exists even when h5py is unavailable

# the newer netCDF library that replaced pycdf
try:
    import netCDF4
    Loadable_Types.update(["nc", "nc4", "cdf", "hdf", ])
except ImportError:
    LOG.info("unable to import netcdf4 library")
    netCDF4 = None

try:
    import dmv as dmvlib
    LOG.info('loaded dmv module for AERI data file access')
    Loadable_Types.update(["cxs", "rnc", "cxv", "csv", "spc", "sum", "uvs", "aeri", ])
except ImportError:
    LOG.info('no AERI dmv data file format module')
    dmvlib = None

# DEPRECATED, will be removed in future!
try:
    import adl_blob
    LOG.info('adl_blob module found for JPSS ADL data file access')
    LOG.warning('DEPRECATED: you have an adl_blob module installed; '
                'loading JPSS ADL data files is DEPRECATED and will be '
                'removed in a future version of Glance')
except ImportError:
    LOG.info('no adl_blob format handler available')
    adl_blob = None

try :
    from osgeo import gdal
    LOG.info('loading osgeo module for GeoTIFF data file access')
    Loadable_Types.update(["tiff", "tif", "tifa", ])
except ImportError :
    LOG.info('no osgeo available for reading GeoTIFF data files')
    gdal = None
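
# A small sketch (hypothetical, not part of this module's API) of how the
# loadable-types set above might be consulted:
#
#     def can_load (file_path) :
#         extension = os.path.splitext(file_path)[1][1:].lower()
#         return extension in Loadable_Types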

UNITS_CONSTANT = "units"

fillValConst1 = '_FillValue'
fillValConst2 = 'missing_value'

ADD_OFFSET_STR   = 'add_offset'
SCALE_FACTOR_STR = 'scale_factor'
SCALE_METHOD_STR = 'scaling_method'

UNSIGNED_ATTR_STR = "_unsigned"

SIGNED_TO_UNSIGNED_DTYPES = {
                                numpy.dtype(numpy.int8):    numpy.dtype(numpy.uint8),
                                numpy.dtype(numpy.int16):   numpy.dtype(numpy.uint16),
                                numpy.dtype(numpy.int32):   numpy.dtype(numpy.uint32),
                                numpy.dtype(numpy.int64):   numpy.dtype(numpy.uint64),
                            }
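
# A minimal illustration (not part of this module's API) of how the mapping
# above can recover the unsigned values a writer intended when the storage
# type is signed, assuming the raw bits really came from unsigned data:
#
#     packed   = numpy.array([-1, 0, 1], dtype=numpy.int16)
#     unpacked = packed.view(SIGNED_TO_UNSIGNED_DTYPES[packed.dtype])
#     # unpacked is now [65535, 0, 1]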

class IOUnimplimentedError(Exception):
    """
    The exception raised when a requested io operation is not yet available.
    
        msg  -- explanation of the problem
    """
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

class IONonnumericalTypeError(Exception):
    """
    A type was encountered that numpy doesn't know how to deal with - e.g. netCDF variable-length string arrays
    """
    pass

class CaseInsensitiveAttributeCache (object) :
    """
    A cache of attributes for a single file and all of it's variables.
    This cache is considered uncased, it will store all attributes it caches
    in lower case and will lower case any strings it is asked to search for
    in the cache.
    When variable or global attribute sets are not yet loaded and something
    from that part of the file is requested the cache will transparently load
    attributes from the file behind the scenes and build the cache for that
    part of the file.
    """
    
    def __init__(self, fileObject) :
        """
        set up the empty cache and hang on to the file object we'll be caching
        """
        
        self.fileToCache             = fileObject
        self.globalAttributesLower   = None
        self.variableAttributesLower = { }
    
    def _load_global_attributes_if_needed (self) :
        """
        load up the global attributes if they need to be cached
        """
        
        # load the attributes from the file if they aren't cached
        if self.globalAttributesLower is None :
            LOG.debug ("Loading file global attributes into case-insensitive cache.")
            tempAttrs                  = self.fileToCache.get_global_attributes(caseInsensitive=False)
            self.globalAttributesLower = dict((k.lower(), v) for k, v in tempAttrs.items())
    
    def _load_variable_attributes_if_needed (self, variableName) :
        """
        load up the variable attributes if they need to be cached
        """
        
        # make a lower cased version of the variable name
        tempVariableName = variableName.lower()
        
        # load the variable's attributes from the file if they aren't cached
        if tempVariableName not in self.variableAttributesLower :
            LOG.debug ("Loading attributes for variable \"" + variableName + "\" into case-insensitive cache.")
            tempAttrs = self.fileToCache.get_variable_attributes(variableName, caseInsensitive=False)
            # now if there are any attributes, make a case insensitive version
            self.variableAttributesLower[tempVariableName] = dict((k.lower(), v) for k, v in tempAttrs.items())
    
    def get_variable_attribute (self, variableName, attributeName) :
        """
        get the specified attribute for the specified variable,
        if this variable's attributes have not yet been loaded
        they will be loaded and cached
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        toReturn = None
        tempVariableName  =  variableName.lower()
        tempAttributeName = attributeName.lower()
        if (tempVariableName in self.variableAttributesLower) and (tempAttributeName in self.variableAttributesLower[tempVariableName]) :
            toReturn = self.variableAttributesLower[tempVariableName][tempAttributeName]
        else:
            LOG.debug ("Attribute \"" + attributeName + "\" was not present for variable \"" + variableName + "\".")
        
        return toReturn
    
    def get_variable_attributes (self, variableName) :
        """
        get the variable attributes for the variable name given
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        toReturn = self.variableAttributesLower[variableName.lower()] if (variableName.lower() in self.variableAttributesLower) else None
        
        return toReturn
    
    def get_global_attribute (self, attributeName) :
        """
        get a global attribute with the given name
        """
        
        self._load_global_attributes_if_needed()
        
        toReturn = self.globalAttributesLower[attributeName.lower()] if (attributeName.lower() in self.globalAttributesLower) else None
        
        return toReturn
    
    def get_global_attributes (self) :
        """
        get the global attributes
        """
        
        self._load_global_attributes_if_needed()
        
        toReturn = self.globalAttributesLower
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
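
# A brief usage sketch for the cache above; any of the wrapper classes below
# (nc, h5, etc.) can serve as the file object, and "example.nc" is a
# hypothetical path:
#
#     file_object = nc("example.nc")
#     cache = CaseInsensitiveAttributeCache(file_object)
#     fill  = cache.get_variable_attribute("SomeVariable", "_FILLVALUE")
#     # the lookup is case-insensitive, so "_FillValue" would also match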

def _get_data_uptype (input_dtype) :
    """
    Given an input data type, figure out what type we need to upcast it to.

    Note: Glance expects all it's data to get upcast into floats for the purposes of it's
    later math manipulations.
    """

    default_uptype = numpy.float32
    default_finfo  = numpy.finfo(default_uptype)
    input_info     = numpy.finfo(input_dtype) if numpy.issubdtype(input_dtype, numpy.floating) else numpy.iinfo(input_dtype)

    # if our input won't fit into the default, pick a bigger type
    if (default_finfo.min > input_info.min) or (default_finfo.max < input_info.max) :
        LOG.debug("Input data will not fit in default float32 data type, using larger type.")
        default_uptype = numpy.float64

    # FUTURE, if we reach a point where a float64 isn't big enough, this will need to be revisited

    return default_uptype
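
# For example, these follow directly from the range check above:
#
#     _get_data_uptype(numpy.dtype(numpy.int16))    # -> numpy.float32
#     _get_data_uptype(numpy.dtype(numpy.float64))  # -> numpy.float64, since
#                                                   # float32 can't hold its range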

class nc (object):
    """wrapper for netcdf4-python data access for comparison
    __call__ returns a list of the variable names in the file
    __getitem__ returns a full, scaled numpy array copy of the named variable
    """
    
    _nc = None
    _var_map = None
    _path = None

    # walk down through all groups and get variable names and objects
    def _walkgroups(self, start_at, prefix=None, ):
        # look through the variables that are here
        for var_name in start_at.variables:
            temp_name = var_name if prefix is None or len(prefix) <= 0 else prefix + "/" + var_name
            yield temp_name, start_at[var_name]
        # look through the groups that are here
        for group_name in start_at.groups:
            grp_str = group_name if prefix is None or len(prefix) <= 0 else prefix + "/" + group_name
            for more_var_name, more_var_obj in self._walkgroups(start_at.groups[group_name], prefix=grp_str):
                yield more_var_name, more_var_obj

    # walk down through all groups and get all the dimensions info
    def _walkgroups_for_dims (self, start_at, prefix=None, ):
        # look through the dims that are here
        for dim_name in start_at.dimensions:
            temp_name = dim_name if prefix is None or len(prefix) <= 0 else prefix + "/" + dim_name
            yield temp_name, start_at.dimensions[dim_name]
        # look through the groups that are here
        for group_name in start_at.groups:
            grp_str = group_name if prefix is None or len(prefix) <= 0 else prefix + "/" + group_name
            for more_dims_name, more_dims_obj in self._walkgroups_for_dims(start_at.groups[group_name], prefix=grp_str):
                yield more_dims_name, more_dims_obj

    def __init__(self, filename, allowWrite=False):
        
        if netCDF4 is None:
            LOG.error('netCDF4 is not installed and is needed in order to read NetCDF files')
            assert(netCDF4 is not None)

        mode = 'r'
        if allowWrite :
            mode = 'a' # a is for append, if I use w it creates a whole new file, deleting the old one

        self._path = filename
        self._nc = netCDF4.Dataset(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        self._var_map = { }
        for var_name, var_obj in self._walkgroups(self._nc,) :
            self._var_map[var_name] = var_obj

        self._dims_map = {}
        for dim_name, dim_obj in self._walkgroups_for_dims(self._nc, ):
            self._dims_map[dim_name] = dim_obj

    def __call__(self):
        """
        yield names of variables in this file
        """

        return list(self._var_map)

    def __getitem__(self, name):
        """
        this returns a numpy array with a copy of the full, scaled
        data for this variable, if the data type must be changed to allow
        for scaling it will be (so the return type may not reflect the
        type found in the original file)
        """

        LOG.debug("loading variable data for: " + name)

        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)

        # get our data, save the dtype, and make sure it's a more flexible dtype for now
        variable_object.set_auto_maskandscale(False)  # for now just do the darn calculations ourselves
        temp_input_data = variable_object[:]
        LOG.debug("Native input dtype: " + str(temp_input_data.dtype))
        # if this is object data, stop because we can't run our regular analysis on that kind
        if temp_input_data.dtype == object :
            LOG.warning("Variable '" + name + "' has a data type of 'object'. This type of data cannot be analyzed by Glance. "
                        "This variable will not be analyzed.")
            raise IONonnumericalTypeError("Variable '" + name + "' is of data type 'object'. "
                                          "This program can't analyze non-numerical data.")
        """
            Note to self, if we ever do want to access data in a numpy array with dtype=object, for some
            reason this library is packing that into a a zero dimensional tuple or something similar.
            I was able to unpack the data using a construction like: temp_input_data = temp_input_data[()]
            After that the array can be indexed into as normal for a numpy array.
        """
        dtype_to_use = _get_data_uptype(temp_input_data.dtype)
        LOG.debug("Choosing dtype " + str(dtype_to_use) + " for our internal representation of this data.")
        scaled_data_copy = numpy.array(temp_input_data, dtype=dtype_to_use,)

        # get the attribute cache so we can check on loading related attributes
        temp = self.attributeCache.get_variable_attributes(name)

        # figure out where the data is set to the missing value
        missing_val = self.missing_value(name)
        missing_mask = numpy.zeros(scaled_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[scaled_data_copy == missing_val] = True

        #***** just do the darn unsigned handling ourselves, ugh

        # if our data is labeled as being unsigned by the appropriately set attribute
        if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == "true":
            LOG.debug("Correcting for unsigned values in variable data.")
            where_temp = (scaled_data_copy < 0.0) & ~missing_mask # where we have negative but not missing data
            # add the two's complement offset (2**nbits) matching the storage size of the original data
            scaled_data_copy[where_temp] += 2.0 ** (8 * temp_input_data.dtype.itemsize)

        #***** end of handling the unsigned attribute
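        # (For example, a raw int16 reading of -32768 from an unsigned writer
        # is really 32768: -32768 + 2**16 == 32768.)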

        ###### the start of the scaling code
        # Note, I had to turn this back on because the netcdf4 library is behaving erratically when unsigned is set

        # get the scale factor and add offset from the attributes
        scale_factor = 1.0 if SCALE_FACTOR_STR not in temp else temp[SCALE_FACTOR_STR]
        add_offset = 0.0 if ADD_OFFSET_STR not in temp else temp[ADD_OFFSET_STR]
        scaling_method = None if SCALE_METHOD_STR not in temp else temp[SCALE_METHOD_STR]

        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        # we don't ever expect to see this for netcdf files, but we are using the netcdf library for hdf 4 now
        """
        please see constant.f90 for a more up to date version of this information:
            INTEGER(kind=int1) :: NO_SCALE              ! 0
            INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
            INTEGER(kind=int1) :: LOG_SCALE             ! 2
            INTEGER(kind=int1) :: SQRT_SCALE            ! 3
        """
        if scaling_method == 0 :
            if scale_factor != 1.0 or add_offset != 0.0 :
                LOG.warning(SCALE_METHOD_STR + " attribute indicates no scaling, but " + SCALE_FACTOR_STR +
                            " and " + ADD_OFFSET_STR +
                            " attributes will result in scaling. Defaulting to ignoring " +
                            SCALE_METHOD_STR + " attribute.")
        if (scaling_method is not None) and (int(scaling_method) > 1) :
            LOG.warning('Scaling method of "' + str(scaling_method) +
                        '" will be ignored in favor of netCDF standard linear scaling. '
                        'This may cause problems with data consistency.')

        # don't do work if we don't need to unpack things
        if (scale_factor != 1.0) or (add_offset != 0.0) :

            LOG.debug("Manually applying scale (" + str(scale_factor) + ") and add offset (" + str(add_offset) + ").")

            # unpack the data
            scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset

        ###### end of the scaling code

        """
        #TODO, this section was for when we had to do the unsigned correction after unpacking
        if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == ( "true" ) :

            LOG.debug("fixing unsigned values in variable " + name)

            # load the scale factor and add offset
            scale_factor = 1.0
            add_offset = 0.0
            temp = self.attributeCache.get_variable_attributes(name)
            if SCALE_FACTOR_STR in temp :
                scale_factor = temp[SCALE_FACTOR_STR]
            if ADD_OFFSET_STR in temp :
                add_offset = temp[ADD_OFFSET_STR]

            # get the missing value and figure out the dtype of the original data
            missing_val  = self.missing_value(name)
            orig_dtype   = numpy.array([missing_val,]).dtype
            needed_dtype = SIGNED_TO_UNSIGNED_DTYPES[orig_dtype] if orig_dtype in SIGNED_TO_UNSIGNED_DTYPES else None

            if needed_dtype is not None :
                # now figure out where all the corrupted values are, and shift them up to be positive
                needs_fix_mask = (scaled_data_copy < add_offset) & (scaled_data_copy != missing_val)
                # we are adding the 2's complement, but first we're scaling it appropriately
                scaled_data_copy[needs_fix_mask] += ((numpy.iinfo(numpy.uint16).max + 1.0) * scale_factor)
        """

        return scaled_data_copy

    # TODO, this hasn't been supported in other file types
    def close (self) :
        self._nc.close()
        self._nc = None
        self._var_map = None

    def get_variable_object(self, name):

        return self._var_map[name]

    def missing_value(self, name):
        
        toReturn = None
        
        temp = self.attributeCache.get_variable_attribute(name, fillValConst1)
        if temp is not None :
            toReturn = temp
        else :
            temp = self.attributeCache.get_variable_attribute(name, fillValConst2)
            if temp is not None :
                toReturn = temp
        
        return toReturn

    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """

        # TODO, this will not work with groups
        #self._nc.nc_redef() # TODO?
        
        # if the variable already exists, stop with a warning
        if variablename in self._nc.variables :
            LOG.warning("New variable name requested (" + variablename + ") is already present in file. " +
                        "Skipping generation of new variable.")
            return None
        # if we have no data we won't be able to determine the data type to create the variable
        if (data is None) or (len(data) <= 0) :
            LOG.warning("Data type for new variable (" + variablename + ") could not be determined. " +
                        "Skipping generation of new variable.")
            return None

        # TODO, the type management here is going to cause problems with larger floats, review this
        #dataType = None
        if numpy.issubdtype(data.dtype, int) :
            dataType = numpy.int64
            #print("Picked INT")
        # TODO, at the moment the fill type is forcing me to use a double, when sometimes I want a float
        #elif numpy.issubdtype(data.dtype, numpy.float32) :
        #    dataType = numpy.float
        #    print("Picked FLOAT")
        elif numpy.issubdtype(data.dtype, float) :
            dataType = numpy.float64
            #print("Picked DOUBLE")
        # what do we do if it's some other type?
        else :
            dataType = data.dtype
        
        # create and set all the dimensions
        dimensions = [ ]
        dimensionNum = 0
        for dimSize in data.shape :
            tempName = variablename + '-index' + str(dimensionNum)
            self._nc.createDimension(tempName, dimSize)
            dimensions.append(tempName)
            dimensionNum = dimensionNum + 1
        
        # create the new variable
        #print('variable name: ' + variablename)
        #print('data type:     ' + str(dataType))
        #print('dimensions:    ' + str(dimensions))
        # if a missing value was given, use that
        if missingvalue is None :
            newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions))
        else :
            newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions), fill_value=missingvalue, )
        
        # if we have a variable to copy attributes from, do so
        if variabletocopyattributesfrom is not None :
            attributes = self.get_variable_attributes(variabletocopyattributesfrom, caseInsensitive=False)

            for attribute in attributes :
                if attribute.lower() != "_fillvalue" :
                    setattr(newVariable, attribute, attributes[attribute])

        #self._nc.nc_enddef() # TODO?

        # if data was given, use that
        if data is not None :

            newVariable[:] = data

        return newVariable

    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue, variableObject=None,) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        # TODO, this will not work with groups

        if variableObject is None :
            variableObject = self.get_variable_object(variableName)

        #self._nc.nc_redef() # TODO?

        setattr(variableObject, newAttributeName, newAttributeValue)

        #self._nc.nc_enddef() # TODO?

        # TODO, this will cause our attribute cache to be wrong!
        # TODO, for now, brute force clear the cache
        self.attributeCache = CaseInsensitiveAttributeCache(self)

        return

    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = { }

        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            tempVarObj   = self.get_variable_object(variableName)
            tempAttrKeys = tempVarObj.ncattrs()

            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(tempVarObj, attrKey)

        return toReturn

    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)

            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a dictionary of all the global attributes for this file
        """

        #toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = { }
            tempAttrKeys = self._nc.ncattrs()
            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(self._nc, attrKey)

        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._nc.ncattrs() :
                toReturn = getattr(self._nc, attributeName)

        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """

        return True

    def display_string (self, show_attrs=False, ) :
        """
        Create and return a display string that describes informational details but not
        the actual data that's inside the file.

        If show_attrs is true, then global and variable attributes and their values will
        be included in the returned display string.

        returns a string, describing the file in a user readable format.
        """

        # identify the file by its path
        to_return = "File path: " + self._path + "\n"

        # add dimensions info
        to_return += "\tdimensions:\n"
        for dimName in self._dims_map :
            to_return += "\t\t" + dimName + " = " + str(self._dims_map[dimName].size) + "\n"

        # add detailed variables info
        to_return += "\tvariables:\n"
        temp_vars = self()
        for var_name in temp_vars:
            v_object = self.get_variable_object(var_name)
            to_return += "\t\t" + str(v_object.datatype) + " " + var_name  #+ " " + str(v_object.dimensions) + "\n"
            temp_v_dims = v_object.dimensions
            if len(temp_v_dims) < 1 :
                to_return += " (single scalar value) = " + str(self[var_name]) + "\n"
            else :
                temp_shape = v_object.shape
                to_return += " ("
                for dim_name in temp_v_dims :
                    to_return += dim_name + "=" + str(temp_shape[0]) + ", "
                    temp_shape = temp_shape[1:]
                to_return = to_return[:-2] + ")\n"
            if show_attrs :
                temp_attrs = self.get_variable_attributes(var_name, caseInsensitive=False,)
                for attr_name in temp_attrs :
                    to_return += "\t\t\t" + attr_name + " = " + str(temp_attrs[attr_name]) + "\n"

        # if appropriate, add global attributes info
        if show_attrs :
            to_return += "\tglobal attributes: \n"
            temp_g_attrs = self.get_global_attributes(caseInsensitive=False,)
            for g_attr_name in temp_g_attrs :
                to_return += "\t\t" + g_attr_name + " = " + str(temp_g_attrs[g_attr_name]) + "\n"

        return to_return

# some other aliases for different valid netcdf file extensions
nc4 = nc
cdf = nc
hdf = nc # we are now using the netcdf library to load hdf4 files
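
# A minimal usage sketch for the netCDF wrapper above ("example.nc" is a
# hypothetical path):
#
#     f = nc("example.nc")
#     for var_name in f() :          # __call__ lists the variable names
#         data = f[var_name]         # __getitem__ gives a scaled numpy copy
#         fill = f.missing_value(var_name)
#     f.close()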

# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 

class h5(object):
    """wrapper for HDF5 datasets
    """
    _h5 = None
    _path = None
    _var_map = { }

    def __init__(self, filename, allowWrite=False):
        self.attributeCache = CaseInsensitiveAttributeCache(self)

        self._path = filename

        mode = 'r' if not allowWrite else 'r+'
        if h5py is None:
            LOG.error('h5py module is not installed and is needed in order to read h5 files')
            assert(h5py is not None)
        self._h5 = h5py.File(filename, mode)

        self._var_map = h5.get_variables_in_h5(self._h5)
    
    def __call__(self):
        """
        Get the list of all the variable names in the file
        """
        
        #variableList = [ ]
        #def testFn (name, obj) :
        #    #print ('checking name: ' + name)
        #    #print ('object: ' + str(obj))
        #
        #    if isinstance(obj, h5py.Dataset) :
        #        try :
        #            tempType = obj.dtype # this is required to provoke a type error for closed data sets
        #
        #            #LOG.debug ('type: ' + str(tempType))
        #            variableList.append(name)
        #        except TypeError :
        #            LOG.debug('TypeError prevents the use of variable ' + name
        #                      + '. This variable will be ignored')
        #
        #self._h5.visititems(testFn)
        #
        #LOG.debug('variables from visiting h5 file structure: ' + str(variableList))
        #
        #return variableList

        return self._var_map.keys()

    @staticmethod
    def get_variables_in_h5(file_obj, ):
        variableMap = {}

        def testFn(name, obj):
            # print ('checking name: ' + name)
            # print ('object: ' + str(obj))

            if isinstance(obj, h5py.Dataset):
                try:
                    tempType = obj.dtype  # this is required to provoke a type error for closed data sets

                    # LOG.debug ('type: ' + str(tempType))
                    variableMap[name] = obj
                except TypeError:
                    LOG.debug('TypeError prevents the use of variable ' + name
                              + '. This variable will be ignored')

        file_obj.visititems(testFn)

        return variableMap

    # trav gets a variable object from inside a h5 file object; pth is the path and var name
    #@staticmethod
    #def trav(h5_file_obj, pth):
    #    return reduce(lambda x,a: x[a] if a else x, pth.split('/'), h5_file_obj)
        
    # this returns a numpy array with a copy of the full, scaled data for this
    # variable; if the data type must be changed to allow for scaling, it will
    # be (so the return type may not match the type found in the original file)
    def __getitem__(self, name):
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]

        # pick a data type to use internally
        data_type = _get_data_uptype(raw_data_copy.dtype)

        #print ('*************************')
        #print (dir (variable_object.id)) # TODO, is there a way to get the scale and offset through this?
        #print ('*************************')
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]
        # todo, does cdf have an equivalent of endaccess to close the variable?
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # figure out where the data is set to the missing value
        missing_val = self.missing_value(name)
        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self, name):
        #return h5.trav(self._h5, name)
        return self._var_map[name]
    
    def missing_value(self, name):
        
        toReturn = None
        
        # get the missing value if it has been set
        variableObject = self.get_variable_object(name)
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        if (h5d.FILL_VALUE_DEFAULT == fillValueStatus) or (h5d.FILL_VALUE_USER_DEFINED == fillValueStatus) :
            temp = numpy.array(1, dtype=variableObject.dtype)
            pListObj.get_fill_value(temp)
            toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """

        raise IOUnimplimentedError('Unable to create variable in hdf 5 file, this functionality is not yet available.')

        #return None
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """

        raise IOUnimplimentedError('Unable to add attribute to hdf 5 file, this functionality is not yet available.')

        #return

    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        #toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attrs

        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attrs = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attrs :
                toReturn = temp_attrs[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a dictionary of all the global attributes for this file
        """

        #toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._h5.attrs
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._h5.attrs :
                toReturn = self._h5.attrs[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True

    def display_string (self, show_attrs=False, ) :
        """
        Create and return a display string that describes informational details but not
        the actual data that's inside the file.

        If show_attrs is true, then global and variable attributes and their values will
        be included in the returned display string.

        returns a string, describing the file in a user readable format.
        """

        # identify the file by its path
        to_return = "File path: " + self._path + "\n"

        # add detailed variables info
        to_return += "\tvariables:\n"
        temp_vars = self()
        for var_name in temp_vars :
            v_object = self.get_variable_object(var_name)
            to_return += "\t\t" + str(v_object.dtype) + " " + var_name + " " + str(v_object.shape) + "\n"
            # todo, handle single value variables
            if show_attrs :
                temp_attrs = self.get_variable_attributes(var_name, caseInsensitive=False,)
                for attr_name in temp_attrs :
                    to_return += "\t\t\t " + attr_name + " = " + str(temp_attrs[attr_name]) + "\n"

        # if appropriate, add global attributes info
        if show_attrs:
            to_return += "\tglobal attributes: \n"
            temp_g_attrs = self.get_global_attributes(caseInsensitive=False, )
            for g_attr_name in temp_g_attrs:
                to_return += "\t\t" + g_attr_name + " = " + str(temp_g_attrs[g_attr_name]) + "\n"

        return to_return
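
# A minimal usage sketch for the HDF5 wrapper above ("example.h5" is a
# hypothetical path):
#
#     f = h5("example.h5")
#     for var_name in f() :          # variables found by walking the file
#         data = f[var_name]         # scaled numpy copy of the variable's data
#         fill = f.missing_value(var_name)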

class aeri(object):
    """wrapper for AERI RNC/SUM/CXS/etc datasets
    """
    _dmv = None
    _vectors = { }
    _scalars = { }
    _path = None
    
    @staticmethod
    def _meta_mapping(fp):
        ids = fp.metaIDs()
        names = [fp.queryMetaDescString(1, id_, fp.SHORTNAME) for id_ in ids]
        assert len(ids) == len(names)
        return dict(zip(names, ids))
    
    def _inventory(self):
        fp = self._dmv
        assert(fp is not None)
        # get list of vectors and scalars
        self._vectors = dict( (fp.queryVectorDescString(n,fp.SHORTNAME), n) for n in fp.vectorIDs() )
        self._scalars = self._meta_mapping(fp)

    def __init__(self, filename, allowWrite=False):
        assert(allowWrite==False)
        if dmvlib is None:
            LOG.error('cannot open AERI files without dmv module being available')
            assert (dmvlib is not None)
        self._path = filename
        self._dmv = dmvlib.dmv()
        rc = self._dmv.openFile(filename)
        if rc!=0:
            LOG.error("unable to open file, rc=%d" % rc)
            self._dmv = None        
        else:
            self._inventory()
    
    def __call__(self):
        return list(self._vectors) + list(self._scalars)
        
    def __getitem__(self, name):
        fp = self._dmv
        assert(fp is not None)
        if 'DMV_RECORDS' in os.environ:
            nrecs = int(os.environ['DMV_RECORDS'])
            LOG.warning('overriding dmv record count to %d' % nrecs)
        else:
            nrecs = self._dmv.recordCount()
        recrange = list(range(1, nrecs+1))
        if name in self._vectors:
            vid = self._vectors[name]
            vdata = [ fp.vectorDepValues(rec, vid) for rec in recrange ]
            return numpy.array(vdata)
        elif name in self._scalars:
            vdata = fp.metaValueMatrix(recrange, [self._scalars[name]])
            return numpy.array(vdata)
        else:
            raise LookupError('cannot find variable %s' % name)
       
    def get_variable_object(self,name):
        return None
    
    def missing_value(self, name):
        return float('nan')
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        raise IOUnimplimentedError('Unable to create variable in aeri file, this functionality is not yet available.')
        
        #return None
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """