#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.

Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

import os, logging
import numpy
from functools import reduce

LOG = logging.getLogger(__name__)

Loadable_Types = set()

try:
    import pyhdf
    from pyhdf.SD import SD,SDC, SDS, HDF4Error
    Loadable_Types.add("hdf")
except ImportError:
    LOG.info('no pyhdf module available for HDF4')
    pyhdf = None
    SD = SDC = SDS = object
    HDF4Error = EnvironmentError
    
try:
    import h5py
    from h5py import h5d
    Loadable_Types.add("h5")
except ImportError:
    LOG.info('no h5py module available for reading HDF5')
    h5py = None

# the newer netCDF library that replaced pycdf
try:
    import netCDF4
    Loadable_Types.update(["nc", "nc4", "cdf", ])
except ImportError:
    LOG.info("unable to import netcdf4 library")
    netCDF4 = None

try:
    import dmv as dmvlib
    LOG.info('loaded dmv module for AERI data file access')
    Loadable_Types.update(["cxs", "rnc", "cxv", "csv", "spc", "sum", "uvs", "aeri", ])
except ImportError:
    LOG.info('no AERI dmv data file format module')
    dmvlib = None

# DEPRECATED, will be removed in future!
try:
    import adl_blob
    LOG.info('adl_blob module found for JPSS ADL data file access')
    LOG.warning('DEPRECATED: you have an adl_blob module installed; '
                'loading JPSS ADL data files is DEPRECATED and will be '
                'removed in a future version of Glance')
except ImportError:
    LOG.info('no adl_blob format handler available')
    adl_blob = None

try :
    from osgeo import gdal
    LOG.info('loading osgeo module for GeoTIFF data file access')
    Loadable_Types.update(["tiff", "tif", "tifa", ])
except ImportError :
    LOG.info('no osgeo available for reading GeoTIFF data files')
    gdal = None

UNITS_CONSTANT = "units"

fillValConst1 = '_FillValue'
fillValConst2 = 'missing_value'

ADD_OFFSET_STR   = 'add_offset'
SCALE_FACTOR_STR = 'scale_factor'
SCALE_METHOD_STR = 'scaling_method'

UNSIGNED_ATTR_STR = "_unsigned"

SIGNED_TO_UNSIGNED_DTYPES = {
                                numpy.dtype(numpy.int8):    numpy.dtype(numpy.uint8),
                                numpy.dtype(numpy.int16):   numpy.dtype(numpy.uint16),
                                numpy.dtype(numpy.int32):   numpy.dtype(numpy.uint32),
                                numpy.dtype(numpy.int64):   numpy.dtype(numpy.uint64),
                            }
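
# (This table supports the "_unsigned" attribute convention, in which data
#  stored in a signed type is meant to be reinterpreted as the matching
#  unsigned type.)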

class IOUnimplimentedError(Exception):
    """
    The exception raised when a requested io operation is not yet available.
    
        msg  -- explanation of the problem
    """
    def __init__(self, msg):
        self.msg = msg
    def __str__(self):
        return self.msg

class IONonnumericalTypeError(Exception):
    """
    A type was encountered that numpy doesn't know how to deal with - e.g. netCDF variable-length string arrays
    """
    pass

class CaseInsensitiveAttributeCache (object) :
    """
    A cache of attributes for a single file and all of it's variables.
    This cache is considered uncased, it will store all attributes it caches
    in lower case and will lower case any strings it is asked to search for
    in the cache.
    When variable or global attribute sets are not yet loaded and something
    from that part of the file is requested the cache will transparently load
    attributes from the file behind the scenes and build the cache for that
    part of the file.
    """
    
    def __init__(self, fileObject) :
        """
        set up the empty cache and hang on to the file object we'll be caching
        """
        
        self.fileToCache             = fileObject
        self.globalAttributesLower   = None
        self.variableAttributesLower = { }
    
    def _load_global_attributes_if_needed (self) :
        """
        load up the global attributes if they need to be cached
        """
        
        # load the attributes from the file if they aren't cached
        if self.globalAttributesLower is None :
            LOG.debug ("Loading file global attributes into case-insensitive cache.")
            tempAttrs                  = self.fileToCache.get_global_attributes(caseInsensitive=False)
            self.globalAttributesLower = dict((k.lower(), v) for k, v in tempAttrs.items())
    
    def _load_variable_attributes_if_needed (self, variableName) :
        """
        load up the variable attributes if they need to be cached
        """
        
        # make a lower cased version of the variable name
        tempVariableName = variableName.lower()
        
        # load the variable's attributes from the file if they aren't cached
        if tempVariableName not in self.variableAttributesLower :
            LOG.debug ("Loading attributes for variable \"" + variableName + "\" into case-insensitive cache.")
            tempAttrs = self.fileToCache.get_variable_attributes(variableName, caseInsensitive=False)
            # now if there are any attributes, make a case insensitive version
            self.variableAttributesLower[tempVariableName] = dict((k.lower(), v) for k, v in tempAttrs.items())
    
    def get_variable_attribute (self, variableName, attributeName) :
        """
        get the specified attribute for the specified variable,
        if this variable's attributes have not yet been loaded
        they will be loaded and cached
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        toReturn = None
        tempVariableName  =  variableName.lower()
        tempAttributeName = attributeName.lower()
        if (tempVariableName in self.variableAttributesLower) and (tempAttributeName in self.variableAttributesLower[tempVariableName]) :
            toReturn = self.variableAttributesLower[tempVariableName][tempAttributeName]
        else:
            LOG.debug ("Attribute \"" + attributeName + "\" was not present for variable \"" + variableName + "\".")
        
        return toReturn
    
    def get_variable_attributes (self, variableName) :
        """
        get the variable attributes for the variable name given
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        toReturn = self.variableAttributesLower[variableName.lower()] if (variableName.lower() in self.variableAttributesLower) else None
        
        return toReturn
    
    def get_global_attribute (self, attributeName) :
        """
        get a global attribute with the given name
        """
        
        self._load_global_attributes_if_needed()
        
        toReturn = self.globalAttributesLower[attributeName.lower()] if (attributeName.lower() in self.globalAttributesLower) else None
        
        return toReturn
    
    def get_global_attributes (self) :
        """
        get the global attributes
        """
        
        self._load_global_attributes_if_needed()
        
        toReturn = self.globalAttributesLower
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
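
# A minimal usage sketch for the cache (the file name below is hypothetical;
# the wrapper classes in this module each construct one of these for
# themselves automatically):
#
#   f = nc("/path/to/example.nc")
#   cache = CaseInsensitiveAttributeCache(f)
#   units = cache.get_variable_attribute("brightness_temp", "UNITS")
#   # "units", "Units", and "UNITS" all find the same cached attribute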

def _get_data_uptype (input_dtype) :
    """
    Given an input data type, figure out what type we need to upcast it to.

    Note: Glance expects all its data to be upcast into floats for the purposes of its
    later math manipulations.
    """

    default_uptype = numpy.float32
    default_finfo  = numpy.finfo(default_uptype)
    input_info     = numpy.finfo(input_dtype) if numpy.issubdtype(input_dtype, numpy.floating) else numpy.iinfo(input_dtype)

    # if our input won't fit into the default, pick a bigger type
    if (default_finfo.min > input_info.min) or (default_finfo.max < input_info.max) :
        LOG.debug("Input data will not fit in default float32 data type, using larger type.")
        default_uptype = numpy.float64

    # FUTURE, if we reach a point where a float64 isn't big enough, this will need to be revisited

    return default_uptype

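# For example (a sketch of the expected behavior):
#   _get_data_uptype(numpy.dtype(numpy.int16))   # -> numpy.float32 (int16 range fits)
#   _get_data_uptype(numpy.dtype(numpy.float64)) # -> numpy.float64 (exceeds float32 range)
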
class hdf (object):
    """wrapper for HDF4 dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    _hdf = None
    
    def __init__(self, filename, allowWrite=False):
        
        if pyhdf is None:
            LOG.error('pyhdf is not installed and is needed in order to read hdf4 files')
            assert(pyhdf is not None)
        mode = SDC.READ
        if allowWrite:
            mode = mode | SDC.WRITE
        
        self._hdf = SD(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)

    def __call__(self):
        """
        yield names of variables to be compared
        """
        return list(self._hdf.datasets())
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = None 
        scaling_method = None
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        try :
            # TODO, this currently won't work with geocat data, work around it for now
            scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
        except HDF4Error:
            # load just the scale factor and add offset information by hand
            temp = self.attributeCache.get_variable_attributes(name)
            if ADD_OFFSET_STR in temp :
                add_offset = temp[ADD_OFFSET_STR]
                data_type = numpy.dtype(type(add_offset))
            if SCALE_FACTOR_STR in temp :
                scale_factor = temp[SCALE_FACTOR_STR]
                data_type = numpy.dtype(type(scale_factor))
            if SCALE_METHOD_STR in temp :
                scaling_method = temp[SCALE_METHOD_STR]
        SDS.endaccess(variable_object)
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        """
        please see constant.f90 for a more up to date version of this information:
            INTEGER(kind=int1) :: NO_SCALE              ! 0
            INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
            INTEGER(kind=int1) :: LOG_SCALE             ! 2
            INTEGER(kind=int1) :: SQRT_SCALE            ! 3 
        """
        if scaling_method == 0 :
            return raw_data_copy
        if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
            LOG.warning ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of the hdf standard method. '
                         + 'This may cause problems with data consistency.')
        
        # if we don't have a data type, something strange has gone wrong
        assert(data_type is not None)
        
        # get information about where the data matches the missing value
        missing_val = self.missing_value(name)
        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None :
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy                = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy 
    
    def get_variable_object(self, name):
        return self._hdf.select(name)
    
    def missing_value(self, name):
        
        return self.get_attribute(name, fillValConst1)
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf file, this functionality is not yet available.')
        
        #return None
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        raise IOUnimplimentedError('Unable to add attribute to hdf file, this functionality is not yet available.')
        
        #return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        #toReturn = None
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attributes()
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a dictionary of all the global attributes for this file, or None
        """
        
        #toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._hdf.attributes()
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._hdf.attributes() :
                toReturn = self._hdf.attributes()[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
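
# A minimal usage sketch for the hdf wrapper (the file name is hypothetical):
#
#   f = hdf("/path/to/granule.hdf")
#   for var_name in f():     # __call__ lists the variable names
#       data = f[var_name]   # __getitem__ returns a scaled numpy copy of the data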

class nc (object):
    """wrapper for netcdf4-python data access for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    _nc = None
    _var_map = None

    # walk down through all groups and get variable names and objects
    def _walkgroups(self, start_at, prefix=None):
        # look through the variables that are here
        for var_name in start_at.variables:
            temp_name = var_name if prefix is None or len(prefix) <= 0 else prefix + "/" + var_name
            yield temp_name, start_at[var_name]
        # look through the groups that are here
        for group_name in start_at.groups:
            grp_str = group_name if prefix is None or len(prefix) <= 0 else prefix + "/" + group_name
            for more_var_name, more_var_obj in self._walkgroups(start_at.groups[group_name], prefix=grp_str):
                yield more_var_name, more_var_obj
    
    def __init__(self, filename, allowWrite=False):
        
        if netCDF4 is None:
            LOG.error('netCDF4 is not installed and is needed in order to read NetCDF files')
            assert(netCDF4 is not None)
        
        mode = 'r'
        if allowWrite :
            mode = 'a' # a is for append, if I use w it creates a whole new file, deleting the old one

        self._nc = netCDF4.Dataset(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        self._var_map = { }
        for var_name, var_obj in self._walkgroups(self._nc,) :
            self._var_map[var_name] = var_obj

    def __call__(self):
        """
        yield names of variables in this file
        """

        return list(self._var_map)

    def __getitem__(self, name):
        """
        this returns a numpy array with a copy of the full, scaled
        data for this variable, if the data type must be changed to allow
        for scaling it will be (so the return type may not reflect the
        type found in the original file)
        """

        LOG.debug("loading variable data for: " + name)

        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)

        # get our data, save the dtype, and make sure it's a more flexible dtype for now
        variable_object.set_auto_maskandscale(False)  # for now just do the darn calculations ourselves
        temp_input_data = variable_object[:]
        LOG.debug("Native input dtype: " + str(temp_input_data.dtype))
        # if this is object data, stop because we can't run our regular analysis on that kind
        if temp_input_data.dtype == object :
            LOG.warning("Variable '" + name + "' has a data type of 'object'. This type of data cannot be analyzed by Glance. "
                        "This variable will not be analyzed.")
            raise IONonnumericalTypeError("Variable '" + name + "' is of data type 'object'. "
                                          "This program can't analyze non-numerical data.")
        """
            Note to self, if we ever do want to access data in a numpy array with dtype=object, for some
            reason this library is packing that into a zero dimensional tuple or something similar.
            I was able to unpack the data using a construction like: temp_input_data = temp_input_data[()]
            After that the array can be indexed into as normal for a numpy array.
        """
        dtype_to_use = _get_data_uptype(temp_input_data.dtype)
        LOG.debug("Choosing dtype " + str(dtype_to_use) + " for our internal representation of this data.")
        scaled_data_copy = numpy.array(temp_input_data, dtype=dtype_to_use,)

        # get the attribute cache so we can check on loading related attributes
        temp = self.attributeCache.get_variable_attributes(name)

        # get information about where the data matches the missing value
        missing_val = self.missing_value(name)
        missing_mask = numpy.zeros(scaled_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[scaled_data_copy == missing_val] = True

        #***** just do the darn unsigned handling ourselves, ugh

        # if our data is labeled as being unsigned by the appropriately set attribute
        if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == "true":
            LOG.debug("Correcting for unsigned values in variable data.")
            where_temp = (scaled_data_copy < 0.0) & ~missing_mask # where we have negative but not missing data
            scaled_data_copy[where_temp] += (numpy.iinfo(numpy.uint16).max + 1.0) # add the 2's complement

        #***** end of handling the unsigned attribute

        ###### the start of the scaling code
        # Note, I had to turn this back on because the netcdf4 library is behaving erratically when unsigned is set

        # get the scale factor and add offset from the attributes
        scale_factor = 1.0
        add_offset = 0.0
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]

        # don't do work if we don't need to unpack things
        if (scale_factor != 1.0) or (add_offset != 0.0) :

            LOG.debug("Manually applying scale (" + str(scale_factor) + ") and add offset (" + str(add_offset) + ").")

            # unpack the data
            scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset

        ###### end of the scaling code

        """
        #TODO, this section was for when we had to do the unsigned correction after unpacking
        if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == ( "true" ) :

            LOG.debug("fixing unsigned values in variable " + name)

            # load the scale factor and add offset
            scale_factor = 1.0
            add_offset = 0.0
            temp = self.attributeCache.get_variable_attributes(name)
            if SCALE_FACTOR_STR in temp :
                scale_factor = temp[SCALE_FACTOR_STR]
            if ADD_OFFSET_STR in temp :
                add_offset = temp[ADD_OFFSET_STR]

            # get the missing value and figure out the dtype of the original data
            missing_val  = self.missing_value(name)
            orig_dtype   = numpy.array([missing_val,]).dtype
            needed_dtype = SIGNED_TO_UNSIGNED_DTYPES[orig_dtype] if orig_dtype in SIGNED_TO_UNSIGNED_DTYPES else None

            if needed_dtype is not None :
                # now figure out where all the corrupted values are, and shift them up to be positive
                needs_fix_mask = (scaled_data_copy < add_offset) & (scaled_data_copy != missing_val)
                # we are adding the 2's complement, but first we're scaling it appropriately
                scaled_data_copy[needs_fix_mask] += ((numpy.iinfo(numpy.uint16).max + 1.0) * scale_factor)
        """

        return scaled_data_copy
    
    # TODO, this hasn't been supported in other file types
    def close (self) :
        self._nc.close()
        self._nc = None
        self._var_map = None

    def get_variable_object(self, name):
        return self._var_map[name]
    
    def missing_value(self, name):
        
        toReturn = None
        
        temp = self.attributeCache.get_variable_attribute(name, fillValConst1)
        if temp is not None :
            toReturn = temp
        else :
            temp = self.attributeCache.get_variable_attribute(name, fillValConst2)
            if temp is not None :
                toReturn = temp
        
        return toReturn

    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """

        # TODO, this will not work with groups
        #self._nc.nc_redef() # TODO?
        
        # if the variable already exists, stop with a warning
        if variablename in self._nc.variables :
            LOG.warning("New variable name requested (" + variablename + ") is already present in file. " +
                        "Skipping generation of new variable.")
            return None
        # if we have no data we won't be able to determine the data type to create the variable
        if (data is None) or (len(data) <= 0) :
            LOG.warning("Data type for new variable (" + variablename + ") could not be determined. " +
                        "Skipping generation of new variable.")
            return None

        # TODO, the type management here is going to cause problems with larger floats, review this
        #dataType = None
        if numpy.issubdtype(data.dtype, numpy.integer) :
            dataType = numpy.int64
            #print("Picked INT")
        # TODO, at the moment the fill type is forcing me to use a double, when sometimes I want a float
        #elif numpy.issubdtype(data.dtype, numpy.float32) :
        #    dataType = numpy.float
        #    print("Picked FLOAT")
        elif numpy.issubdtype(data.dtype, numpy.floating) :
            dataType = numpy.float64
            #print("Picked DOUBLE")
        # what do we do if it's some other type?
        else :
            dataType = data.dtype
        
        # create and set all the dimensions
        dimensions = [ ]
        dimensionNum = 0
        for dimSize in data.shape :
            tempName = variablename + '-index' + str(dimensionNum)
            self._nc.createDimension(tempName, dimSize)
            dimensions.append(tempName)
            dimensionNum = dimensionNum + 1
        
        # create the new variable
        #print('variable name: ' + variablename)
        #print('data type:     ' + str(dataType))
        #print('dimensions:    ' + str(dimensions))
        # if a missing value was given, use that
        if missingvalue is None :
            newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions))
        else :
            newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions), fill_value=missingvalue, )
        
        # if we have a variable to copy attributes from, do so
        if variabletocopyattributesfrom is not None :
            attributes = self.get_variable_attributes(variabletocopyattributesfrom, caseInsensitive=False)

            for attribute in attributes :
                if attribute.lower() != "_fillvalue" :
                    setattr(newVariable, attribute, attributes[attribute])

        #self._nc.nc_enddef() # TODO?

        # if data was given, use that
        if data is not None :

            newVariable[:] = data

        return newVariable

    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue, variableObject=None,) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        # TODO, this will not work with groups

        if variableObject is None :
            variableObject = self.get_variable_object(variableName)
        
        #self._nc.nc_redef() # TODO?

        setattr(variableObject, newAttributeName, newAttributeValue)

        #self._nc.nc_enddef() # TODO?

        # TODO, this will cause our attribute cache to be wrong!
        # TODO, for now, brute force clear the cache
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        #toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = { }
            tempVarObj   = self.get_variable_object(variableName)
            tempAttrKeys = tempVarObj.ncattrs()
            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(tempVarObj, attrKey)
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a dictionary of all the global attributes for this file, or None
        """
        
        #toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = { }
            tempAttrKeys = self._nc.ncattrs()
            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(self._nc, attrKey)

        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._nc.ncattrs() :
                toReturn = getattr(self._nc, attributeName)
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """

        return True

nc4 = nc
cdf = nc
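
# A minimal usage sketch for the nc wrapper (file and variable names are
# hypothetical):
#
#   f = nc("/path/to/example.nc")
#   names = f()               # flat list of names; variables inside groups
#                             # use slash-separated paths, e.g. "obs/radiance"
#   data = f["obs/radiance"]  # scaled numpy copy of the variable's data
#   f.close()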

# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 

class h5(object):
    """wrapper for HDF5 datasets
    """
    _h5 = None
    
    def __init__(self, filename, allowWrite=False):
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        mode = 'r'
        if allowWrite :
            mode = 'r+'
        if h5py is None:
            LOG.error('h5py module is not installed and is needed in order to read h5 files')
            assert(h5py is not None)
        self._h5 = h5py.File(filename, mode)
    
    def __call__(self):
        
        variableList = [ ]
        def testFn (name, obj) :
            #print ('checking name: ' + name)
            #print ('object: ' + str(obj))
            
            if isinstance(obj, h5py.Dataset) :
                try :
                    tempType = obj.dtype # this is required to provoke a type error for closed data sets
                    
                    #LOG.debug ('type: ' + str(tempType))
                    variableList.append(name)
                except TypeError :
                    LOG.debug('TypeError prevents the use of variable ' + name
                              + '. This variable will be ignored')
        
        self._h5.visititems(testFn)
        
        LOG.debug('variables from visiting h5 file structure: ' + str(variableList))
        
        return variableList
    
    @staticmethod
    def trav(h5, pth):
        return reduce(lambda x, a: x[a] if a else x, pth.split('/'), h5)
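    # e.g. trav(some_h5_file, "All_Data/Group/Dataset") walks the nested groups
    # one path component at a time; empty components (from a leading '/') are skipped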
        
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]

        # pick a data type to use internally
        data_type = _get_data_uptype(raw_data_copy.dtype)

        #print ('*************************')
        #print (dir (variable_object.id)) # TODO, is there a way to get the scale and offset through this?
        #print ('*************************')
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]
        # TODO, does h5py have an equivalent of endaccess to close the variable?
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data matches the missing value
        missing_val = self.missing_value(name)
        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self, name):
        return h5.trav(self._h5, name)
    
    def missing_value(self, name):
        
        toReturn = None
        
        # get the missing value if it has been set
        variableObject = self.get_variable_object(name)
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        if (h5d.FILL_VALUE_DEFAULT == fillValueStatus) or (h5d.FILL_VALUE_USER_DEFINED == fillValueStatus) :
            temp = numpy.array((1), dtype=variableObject.dtype)
            pListObj.get_fill_value(temp)
            toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf 5 file, this functionality is not yet available.')
        
        #return None
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        raise IOUnimplimentedError('Unable to add attribute to hdf 5 file, this functionality is not yet available.')
        
        #return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        #toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attrs
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attrs = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attrs :
                toReturn = temp_attrs[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a dictionary of all the global attributes for this file, or None
        """
        
        #toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._h5.attrs
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._h5.attrs :
                toReturn = self._h5.attrs[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True


class aeri(object):
    """wrapper for AERI RNC/SUM/CXS/etc datasets
    """
    _dmv = None
    _vectors = { }
    _scalars = { }
    
    @staticmethod
    def _meta_mapping(fp):
        ids = fp.metaIDs()
        names = [fp.queryMetaDescString(1, id_, fp.SHORTNAME) for id_ in ids]
        assert len(ids) == len(names)
        return dict(zip(names, ids))
    
    def _inventory(self):
        fp = self._dmv
        assert(fp is not None)
        # get list of vectors and scalars
        self._vectors = dict( (fp.queryVectorDescString(n,fp.SHORTNAME), n) for n in fp.vectorIDs() )
        self._scalars = self._meta_mapping(fp)

    def __init__(self, filename, allowWrite=False):
        assert(allowWrite==False)
        if dmvlib is None:
            LOG.error('cannot open AERI files without dmv module being available')
            return
        self._dmv = dmvlib.dmv()
        rc = self._dmv.openFile(filename)
        if rc!=0:
            LOG.error("unable to open file, rc=%d" % rc)
            self._dmv = None        
        else:
            self._inventory()
    
    def __call__(self):
        return list(self._vectors) + list(self._scalars)