io.py 52.2 KB
Newer Older
(no author)'s avatar
(no author) committed
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.

Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

10
import os, logging
11
import numpy
12
from functools import reduce
(no author)'s avatar
   
(no author) committed
13
14

LOG = logging.getLogger(__name__)
(no author)'s avatar
(no author) committed
15

(no author)'s avatar
   
(no author) committed
16
17
18
19
20
21
22
23
24
# optional dependency: pyhdf provides HDF4 support; when it is missing we stub
# out the names this module uses so the rest of the file still imports cleanly
try:
    import pyhdf
    from pyhdf.SD import SD,SDC, SDS, HDF4Error
except ImportError:
    # catch only ImportError so genuine failures inside pyhdf are not hidden
    LOG.info('no pyhdf module available for HDF4')
    pyhdf = None
    SD = SDC = SDS = object
    HDF4Error = EnvironmentError
    
25
26
try:
    import h5py
27
    from h5py import h5d
28
except ImportError:
(no author)'s avatar
   
(no author) committed
29
30
    LOG.info('no h5py module available for reading HDF5')
    h5py = None
(no author)'s avatar
(no author) committed
31

32
33
34
35
36
37
38
# the newer netCDF library that replaced pycdf
try:
    import netCDF4
except ImportError:
    # catch only ImportError so genuine failures inside netCDF4 are not hidden
    LOG.info("unable to import netcdf4 library")
    netCDF4 = None

(no author)'s avatar
(no author) committed
39
40
41
42
43
44
45
try:
    import dmv as dmvlib
    LOG.info('loaded dmv module for AERI data file access')
except ImportError:
    LOG.info('no AERI dmv data file format module')
    dmvlib = None

(no author)'s avatar
   
(no author) committed
46
47
48
49
50
51
52
try:
    import adl_blob
    LOG.info('adl_blob module found for JPSS ADL data file access')
except ImportError:
    LOG.info('no adl_blob format handler available')
    adl_blob = None

53
54
55
56
57
58
59
# optional dependency used for GeoTIFF access
try :
    from osgeo import gdal
    LOG.info('loading osgeo module for GeoTIFF data file access')
except ImportError :
    # catch only ImportError so genuine failures inside osgeo are not hidden
    LOG.info('no osgeo available for reading GeoTIFF data files')
    gdal = None

60
# the standard attribute name holding a variable's units
UNITS_CONSTANT = "units"

# attribute names that may hold a variable's fill / missing-data value
fillValConst1 = '_FillValue'
fillValConst2 = 'missing_value'

# attribute names used to unpack scaled data
ADD_OFFSET_STR   = 'add_offset'
SCALE_FACTOR_STR = 'scale_factor'
SCALE_METHOD_STR = 'scaling_method'

# attribute marking data that should be interpreted as unsigned
UNSIGNED_ATTR_STR = "_unsigned"

# maps each signed integer dtype to the unsigned dtype of the same width
SIGNED_TO_UNSIGNED_DTYPES = {
                                numpy.dtype(numpy.int8):    numpy.dtype(numpy.uint8),
                                numpy.dtype(numpy.int16):   numpy.dtype(numpy.uint16),
                                numpy.dtype(numpy.int32):   numpy.dtype(numpy.uint32),
                                numpy.dtype(numpy.int64):   numpy.dtype(numpy.uint64),
                            }

78
79
80
81
82
83
84
85
86
87
88
class IOUnimplimentedError(Exception):
    """
    The exception raised when a requested io operation is not yet available.
    
        msg  -- explanation of the problem
    """
    def __init__(self, msg):
        # initialize the Exception base class so args, repr, and pickling
        # behave normally (the original skipped this, leaving args empty)
        super().__init__(msg)
        self.msg = msg
    def __str__(self):
        return self.msg

89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class CaseInsensitiveAttributeCache (object) :
    """
    Caches the global and per-variable attributes of a single file, keyed
    without regard to case.

    Every attribute name is stored in lower case and every lookup lowers the
    requested name before searching, so callers may use any casing they like.
    Attribute sets are pulled from the underlying file lazily: the first time
    a lookup needs a part of the file that has not been cached yet, the cache
    transparently reads those attributes behind the scenes and remembers them.
    """
    
    def __init__(self, fileObject) :
        """
        start with nothing cached, remembering the file we will read from
        """
        
        self.fileToCache             = fileObject
        self.globalAttributesLower   = None
        self.variableAttributesLower = { }
    
    def _load_global_attributes_if_needed (self) :
        """
        make sure the file's global attributes have been read and cached
        """
        
        # nothing to do when the global attributes are already cached
        if self.globalAttributesLower is not None :
            return
        
        LOG.debug ("Loading file global attributes into case-insensitive cache.")
        rawAttrs = self.fileToCache.get_global_attributes(caseInsensitive=False)
        self.globalAttributesLower = { key.lower(): value for key, value in rawAttrs.items() }
    
    def _load_variable_attributes_if_needed (self, variableName) :
        """
        make sure the attributes for the given variable have been read and cached
        """
        
        lowerName = variableName.lower()
        
        # nothing to do when this variable's attributes are already cached
        if lowerName in self.variableAttributesLower :
            return
        
        LOG.debug ("Loading attributes for variable \"" + variableName + "\" into case-insensitive cache.")
        rawAttrs = self.fileToCache.get_variable_attributes(variableName, caseInsensitive=False)
        # store a lower-cased-key version of whatever attributes we found
        self.variableAttributesLower[lowerName] = { key.lower(): value for key, value in rawAttrs.items() }
    
    def get_variable_attribute (self, variableName, attributeName) :
        """
        get the specified attribute for the specified variable,
        loading and caching that variable's attributes first when needed;
        returns None when the attribute is not present
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        attrsForVar = self.variableAttributesLower.get(variableName.lower(), { })
        lowerAttr   = attributeName.lower()
        
        if lowerAttr not in attrsForVar :
            LOG.debug ("Attribute \"" + attributeName + "\" was not present for variable \"" + variableName + "\".")
            return None
        
        return attrsForVar[lowerAttr]
    
    def get_variable_attributes (self, variableName) :
        """
        get the cached attribute dictionary for the variable name given,
        or None when it could not be found
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        return self.variableAttributesLower.get(variableName.lower(), None)
    
    def get_global_attribute (self, attributeName) :
        """
        get a global attribute with the given name, or None when absent
        """
        
        self._load_global_attributes_if_needed()
        
        return self.globalAttributesLower.get(attributeName.lower(), None)
    
    def get_global_attributes (self) :
        """
        get the dictionary of all cached global attributes
        """
        
        self._load_global_attributes_if_needed()
        
        return self.globalAttributesLower
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
195

196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def _get_data_uptype (input_dtype) :
    """
    Given an input data type, figure out what type we need to upcast it to.

    Note: Glance expects all it's data to get upcast into floats for the purposes of it's
    later math manipulations.
    """

    default_uptype = numpy.float32
    default_finfo  = numpy.finfo(default_uptype)
    input_info     = numpy.finfo(input_dtype) if  numpy.issubdtype(input_dtype, numpy.floating,) else numpy.iinfo(input_dtype)

    # if our input won't fit into the default, pick a bigger type
    if ( (default_finfo.min > input_info.min) or (default_finfo.max < input_info.max) ) :
        LOG.debug("Input data will not fit in default float32 data type, using larger type.")
        default_uptype = numpy.float64

    # FUTURE, if we reach a point where a float64 isn't big enough, this will need to be revisited

    return default_uptype

217
class hdf (object):
    """wrapper for HDF4 dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    # the underlying pyhdf SD file object
    _hdf = None
    
    def __init__(self, filename, allowWrite=False):
        """
        open the given HDF4 file, optionally allowing writes as well as reads
        """
        
        if pyhdf is None:
            LOG.error('pyhdf is not installed and is needed in order to read hdf4 files')
            assert(pyhdf is not None)
        
        mode = SDC.READ
        if allowWrite:
            mode = mode | SDC.WRITE
        
        self._hdf = SD(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
    
    def __call__(self):
        "yield names of variables to be compared"
        return list(self._hdf.datasets())
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = None
        scaling_method = None
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        try :
            # TODO, this currently won't work with geocat data, work around it for now
            scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
        except HDF4Error:
            # load just the scale factor and add offset information by hand
            temp = self.attributeCache.get_variable_attributes(name)
            if ADD_OFFSET_STR in temp :
                add_offset = temp[ADD_OFFSET_STR]
                data_type = numpy.dtype(type(add_offset))
            if SCALE_FACTOR_STR in temp :
                scale_factor = temp[SCALE_FACTOR_STR]
                data_type = numpy.dtype(type(scale_factor))
            if SCALE_METHOD_STR in temp :
                scaling_method = temp[SCALE_METHOD_STR]
        SDS.endaccess(variable_object)
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        """
        please see constant.f90 for a more up to date version of this information:
            INTEGER(kind=int1) :: NO_SCALE              ! 0
            INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
            INTEGER(kind=int1) :: LOG_SCALE             ! 2
            INTEGER(kind=int1) :: SQRT_SCALE            ! 3 
        """
        if (scaling_method == 0) :
            return raw_data_copy
        if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
            # LOG.warn is deprecated; use the warning method instead
            LOG.warning ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of hdf standard method. '
                         + 'This may cause problems with data consistency')
        
        # if we don't have a data type something strange has gone wrong
        assert(not (data_type is None))
        
        # get information about where the data is the missing value
        # (numpy.bool was removed from numpy; plain bool is the supported spelling)
        missing_val = self.missing_value(name)
        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None :
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy                = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self, name):
        """get the pyhdf SDS object for the named variable"""
        return self._hdf.select(name)
    
    def missing_value(self, name):
        """return the fill value for the named variable, or None if it has none"""
        
        return self.get_attribute(name, fillValConst1)
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # (the unreachable "return None" that followed this raise was removed)
        raise IOUnimplimentedError('Unable to create variable in hdf file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        # (the unreachable "return" that followed this raise was removed)
        raise IOUnimplimentedError('Unable add attribute to hdf file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attributes()
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._hdf.attributes()
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._hdf.attributes() :
                toReturn = self._hdf.attributes()[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
400

401
class nc (object):
    """wrapper for netcdf4-python data access for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    # the underlying netCDF4 Dataset object
    _nc = None
    # maps full (group-qualified) variable names to their variable objects
    _var_map = None

    # walk down through all groups and get variable names and objects
    def _walkgroups(self, start_at, prefix=None, ):
        """
        recursively yield (name, variable object) pairs for every variable
        under the given group, prefixing names with their group path
        """
        # look through the variables that are here
        for var_name in start_at.variables:
            temp_name = var_name if prefix is None or len(prefix) <= 0 else prefix + "/" + var_name
            yield temp_name, start_at[var_name]
        # look through the groups that are here
        for group_name in start_at.groups:
            grp_str = group_name if prefix is None or len(prefix) <= 0 else prefix + "/" + group_name
            for more_var_name, more_var_obj in self._walkgroups(start_at.groups[group_name], prefix=grp_str):
                yield more_var_name, more_var_obj
    
    def __init__(self, filename, allowWrite=False):
        """
        open the given netCDF file and build a map of all variables in it
        """
        
        if netCDF4 is None:
            LOG.error('netCDF4 is not installed and is needed in order to read NetCDF files')
            assert(netCDF4 is not None)
        
        mode = 'r'
        if allowWrite :
            mode = 'w'
        
        self._nc = netCDF4.Dataset(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        self._var_map = { }
        for var_name, var_obj in self._walkgroups(self._nc,) :
            self._var_map[var_name] = var_obj

    def __call__(self):
        """
        yield names of variables in this file
        """

        return list(self._var_map)

    def __getitem__(self, name):
        """
        this returns a numpy array with a copy of the full, scaled
        data for this variable, if the data type must be changed to allow
        for scaling it will be (so the return type may not reflect the
        type found in the original file)
        """

        LOG.debug("loading variable data for: " + name)

        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)

        # get our data, save the dtype, and make sure it's a more flexible dtype for now
        variable_object.set_auto_maskandscale(False)  # for now just do the darn calculations ourselves
        temp_input_data = variable_object[:]
        LOG.debug("Native input dtype: " + str(temp_input_data.dtype))
        dtype_to_use = _get_data_uptype(temp_input_data.dtype)
        LOG.debug("Choosing dtype " + str(dtype_to_use) + " for our internal representation of this data.")
        scaled_data_copy = numpy.array(temp_input_data, dtype=dtype_to_use,)

        # get the attribute cache so we can check on loading related attributes
        temp = self.attributeCache.get_variable_attributes(name)

        # get information about where the data is the missing value
        # (numpy.bool was removed from numpy; plain bool is the supported spelling)
        missing_val = self.missing_value(name)
        missing_mask = numpy.zeros(scaled_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[scaled_data_copy == missing_val] = True

        #***** just do the darn unsigned handling ourselves, ugh

        # if our data is labeled as being unsigned by the appropriately set attribute
        if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == ("true"):
            LOG.debug("Correcting for unsigned values in variable data.")
            where_temp = (scaled_data_copy < 0.0) & ~missing_mask # where we have negative but not missing data
            scaled_data_copy[where_temp] += (numpy.iinfo(numpy.uint16).max + 1.0) # add the 2's complement

        #***** end of handling the unsigned attribute

        ###### the start of the scaling code
        # Note, I had to turn this back on because the netcdf4 library is behaving erratically when unsigned is set

        # get the scale factor and add offset from the attributes
        scale_factor = 1.0
        add_offset = 0.0
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]

        # don't do work if we don't need to unpack things
        if (scale_factor != 1.0) or (add_offset != 0.0) :

            LOG.debug("Manually applying scale (" + str(scale_factor) + ") and add offset (" + str(add_offset) + ").")

            # unpack the data
            scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset

        ###### end of the scaling code

        return scaled_data_copy
    
    # TODO, this hasn't been supported in other file types
    def close (self) :
        """close the underlying file and drop our references to it"""
        self._nc.close()
        self._nc = None
        self._var_map = None

    def get_variable_object(self, name):
        """get the netCDF4 variable object for the given (group-qualified) name"""
        return self._var_map[name]
    
    def missing_value(self, name):
        """
        return the missing/fill value for the named variable or None;
        the _FillValue attribute is preferred with missing_value as a fallback
        """
        
        toReturn = None
        
        temp = self.attributeCache.get_variable_attribute(name, fillValConst1)
        if temp is not None :
            toReturn = temp
        else :
            temp = self.attributeCache.get_variable_attribute(name, fillValConst2)
            if temp is not None :
                toReturn = temp
        
        return toReturn

    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """

        # TODO, this will not work with groups
        # NOTE(review): netCDF4 Datasets normally manage define mode themselves;
        # confirm nc_redef/nc_enddef exist on the dataset object in use
        self._nc.nc_redef()
        
        # if the variable already exists, stop with a warning
        if variablename in self._nc.variables :
            LOG.warning("New variable name requested (" + variablename + ") is already present in file. " +
                        "Skipping generation of new variable.")
            return None
        
        # if we have no data we won't be able to determine the data type to create the variable
        if (data is None) or (len(data) <= 0) :
            LOG.warning("Data type for new variable (" + variablename + ") could not be determined. " +
                        "Skipping generation of new variable.")
            return None
        
        # figure out the type to create the variable with; the numpy.int /
        # numpy.float aliases were removed from numpy, so use the abstract
        # numpy.integer/numpy.floating types for the checks and explicit types
        dataType = None
        if numpy.issubdtype(data.dtype, numpy.integer) :
            dataType = numpy.int64
            #print("Picked INT")
        # TODO, at the moment the fill type is forcing me to use a double, when sometimes I want a float
        #elif numpy.issubdtype(data.dtype, numpy.float32) :
        #    dataType = numpy.float
        #    print("Picked FLOAT")
        elif numpy.issubdtype(data.dtype, numpy.floating) :
            dataType = numpy.float64
            #print("Picked DOUBLE")
        # what do we do if it's some other type?
        
        # create and set all the dimensions
        dimensions = [ ]
        dimensionNum = 0
        for dimSize in data.shape :
            dimensions.append(self._nc.createDimension(variablename + '-index' + str(dimensionNum), dimSize))
            dimensionNum = dimensionNum + 1
        
        # create the new variable
        #print('variable name: ' + variablename)
        #print('data type:     ' + str(dataType))
        #print('dimensions:    ' + str(dimensions))
        newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions))
        
        # if a missing value was given, use that
        if missingvalue is not None :
            newVariable._FillValue = missingvalue
        
        # if we have a variable to copy attributes from, do so
        if variabletocopyattributesfrom is not None :
            attributes = self.get_variable_attributes(variabletocopyattributesfrom, caseInsensitive=False)

            for attribute in attributes :
                setattr(newVariable, attribute, attributes[attribute])

        self._nc.nc_enddef()

        # if data was given, use that
        # (fixed: the original did "newVariable[:](data.tolist())", which calls
        # the slice result instead of assigning the data into the variable)
        if data is not None :
            newVariable[:] = data

        return newVariable

    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        # TODO, this will not work with groups

        variableObject = self.get_variable_object(variableName)
        
        # NOTE(review): confirm nc_redef/nc_enddef are available; netCDF4
        # normally handles define mode itself
        self._nc.nc_redef()

        setattr(variableObject, newAttributeName, newAttributeValue)

        self._nc.nc_enddef()

        # TODO, this will cause our attribute cache to be wrong!
        # TODO, for now, brute force clear the cache
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = { }
            tempVarObj   = self.get_variable_object(variableName)
            tempAttrKeys = tempVarObj.ncattrs()
            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(tempVarObj, attrKey)
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attributes :
                # fixed: the original read the attribute off the bound method
                # self.get_variable_object rather than the attribute dictionary
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = { }
            tempAttrKeys = self._nc.ncattrs()
            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(self._nc, attrKey)

        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._nc.ncattrs() :
                toReturn = getattr(self._nc, attributeName)
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """

        return True
721

(no author)'s avatar
(no author) committed
722
723
724
# alternate names for the nc wrapper; NetCDF-4 and classic CDF files are both
# read through the same netCDF4-based implementation
nc4 = nc
cdf = nc

725
726
# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 
727

(no author)'s avatar
(no author) committed
728
class h5(object):
    """wrapper for HDF5 datasets

    Presents an HDF5 file through the same informal interface the other
    wrappers in this module use: call the instance to list variables,
    index it to get scaled data, and use the attribute accessors for
    variable and global attributes.
    """
    # the open h5py.File handle (None until __init__ succeeds)
    _h5 = None
    
    def __init__(self, filename, allowWrite=False):
        """open the HDF5 file at filename

        filename   - path of the file to open
        allowWrite - when True the file is opened read/write ('r+'),
                     otherwise it is opened read only ('r')
        """
        
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        mode = 'r'
        if allowWrite :
            mode = 'r+'
        
        if h5py is None:
            LOG.error('h5py module is not installed and is needed in order to read h5 files')
            # deliberately halt here; we cannot do anything without h5py
            assert(h5py is not None)
        
        self._h5 = h5py.File(filename, mode)
    
    def __call__(self):
        """
        return a list of the names of all usable datasets in the file

        datasets whose dtype cannot be read (e.g. closed data sets)
        are skipped with a debug message
        """
        
        variableList = [ ]
        def testFn (name, obj) :
            #print ('checking name: ' + name)
            #print ('object: ' + str(obj))
            
            if isinstance(obj, h5py.Dataset) :
                try :
                    tempType = obj.dtype # this is required to provoke a type error for closed data sets
                    
                    #LOG.debug ('type: ' + str(tempType))
                    variableList.append(name)
                except TypeError :
                    LOG.debug('TypeError prevents the use of variable ' + name
                              + '. This variable will be ignored')
        
        self._h5.visititems(testFn)
        
        LOG.debug('variables from visiting h5 file structure: ' + str(variableList))
        
        return variableList
    
    @staticmethod
    def trav(h5,pth): 
        """traverse the '/'-separated path pth starting from the group h5,
        skipping any empty path segments (e.g. a leading '/')"""
        return reduce( lambda x,a: x[a] if a else x, pth.split('/'), h5)
        
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        """return a numpy array with a scaled copy of the data for name

        scale_factor and add_offset attributes are applied to all
        non-missing values; missing values are left untouched
        """
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        
        # pick a data type to use internally
        data_type = _get_data_uptype(raw_data_copy.dtype)
        
        #print ('*************************')
        #print (dir (variable_object.id)) # TODO, is there a way to get the scale and offset through this?
        #print ('*************************')
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]
        
        # todo, does cdf have an equivalent of endaccess to close the variable?
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # note: the numpy.bool alias was removed in NumPy 1.24, so use the
        # builtin bool here instead
        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self,name):
        """get the h5py Dataset (or group) object for the given '/'-separated name"""
        return h5.trav(self._h5, name)
    
    def missing_value(self, name):
        """
        return the fill value defined for the named variable, or None
        if no fill value has been set
        """
        
        toReturn = None
        
        # get the missing value if it has been set
        variableObject = self.get_variable_object(name)
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        if (h5d.FILL_VALUE_DEFAULT is fillValueStatus) or (h5d.FILL_VALUE_USER_DEFINED is fillValueStatus) :
            # read the fill value out of the property list into a 0-d array
            temp = numpy.array((1), dtype=variableObject.dtype)
            pListObj.get_fill_value(temp)
            toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # writing is not supported for hdf 5 files yet; this always raises
        raise IOUnimplimentedError('Unable to create variable in hdf 5 file, this functionality is not yet available.')
        
        return None
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        # writing is not supported for hdf 5 files yet; this always raises
        raise IOUnimplimentedError('Unable to add attribute to hdf 5 file, this functionality is not yet available.')
        
        return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attrs
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attrs = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if (attributeName in temp_attrs) :
                toReturn = temp_attrs[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._h5.attrs
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._h5.attrs :
                toReturn = self._h5.attrs[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
924

(no author)'s avatar
(no author) committed
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960

class aeri(object):
    """wrapper for AERI RNC/SUM/CXS/etc datasets
    """
    _dmv = None
    _vectors = { }
    _scalars = { }
    
    @staticmethod
    def _meta_mapping(fp):
        ids = fp.metaIDs()
        names = [fp.queryMetaDescString(1, id_, fp.SHORTNAME) for id_ in ids]
        assert len(ids) == len(names)
        return (dict((n, i) for n, i in zip(names, ids)))
    
    def _inventory(self):
        fp = self._dmv
        assert(fp is not None)
        # get list of vectors and scalars
        self._vectors = dict( (fp.queryVectorDescString(n,fp.SHORTNAME), n) for n in fp.vectorIDs() )
        self._scalars = self._meta_mapping(fp)

    def __init__(self, filename, allowWrite=False):
        """open the AERI file at filename and inventory its contents

        writing is not supported, so allowWrite must be False; if the dmv
        module is missing or the open fails, self._dmv is left as None
        """
        # writing AERI files is not supported by this wrapper
        assert(allowWrite==False)
        if dmvlib is None:
            LOG.error('cannot open AERI files without dmv module being available')
            return
        self._dmv = dmvlib.dmv()
        rc = self._dmv.openFile(filename)
        if rc!=0:
            LOG.error("unable to open file, rc=%d" % rc)
            self._dmv = None
        else:
            self._inventory()
    
    def __call__(self):
Eva Schiffer's avatar
Eva Schiffer committed
961
        return list(self._vectors) + list(self._scalars)
(no author)'s avatar
(no author) committed
962
963
964
965
966
967
968
969
970
        
    def __getitem__(self, name):
        fp = self._dmv
        assert(fp is not None)
        if 'DMV_RECORDS' in os.environ:
            nrecs = int(os.environ['DMV_RECORDS'])
            LOG.warning('overriding dmv record count to %d' % nrecs)
        else:
            nrecs = self._dmv.recordCount()
971
        recrange = list(range(1, nrecs+1))
(no author)'s avatar
(no author) committed
972
973
974
        if name in self._vectors:
            vid = self._vectors[name]
            vdata = [ fp.vectorDepValues(rec, vid) for rec in recrange ]
975
            return numpy.array(vdata)
(no author)'s avatar
(no author) committed
976
977
        elif name in self._scalars:
            vdata = fp.metaValueMatrix(recrange, [self._scalars[name]])
978
            return numpy.array(vdata)
(no author)'s avatar
(no author) committed
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
        else:
            raise LookupError('cannot find variable %s' % name)
       
    def get_variable_object(self,name):
        # the dmv interface does not expose raw variable objects,
        # so there is nothing useful to return here
        return None
    
    def missing_value(self, name):
        # missing data is always represented as NaN for these files,
        # regardless of the variable name
        return float('nan')
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # writing aeri files is not supported; this always raises,
        # so the return below is unreachable
        raise IOUnimplimentedError('Unable to create variable in aeri file, this functionality is not yet available.')
        
        return None
    
For faster browsing, not all history is shown. View entire blame