io.py 51.5 KB
Newer Older
(no author)'s avatar
(no author) committed
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.

Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

10
import os, logging
(no author)'s avatar
   
(no author) committed
11
import numpy as np
12
from functools import reduce
(no author)'s avatar
   
(no author) committed
13
14

LOG = logging.getLogger(__name__)
(no author)'s avatar
(no author) committed
15

(no author)'s avatar
   
(no author) committed
16
17
18
19
20
21
22
23
24
try:
    import pyhdf
    from pyhdf.SD import SD,SDC, SDS, HDF4Error
except:
    LOG.info('no pyhdf module available for HDF4')
    pyhdf = None
    SD = SDC = SDS = object
    HDF4Error = EnvironmentError
    
25
26
try:
    import h5py
27
    from h5py import h5d
28
except ImportError:
(no author)'s avatar
   
(no author) committed
29
30
    LOG.info('no h5py module available for reading HDF5')
    h5py = None
(no author)'s avatar
(no author) committed
31

32
33
34
35
36
37
38
39
# the newer netCDF library that replaced pycdf
try:
    import netCDF4
except:
    LOG.info("unable to import netcdf4 library")
    netCDF4 = None

""" this is the previous netCDF library, remove this once the new one is fully tested
(no author)'s avatar
   
(no author) committed
40
41
42
43
44
45
46
47
48
try:    
    import pycdf
    from pycdf import CDF, NC, strerror
except:
    LOG.info('no pycdf module available')
    pycdf = None
    CDF = NC = object
    def strerror(*args):
        return 'no pycdf module installed'
49
"""
(no author)'s avatar
(no author) committed
50

(no author)'s avatar
(no author) committed
51
52
53
54
55
56
57
try:
    import dmv as dmvlib
    LOG.info('loaded dmv module for AERI data file access')
except ImportError:
    LOG.info('no AERI dmv data file format module')
    dmvlib = None

(no author)'s avatar
   
(no author) committed
58
59
60
61
62
63
64
try:
    import adl_blob
    LOG.info('adl_blob module found for JPSS ADL data file access')
except ImportError:
    LOG.info('no adl_blob format handler available')
    adl_blob = None

65
66
67
68
69
70
71
try :
    from osgeo import gdal
    LOG.info('loading osgeo module for GeoTIFF data file access')
except :
    LOG.info('no osgeo available for reading GeoTIFF data files')
    gdal = None

72
# attribute name constant; presumably the attribute that holds a variable's
# units -- it is not referenced in this part of the file, TODO confirm usage
UNITS_CONSTANT = "units"
(no author)'s avatar
(no author) committed
73

74
75
76
# attribute names consulted when looking up a variable's missing (fill) value;
# the nc wrapper tries fillValConst1 first and then fillValConst2, while the
# hdf wrapper only looks at fillValConst1
fillValConst1 = '_FillValue'
fillValConst2 = 'missing_value'

77
78
79
80
# names of the variable attributes that the __getitem__ methods read in order
# to unpack stored data (scaled = raw * scale_factor + add_offset);
# the scaling method attribute is only consulted by the hdf wrapper
ADD_OFFSET_STR   = 'add_offset'
SCALE_FACTOR_STR = 'scale_factor'
SCALE_METHOD_STR = 'scaling_method'

81
82
83
84
85
86
87
88
89
# lower cased name of the variable attribute that flags stored data as
# unsigned; it is looked up through the case-insensitive attribute cache and
# the flag counts as set when its value reads "true" (in any capitalization)
UNSIGNED_ATTR_STR = "_unsigned"

# maps each signed integer dtype to the unsigned integer dtype of the same width
SIGNED_TO_UNSIGNED_DTYPES = {
                                np.dtype(np.int8):   np.dtype(np.uint8),
                                np.dtype(np.int16):   np.dtype(np.uint16),
                                np.dtype(np.int32):   np.dtype(np.uint32),
                                np.dtype(np.int64):   np.dtype(np.uint64),
                            }

90
91
92
93
94
95
96
97
98
99
100
class IOUnimplimentedError(Exception):
    """
    The exception raised when a requested io operation is not yet available.
    
        msg  -- explanation of the problem
    """
    def __init__(self, msg):
        # hand the message to the base class too, so that e.args and repr(e)
        # are populated and the exception can be copied or pickled (those
        # rebuild the object by calling the class with e.args)
        Exception.__init__(self, msg)
        self.msg = msg
    def __str__(self):
        return self.msg

101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
class CaseInsensitiveAttributeCache (object) :
    """
    A lazily filled, case-insensitive cache of the attributes of one file
    and of all of its variables.
    
    Attribute names are stored lower cased, and variable attribute sets are
    indexed by the lower cased variable name; every name handed to the lookup
    methods is lower cased before searching, so callers may use any
    capitalization.
    
    Nothing is read when the cache is created: the global attributes are
    fetched from the file on the first global lookup, and each variable's
    attributes on the first lookup that involves that variable.
    """
    
    def __init__(self, fileObject) :
        """
        remember the file object to read from and start with nothing cached
        
        fileObject must offer get_global_attributes(caseInsensitive=False) and
        get_variable_attributes(variableName, caseInsensitive=False)
        """
        
        self.fileToCache             = fileObject
        self.globalAttributesLower   = None  # None means "not loaded yet"
        self.variableAttributesLower = { }   # lower cased variable name -> attribute dictionary
    
    def _load_global_attributes_if_needed (self) :
        """
        fetch and cache the global attributes unless that was already done
        """
        
        if self.globalAttributesLower is not None :
            return
        
        LOG.debug ("Loading file global attributes into case-insensitive cache.")
        rawAttrs = self.fileToCache.get_global_attributes(caseInsensitive=False)
        self.globalAttributesLower = dict((attrName.lower(), attrValue) for attrName, attrValue in rawAttrs.items())
    
    def _load_variable_attributes_if_needed (self, variableName) :
        """
        fetch and cache the attributes of one variable unless that was already done
        """
        
        # the cache is indexed by the lower cased variable name
        cacheKey = variableName.lower()
        if cacheKey in self.variableAttributesLower :
            return
        
        LOG.debug ("Loading attributes for variable \"" + variableName + "\" into case-insensitive cache.")
        # the file itself is asked using the name exactly as the caller gave it
        rawAttrs = self.fileToCache.get_variable_attributes(variableName, caseInsensitive=False)
        self.variableAttributesLower[cacheKey] = dict((attrName.lower(), attrValue) for attrName, attrValue in rawAttrs.items())
    
    def get_variable_attribute (self, variableName, attributeName) :
        """
        return the value of one attribute of one variable, or None when the
        variable does not have that attribute
        
        the variable's attributes are loaded and cached first if needed
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        cachedAttrs = self.variableAttributesLower.get(variableName.lower())
        attrKey     = attributeName.lower()
        if (cachedAttrs is not None) and (attrKey in cachedAttrs) :
            return cachedAttrs[attrKey]
        
        LOG.debug ("Attribute \"" + attributeName + "\" was not present for variable \"" + variableName + "\".")
        return None
    
    def get_variable_attributes (self, variableName) :
        """
        return the dictionary (lower cased names -> values) of all the
        attributes of the given variable
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        return self.variableAttributesLower.get(variableName.lower())
    
    def get_global_attribute (self, attributeName) :
        """
        return the value of the named global attribute, or None if the file
        has no such attribute
        """
        
        self._load_global_attributes_if_needed()
        
        return self.globalAttributesLower.get(attributeName.lower())
    
    def get_global_attributes (self) :
        """
        return the dictionary (lower cased names -> values) of all the
        global attributes
        """
        
        self._load_global_attributes_if_needed()
        
        return self.globalAttributesLower
199
200
201
202
203
204
205
206
    
    def is_loadable_type (self, name) :
        """
        report whether the named variable is of a type that can be loaded;
        at present every name is reported as loadable
        """
        
        # TODO, are there any bad types for these files?
        loadable = True
        return loadable
207
208

class hdf (object):
(no author)'s avatar
(no author) committed
209
210
211
212
213
    """wrapper for HDF4 dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
214
215
    _hdf = None
    
216
    def __init__(self, filename, allowWrite=False):
        """
        open the HDF4 file at filename and attach an attribute cache to it
        
        filename   -- path of the HDF4 file to open
        allowWrite -- when True the file is opened with SDC.WRITE in addition
                      to SDC.READ, otherwise it is opened with SDC.READ only
        
        raises AssertionError if the pyhdf module could not be imported
        """
        
        if pyhdf is None:
            message = 'pyhdf is not installed and is needed in order to read hdf4 files'
            LOG.error(message)
            # raised explicitly rather than through an assert statement so the
            # check is still made when python runs with -O (which strips
            # asserts); the exception type is the one an assert would raise
            raise AssertionError(message)
        
        mode = SDC.READ
        if allowWrite:
            mode = mode | SDC.WRITE
        
        self._hdf = SD(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
(no author)'s avatar
(no author) committed
227
228
229

    def __call__(self):
        "yield names of variables to be compared"
230
        return list(self._hdf.datasets().keys())
(no author)'s avatar
(no author) committed
231
    
232
233
234
235
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
(no author)'s avatar
(no author) committed
236
    def __getitem__(self, name):
        """
        return a numpy array holding the full data of the named variable with
        its scale factor and add offset applied
        
        The scaling information is taken from SDS.getcal; when that raises
        HDF4Error the add_offset, scale_factor and scaling_method attributes of
        the variable are read from the attribute cache instead.
        
        The raw data is returned as it was read when the scale factor is 1.0
        and the add offset is 0.0, or when the scaling method attribute is 0.
        Otherwise the data is converted to the data type that came with the
        scaling information and unpacked as raw * scale_factor + add_offset;
        values equal to the variable's missing value are not unpacked.
        """
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = None 
        scaling_method = None
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        try :
            raw_data_copy = variable_object[:]
            try :
                # TODO, this currently won't work with geocat data, work around it for now
                scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
            except HDF4Error:
                # load just the scale factor and add offset information by hand
                temp = self.attributeCache.get_variable_attributes(name)
                if ADD_OFFSET_STR in temp :
                    add_offset = temp[ADD_OFFSET_STR]
                    data_type = np.dtype(type(add_offset))
                if SCALE_FACTOR_STR in temp :
                    scale_factor = temp[SCALE_FACTOR_STR]
                    data_type = np.dtype(type(scale_factor))
                if SCALE_METHOD_STR in temp :
                    scaling_method = temp[SCALE_METHOD_STR]
        finally :
            # always release the dataset, also when reading it failed in a
            # way that is not handled above
            SDS.endaccess(variable_object)
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        # please see constant.f90 for a more up to date version of this information:
        #     INTEGER(kind=int1) :: NO_SCALE              ! 0
        #     INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
        #     INTEGER(kind=int1) :: LOG_SCALE             ! 2
        #     INTEGER(kind=int1) :: SQRT_SCALE            ! 3
        if (scaling_method == 0) :
            return raw_data_copy
        if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
            # Logger.warning is the supported spelling, Logger.warn is deprecated
            LOG.warning ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of hdf standard method. '
                         + 'This may cause problems with data consistency')
        
        # if we don't have a data type something strange has gone wrong
        assert(not (data_type is None))
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # the builtin bool is used as the dtype; the np.bool alias no longer
        # exists in current numpy releases
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None :
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy                = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self, name):
        """
        return the object the underlying file gives back when the named
        variable is selected
        """
        selected = self._hdf.select(name)
        return selected
298
    
(no author)'s avatar
(no author) committed
299
    def missing_value(self, name):
        """
        return the fill value attribute of the named variable,
        or None if the variable does not have one
        """
        fill_value = self.get_attribute(name, fillValConst1)
        return fill_value
302
303
304
305
306
307
308
309
310
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        placeholder for creating a new variable with the given name,
        optionally with a missing value (fill value), data, and attributes
        copied from another variable
        
        creating variables is not available for hdf files yet, so this always
        raises IOUnimplimentedError and none of the arguments are used
        """
        
        message = 'Unable to create variable in hdf file, this functionality is not yet available.'
        raise IOUnimplimentedError(message)
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        placeholder for setting an attribute of a variable to a new value
        (creating the attribute if it does not exist)
        
        writing attributes is not available for hdf files yet, so this always
        raises IOUnimplimentedError and none of the arguments are used
        """
        
        message = 'Unable add attribute to hdf file, this functionality is not yet available.'
        raise IOUnimplimentedError(message)
325
    
326
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        return all the attributes associated with a variable name
        
        with caseInsensitive set the answer comes from the attribute cache
        (lower cased attribute names), otherwise the attributes are read
        directly from the variable object with their names as stored
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attributes(variableName)
        
        return self.get_variable_object(variableName).attributes()
338
    
339
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        return the value of one attribute of a variable, or None if the
        variable does not have that attribute
        
        with caseInsensitive set the lookup goes through the attribute cache,
        otherwise the attribute name has to match the stored name exactly
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attribute(variableName, attributeName)
        
        exact_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
        if attributeName in exact_attributes :
            return exact_attributes[attributeName]
        
        return None
(no author)'s avatar
(no author) committed
354
    
355
356
357
358
359
360
361
362
    def get_global_attributes(self, caseInsensitive=True) :
        """
        return the global attributes of this file
        
        with caseInsensitive set the answer comes from the attribute cache
        (lower cased attribute names), otherwise the attributes are read
        directly from the file with their names as stored
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attributes()
        
        return self._hdf.attributes()
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        return the value of one global attribute, or None if the file does
        not have that attribute
        
        with caseInsensitive set the lookup goes through the attribute cache,
        otherwise the attribute name has to match the stored name exactly
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attribute(attributeName)
        
        if attributeName not in self._hdf.attributes() :
            return None
        
        return self._hdf.attributes()[attributeName]
383
384
385
386
387
388
389
390
    
    def is_loadable_type (self, name) :
        """
        report whether the named variable is of a type that can be loaded;
        at present every name is reported as loadable
        """
        
        # TODO, are there any bad types for these files?
        loadable = True
        return loadable
(no author)'s avatar
(no author) committed
391

392
class nc (object):
393
    """wrapper for netcdf4-python data access for comparison
(no author)'s avatar
(no author) committed
394
395
396
397
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
398
    _nc = None
399
400
401
402
    _var_map = None

    # walk down through all groups and get variable names and objects
    def _walkgroups(self, start_at, prefix=None, ):
403
404
        # look through the variables that are here
        for var_name in start_at.variables.keys():
405
406
            temp_name = var_name if prefix is None or len(prefix) <= 0 else prefix + "/" + var_name
            yield temp_name, start_at[var_name]
407
408
        # look through the groups that are here
        for group_name in start_at.groups.keys():
409
410
411
            grp_str = group_name if prefix is None or len(prefix) <= 0 else prefix + "/" + group_name
            for more_var_name, more_var_obj in self._walkgroups(start_at.groups[group_name], prefix=grp_str):
                yield more_var_name, more_var_obj
412
    
413
414
    def __init__(self, filename, allowWrite=False):
        """
        open the NetCDF file at filename, attach an attribute cache to it and
        build the map from variable names (including group paths) to variable
        objects
        
        filename   -- path of the NetCDF file to open
        allowWrite -- when True the existing file is opened for reading and
                      writing, otherwise it is opened read only
        
        raises AssertionError if the netCDF4 module could not be imported
        """
        
        if netCDF4 is None:
            message = 'netCDF4 is not installed and is needed in order to read NetCDF files'
            LOG.error(message)
            # raised explicitly rather than through an assert statement so the
            # check is still made when python runs with -O (which strips
            # asserts); the exception type is the one an assert would raise
            raise AssertionError(message)
        
        # 'a' opens the existing file for reading and writing; the 'w' mode
        # that was used here before makes netCDF4 create a brand new file,
        # discarding the contents of the file we were asked to open
        mode = 'a' if allowWrite else 'r'
        
        self._nc = netCDF4.Dataset(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        # names of variables inside groups are stored as "group/.../variable"
        self._var_map = { }
        for var_name, var_obj in self._walkgroups(self._nc,) :
            self._var_map[var_name] = var_obj
428

(no author)'s avatar
(no author) committed
429
    def __call__(self):
430
431
432
433
        """
        yield names of variables in this file
        """

434
        return list(self._var_map.keys())
435

(no author)'s avatar
(no author) committed
436
    def __getitem__(self, name):
        """
        return a numpy array with a copy of the full data for the named
        variable, with the unsigned correction, scale factor and add offset
        applied
        
        The data is always converted to float32 before anything is applied, so
        the return type may not reflect the type found in the original file.
        Values equal to the variable's missing value are left untouched by
        both the unsigned correction and the scaling.
        """

        # defaults
        data_type = np.float32 # TODO temporary this avoids type truncation issues, but is not a general solution
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)

        # get our data, save the dtype, and make sure it's a more flexible dtype for now
        variable_object.set_auto_maskandscale(False)  # for now just do the darn calculations ourselves
        raw_data = variable_object[:]
        stored_dtype = np.asarray(raw_data).dtype
        scaled_data_copy = np.array(raw_data, dtype=data_type)

        # get the attribute cache so we can check on loading related attributes
        temp = self.attributeCache.get_variable_attributes(name)

        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # the builtin bool is used as the dtype; the np.bool alias no longer
        # exists in current numpy releases
        missing_mask = np.zeros(scaled_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[scaled_data_copy == missing_val] = True

        #***** just do the darn unsigned handling ourselves, ugh

        # if our data is labeled as being unsigned by the appropriately set attribute
        if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == ("true"):
            LOG.debug("Correcting for unsigned values in variable data.")
            # the amount to add is 2 ** (number of bits of the stored type);
            # for 16 bit data this is the 65536 that used to be hard coded for
            # every type, and that value is kept as the fallback when the
            # stored data is not a signed integer type
            if stored_dtype.kind == 'i' :
                wrap_amount = 2.0 ** (8 * stored_dtype.itemsize)
            else :
                wrap_amount = np.iinfo(np.uint16).max + 1.0
            where_temp = (scaled_data_copy < 0.0) & ~missing_mask # where we have negative but not missing data
            scaled_data_copy[where_temp] += wrap_amount # add the 2's complement

        #***** end of handling the unsigned attribute

        ###### the start of the scaling code
        # Note, I had to turn this back on because the netcdf4 library is behaving erratically when unsigned is set

        # get the scale factor and add offset from the attributes
        scale_factor = 1.0
        add_offset = 0.0
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]

        # don't do work if we don't need to unpack things
        if (scale_factor != 1.0) or (add_offset != 0.0) :

            LOG.debug("Manually applying scale (" + str(scale_factor) + ") and add offset (" + str(add_offset) + ").")

            # unpack the data
            scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset

        ###### end of the scaling code

        # note: a disabled variant that applied the unsigned correction after
        # unpacking (instead of before, as above) used to be kept here

        return scaled_data_copy
523
    
524
525
526
527
    # TODO, this hasn't been supported in other file types
    def close (self) :
        """
        close the underlying dataset, then drop the references to it and to
        the variable map
        """
        dataset = self._nc
        dataset.close()
        self._var_map = None
        self._nc = None
529

530
    def get_variable_object(self, name):
        """
        return the variable object recorded in the variable map under the
        given name (a KeyError results if the name is not in the map)
        """

        variable = self._var_map[name]
        return variable
533
    
(no author)'s avatar
(no author) committed
534
    def missing_value(self, name):
        """
        return the missing (fill) value of the named variable, or None if it
        has none
        
        the two known fill value attribute names are tried in order and the
        first one that yields a value wins
        """
        
        for candidate_attribute in (fillValConst1, fillValConst2) :
            found = self.attributeCache.get_variable_attribute(name, candidate_attribute)
            if found is not None :
                return found
        
        return None
547

548
549
550
551
552
553
554
555
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        variablename                -- name of the variable to create
        missingvalue                -- fill value for the new variable, or None
        data                        -- numpy array with the data to store; its
                                       dtype and shape decide the type and the
                                       dimensions of the new variable
        variabletocopyattributesfrom -- name of an existing variable whose
                                       attributes (other than its fill value)
                                       are copied to the new one, or None
        
        the created variable will be returned, or None if a variable could not
        be created (the name is already in use, or no data was given)
        """

        # TODO, this will not work with groups
        
        # note: the dataset is not asked to enter / leave define mode here;
        # the nc_redef / nc_enddef calls that used to bracket this method are
        # not methods of a netCDF4 dataset
        
        # if the variable already exists, stop with a warning
        if variablename in self._nc.variables :
            LOG.warning("New variable name requested (" + variablename + ") is already present in file. " +
                        "Skipping generation of new variable.")
            return None
        # if we have no data we won't be able to determine the data type to create the variable
        if (data is None) or (len(data) <= 0) :
            LOG.warning("Data type for new variable (" + variablename + ") could not be determined. " +
                        "Skipping generation of new variable.")
            return None
        
        # TODO, at the moment the fill type is forcing me to use a double, when sometimes I want a float
        if np.issubdtype(data.dtype, np.signedinteger) :
            # the platform integer, which is what the removed np.int alias stood for
            dataType = np.dtype(int)
        elif np.issubdtype(data.dtype, np.floating) :
            dataType = np.float64
        else :
            # any other kind of data is stored with the type it came in
            dataType = data.dtype
        
        # create and set all the dimensions; the variable is created from the
        # names of its dimensions
        dimensions = [ ]
        for dimensionNum, dimSize in enumerate(data.shape) :
            dimensionName = variablename + '-index' + str(dimensionNum)
            self._nc.createDimension(dimensionName, dimSize)
            dimensions.append(dimensionName)
        
        # create the new variable; the fill value has to be given at creation
        # time, it can not be attached to the variable afterwards
        newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions), fill_value=missingvalue)
        
        # if we have a variable to copy attributes from, do so
        if variabletocopyattributesfrom is not None :
            attributes = self.get_variable_attributes(variabletocopyattributesfrom, caseInsensitive=False)
            for attribute in attributes :
                # the fill value was settled when the variable was created
                if attribute.lower() != '_fillvalue' :
                    setattr(newVariable, attribute, attributes[attribute])
        
        # store the data that was given
        newVariable[:] = data
        
        # make the new variable reachable through get_variable_object
        if self._var_map is not None :
            self._var_map[variablename] = newVariable

        return newVariable
615

616
617
618
619
620
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        
        variableName      -- name of the variable, as known to the variable map
        newAttributeName  -- name of the attribute to set
        newAttributeValue -- value to give the attribute
        """
        # TODO, this will not work with groups

        variableObject = self.get_variable_object(variableName)
        
        # note: the dataset is not asked to enter / leave define mode here;
        # the nc_redef / nc_enddef calls that used to bracket the assignment
        # are not methods of a netCDF4 dataset
        setattr(variableObject, newAttributeName, newAttributeValue)

        # TODO, this will cause our attribute cache to be wrong!
        # TODO, for now, brute force clear the cache
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        return
636
    
637
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        return all the attributes associated with a variable name
        
        with caseInsensitive set the answer comes from the attribute cache
        (lower cased attribute names), otherwise a new dictionary is built
        from the attribute names the variable object reports, with the names
        as stored
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attributes(variableName)
        
        variable = self.get_variable_object(variableName)
        return dict((attrKey, getattr(variable, attrKey)) for attrKey in variable.ncattrs())
654
    
655
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        
        with caseInsensitive set the lookup goes through the attribute cache,
        otherwise the attribute name has to match the stored name exactly
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attribute(variableName, attributeName)
        
        temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
        
        # take the value from the attributes that were just read from the
        # variable; the earlier code asked the get_variable_object method
        # itself (not the variable) for the attribute, which can never succeed
        if attributeName in temp_attributes :
            return temp_attributes[attributeName]
        
        return None
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        return the global attributes of this file
        
        with caseInsensitive set the answer comes from the attribute cache
        (lower cased attribute names), otherwise a new dictionary is built
        from the attribute names the dataset reports, with the names as stored
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attributes()
        
        dataset = self._nc
        return dict((attrKey, getattr(dataset, attrKey)) for attrKey in dataset.ncattrs())
687
    
688
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        return the value of one global attribute, or None if the file does
        not have that attribute
        
        with caseInsensitive set the lookup goes through the attribute cache,
        otherwise the attribute name has to match the stored name exactly
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attribute(attributeName)
        
        if attributeName in self._nc.ncattrs() :
            return getattr(self._nc, attributeName)
        
        return None
702
703
704
705
706
    
    def is_loadable_type (self, name) :
        """
        report whether the named variable is of a type that can be loaded;
        at present every name is reported as loadable
        """

        loadable = True
        return loadable
709

(no author)'s avatar
(no author) committed
710
711
712
nc4 = nc
cdf = nc

713
714
# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 
715

(no author)'s avatar
(no author) committed
716
class h5(object):
    """wrapper for HDF5 datasets

    Opens a file with h5py and exposes its datasets by name: calling the
    object lists the dataset names, indexing it returns a (scaled) copy of
    a dataset's data, and the remaining methods give access to variable
    and global attributes. Creating variables and writing attributes are
    not implemented for this file type.
    """
    _h5 = None
    
    def __init__(self, filename, allowWrite=False):
        """
        open the HDF5 file at filename

        the file is opened with mode 'r' unless allowWrite is true, in which
        case it is opened with mode 'r+'

        raises AssertionError if the h5py module could not be imported
        """
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        mode = 'r+' if allowWrite else 'r'
        
        if h5py is None:
            LOG.error('h5py module is not installed and is needed in order to read h5 files')
            # raised explicitly instead of through an assert statement so the
            # check is not stripped when python runs with -O; the exception
            # type is the same one the assert statement produced
            raise AssertionError('h5py module is not installed and is needed in order to read h5 files')
        self._h5 = h5py.File(filename, mode)
    
    def __call__(self):
        """
        returns a list of the names of all the usable datasets in the file

        the names are those handed to the callback by the file's visititems;
        datasets whose dtype can not be read (TypeError) are left out
        """
        
        variableList = [ ]
        def testFn (name, obj) :
            
            if isinstance(obj, h5py.Dataset) :
                try :
                    obj.dtype # this is required to provoke a type error for closed data sets
                    
                    variableList.append(name)
                except TypeError :
                    LOG.debug('TypeError prevents the use of variable ' + name
                              + '. This variable will be ignored')
        
        self._h5.visititems(testFn)
        
        LOG.debug('variables from visiting h5 file structure: ' + str(variableList))
        
        return variableList
    
    @staticmethod
    def trav(h5,pth): 
        """
        walk down from the object h5 following the '/' separated path pth

        each non-empty path component is used to index the object reached so
        far; empty components (leading, trailing or doubled slashes) are
        skipped, so an empty path returns h5 itself
        """
        return reduce( lambda x,a: x[a] if a else x, pth.split('/'), h5)
    
    def __getitem__(self, name):
        """
        returns a numpy array with a copy of the full, scaled data for
        the named variable

        if neither a scale factor nor an add offset other than the defaults
        (1.0 and 0.0) is found in the variable's attributes, the raw data is
        returned unchanged; otherwise the data is converted to float32 and
        scaled, so the return type may not reflect the type found in the
        original file. elements equal to the variable's missing value are
        converted but not scaled.
        """
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = np.float32 # TODO temporary
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if (SCALE_FACTOR_STR in temp.keys()) :
            scale_factor = temp[SCALE_FACTOR_STR]
        if (ADD_OFFSET_STR in temp.keys()) :
            add_offset = temp[ADD_OFFSET_STR]
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data is the missing value
        # (the builtin bool is used because the np.bool alias no longer
        # exists in current numpy releases)
        missing_val = self.missing_value(name)
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data, leaving the
        # missing values unscaled
        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
        valid_mask = ~missing_mask
        scaled_data_copy[valid_mask] = (scaled_data_copy[valid_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self,name):
        """
        returns the object found by following the path name from the root of the file
        """
        return h5.trav(self._h5, name)
    
    def missing_value(self, name):
        """
        returns the fill value of the named variable as a zero dimensional
        numpy array of the variable's dtype, or None if the dataset's
        creation property list reports that no fill value is defined
        """
        
        toReturn = None
        
        # get the missing value if it has been set
        variableObject = self.get_variable_object(name)
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        # the status is compared by value; an identity test on these integer
        # constants only works by accident of small integer caching
        if fillValueStatus in (h5d.FILL_VALUE_DEFAULT, h5d.FILL_VALUE_USER_DEFINED) :
            temp = np.array(1, dtype=variableObject.dtype)
            pListObj.get_fill_value(temp) # fills temp in place
            toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        this is not implemented for hdf 5 files, so an IOUnimplimentedError
        is always raised
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf 5 file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        
        this is not implemented for hdf 5 files, so an IOUnimplimentedError
        is always raised
        """
        
        raise IOUnimplimentedError('Unable to add attribute to hdf 5 file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name

        the case insensitive version comes from the attribute cache, the
        case sensitive version is the attrs object of the variable itself
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attributes(variableName)
        
        return self.get_variable_object(variableName).attrs
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attribute(variableName, attributeName)
        
        temp_attrs = self.get_variable_attributes(variableName, caseInsensitive=False)
        if attributeName in temp_attrs :
            return temp_attrs[attributeName]
        
        return None
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get all the global attributes for this file

        the case insensitive version comes from the attribute cache, the
        case sensitive version is the attrs object of the file itself
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attributes()
        
        return self._h5.attrs
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attribute(attributeName)
        
        if attributeName in self._h5.attrs :
            return self._h5.attrs[attributeName]
        
        return None
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
910

(no author)'s avatar
(no author) committed
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956

class aeri(object):
    """wrapper for AERI RNC/SUM/CXS/etc datasets
    """
    _dmv = None
    _vectors = { }
    _scalars = { }
    
    @staticmethod
    def _meta_mapping(fp):
        ids = fp.metaIDs()
        names = [fp.queryMetaDescString(1, id_, fp.SHORTNAME) for id_ in ids]
        assert len(ids) == len(names)
        return (dict((n, i) for n, i in zip(names, ids)))
    
    def _inventory(self):
        fp = self._dmv
        assert(fp is not None)
        # get list of vectors and scalars
        self._vectors = dict( (fp.queryVectorDescString(n,fp.SHORTNAME), n) for n in fp.vectorIDs() )
        self._scalars = self._meta_mapping(fp)

    def __init__(self, filename, allowWrite=False):
        assert(allowWrite==False)
        if dmvlib is None:
            LOG.error('cannot open AERI files without dmv module being available')
            return
        self._dmv = dmvlib.dmv()
        rc = self._dmv.openFile(filename)
        if rc!=0:
            LOG.error("unable to open file, rc=%d" % rc)
            self._dmv = None        
        else:
            self._inventory()
    
    def __call__(self):
        return list(self._vectors.keys()) + list(self._scalars.keys())
        
    def __getitem__(self, name):
        fp = self._dmv
        assert(fp is not None)
        if 'DMV_RECORDS' in os.environ:
            nrecs = int(os.environ['DMV_RECORDS'])
            LOG.warning('overriding dmv record count to %d' % nrecs)
        else:
            nrecs = self._dmv.recordCount()
957
        recrange = list(range(1, nrecs+1))
(no author)'s avatar
(no author) committed
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
        if name in self._vectors:
            vid = self._vectors[name]
            vdata = [ fp.vectorDepValues(rec, vid) for rec in recrange ]
            return np.array(vdata)
        elif name in self._scalars:
            vdata = fp.metaValueMatrix(recrange, [self._scalars[name]])
            return np.array(vdata)
        else:
            raise LookupError('cannot find variable %s' % name)
       
    def get_variable_object(self,name):
        """
        no variable object is provided for this file type; always returns None
        """
        return None
    
    def missing_value(self, name):
        """
        returns the missing value for the named variable, which is
        a floating point nan regardless of the name given
        """
        return float('nan')
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name, optionally setting the
        missing value (fill value) and data to those given
        
        creating variables is not available for aeri files, so this always
        raises an IOUnimplimentedError and never returns a variable
        """
        
        raise IOUnimplimentedError('Unable to create variable in aeri file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        set the named attribute of the given variable to the new value,
        creating the attribute if it does not exist yet
        
        writing attributes is not available for aeri files, so this always
        raises an IOUnimplimentedError
        """
        
        raise IOUnimplimentedError('Unable to add attribute to aeri file, this functionality is not yet available.')
996
    
997
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
998
999
1000
        """
        returns all the attributes associated with a variable name
        """
For faster browsing, not all history is shown. View entire blame