io.py 53.2 KB
Newer Older
(no author)'s avatar
(no author) committed
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.

Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

10
import os, logging
11
import numpy
12
from functools import reduce
(no author)'s avatar
   
(no author) committed
13
14

LOG = logging.getLogger(__name__)
(no author)'s avatar
(no author) committed
15

16
17
Loadable_Types = set()

(no author)'s avatar
   
(no author) committed
18
19
20
try:
    import pyhdf
    from pyhdf.SD import SD,SDC, SDS, HDF4Error
21
    Loadable_Types.add("hdf")
(no author)'s avatar
   
(no author) committed
22
23
24
25
26
27
except:
    LOG.info('no pyhdf module available for HDF4')
    pyhdf = None
    SD = SDC = SDS = object
    HDF4Error = EnvironmentError
    
28
29
try:
    import h5py
30
    from h5py import h5d
31
    Loadable_Types.add("h5")
32
except ImportError:
(no author)'s avatar
   
(no author) committed
33
34
    LOG.info('no h5py module available for reading HDF5')
    h5py = None
(no author)'s avatar
(no author) committed
35

36
37
38
# the newer netCDF library that replaced pycdf
try:
    import netCDF4
39
    Loadable_Types.update(["nc", "nc4", "cdf", ])
40
41
42
43
except:
    LOG.info("unable to import netcdf4 library")
    netCDF4 = None

(no author)'s avatar
(no author) committed
44
45
46
try:
    import dmv as dmvlib
    LOG.info('loaded dmv module for AERI data file access')
47
    Loadable_Types.update(["cxs", "rnc", "cxv", "csv", "spc", "sum", "uvs", "aeri", ])
(no author)'s avatar
(no author) committed
48
49
50
51
except ImportError:
    LOG.info('no AERI dmv data file format module')
    dmvlib = None

(no author)'s avatar
   
(no author) committed
52
53
54
try:
    import adl_blob
    LOG.info('adl_blob module found for JPSS ADL data file access')
55
    # TODO, what is the loadable file extension?
(no author)'s avatar
   
(no author) committed
56
57
58
59
except ImportError:
    LOG.info('no adl_blob format handler available')
    adl_blob = None

60
61
62
try :
    from osgeo import gdal
    LOG.info('loading osgeo module for GeoTIFF data file access')
63
    Loadable_Types.update(["tiff", "tif", "tifa", ])
64
65
66
67
except :
    LOG.info('no osgeo available for reading GeoTIFF data files')
    gdal = None

68
# attribute name used to look up the units of a variable
UNITS_CONSTANT = "units"

# attribute names that may hold a variable's fill / missing data value
fillValConst1 = '_FillValue'
fillValConst2 = 'missing_value'

# attribute names describing how packed data should be unpacked
ADD_OFFSET_STR   = 'add_offset'
SCALE_FACTOR_STR = 'scale_factor'
SCALE_METHOD_STR = 'scaling_method'

# attribute marking data that is stored signed but should be read as unsigned
UNSIGNED_ATTR_STR = "_unsigned"

# map from each signed integer dtype to its unsigned counterpart
SIGNED_TO_UNSIGNED_DTYPES = {
    numpy.dtype(signed_type): numpy.dtype(unsigned_type)
    for signed_type, unsigned_type in (
        (numpy.int8,  numpy.uint8),
        (numpy.int16, numpy.uint16),
        (numpy.int32, numpy.uint32),
        (numpy.int64, numpy.uint64),
    )
}

86
87
88
89
90
91
92
93
94
95
96
class IOUnimplimentedError(Exception):
    """
    The exception raised when a requested io operation is not yet available.

        msg  -- explanation of the problem
    """

    def __init__(self, msg):
        # hang on to the explanation so it can be reported later
        self.msg = msg

    def __str__(self):
        # report the stored explanation as the exception text
        return self.msg

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
class CaseInsensitiveAttributeCache (object) :
    """
    A cache of attributes for a single file and all of its variables.
    This cache is considered uncased: every attribute it stores is keyed by
    the lower-cased attribute name, and any name it is asked to look up is
    lower-cased before searching.
    When the global or per-variable attribute sets have not been loaded yet
    and something from that part of the file is requested, the cache will
    transparently pull the attributes from the file behind the scenes and
    build the cached version for that part of the file.
    """

    def __init__(self, fileObject) :
        """
        set up the empty cache and hang on to the file object we'll be caching
        """

        self.fileToCache             = fileObject    # the file wrapper we load attributes from
        self.globalAttributesLower   = None          # lazily-built {lower name: value} for globals
        self.variableAttributesLower = { }           # {lower var name: {lower attr name: value}}

    def _load_global_attributes_if_needed (self) :
        """
        load up the global attributes if they need to be cached
        """

        # nothing to do when the globals have already been cached
        if self.globalAttributesLower is not None :
            return

        LOG.debug ("Loading file global attributes into case-insensitive cache.")
        rawAttrs = self.fileToCache.get_global_attributes(caseInsensitive=False)
        self.globalAttributesLower = {key.lower(): value for key, value in rawAttrs.items()}

    def _load_variable_attributes_if_needed (self, variableName) :
        """
        load up the variable attributes if they need to be cached
        """

        lowerName = variableName.lower()

        # nothing to do when this variable's attributes are already cached
        if lowerName in self.variableAttributesLower :
            return

        LOG.debug ("Loading attributes for variable \"" + variableName + "\" into case-insensitive cache.")
        rawAttrs = self.fileToCache.get_variable_attributes(variableName, caseInsensitive=False)
        # build the lower-cased view of whatever attributes were found
        self.variableAttributesLower[lowerName] = {key.lower(): value for key, value in rawAttrs.items()}

    def get_variable_attribute (self, variableName, attributeName) :
        """
        get the specified attribute for the specified variable,
        if this variable's attributes have not yet been loaded
        they will be loaded and cached
        """

        self._load_variable_attributes_if_needed(variableName)

        varKey  = variableName.lower()
        attrKey = attributeName.lower()
        cachedAttrs = self.variableAttributesLower.get(varKey, { })

        if attrKey not in cachedAttrs :
            LOG.debug ("Attribute \"" + attributeName + "\" was not present for variable \"" + variableName + "\".")
            return None

        return cachedAttrs[attrKey]

    def get_variable_attributes (self, variableName) :
        """
        get the variable attributes for the variable name given
        """

        self._load_variable_attributes_if_needed(variableName)

        return self.variableAttributesLower.get(variableName.lower(), None)

    def get_global_attribute (self, attributeName) :
        """
        get a global attribute with the given name
        """

        self._load_global_attributes_if_needed()

        return self.globalAttributesLower.get(attributeName.lower(), None)

    def get_global_attributes (self) :
        """
        get the global attributes,
        """

        self._load_global_attributes_if_needed()

        return self.globalAttributesLower

    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """

        # TODO, are there any bad types for these files?
        return True
203

204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def _get_data_uptype (input_dtype) :
    """
    Given an input data type, figure out what type we need to upcast it to.

    Note: Glance expects all it's data to get upcast into floats for the purposes of it's
    later math manipulations.
    """

    default_uptype = numpy.float32
    default_finfo  = numpy.finfo(default_uptype)
    input_info     = numpy.finfo(input_dtype) if  numpy.issubdtype(input_dtype, numpy.floating,) else numpy.iinfo(input_dtype)

    # if our input won't fit into the default, pick a bigger type
    if ( (default_finfo.min > input_info.min) or (default_finfo.max < input_info.max) ) :
        LOG.debug("Input data will not fit in default float32 data type, using larger type.")
        default_uptype = numpy.float64

    # FUTURE, if we reach a point where a float64 isn't big enough, this will need to be revisited

    return default_uptype

225
class hdf (object):
    """wrapper for HDF4 dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """

    # the underlying pyhdf SD file object, set in __init__
    _hdf = None

    def __init__(self, filename, allowWrite=False):
        """open the given HDF4 file; allowWrite adds write access"""

        if pyhdf is None:
            LOG.error('pyhdf is not installed and is needed in order to read hdf4 files')
            assert(pyhdf is not None)

        mode = SDC.READ
        if allowWrite:
            mode = mode | SDC.WRITE

        self._hdf = SD(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)

    def __call__(self):
        "yield names of variables to be compared"
        return list(self._hdf.datasets())

    def __getitem__(self, name):
        """
        return a numpy array with a copy of the full, scaled data for this
        variable; if the data type must be changed to allow for scaling it
        will be (so the return type may not reflect the type found in the
        original file)
        """

        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = None
        scaling_method = None

        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        try :
            # TODO, this currently won't work with geocat data, work around it for now
            scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
        except HDF4Error:
            # no calibration record; load just the scale factor and add offset information by hand
            temp = self.attributeCache.get_variable_attributes(name)
            if ADD_OFFSET_STR in temp :
                add_offset = temp[ADD_OFFSET_STR]
                data_type = numpy.dtype(type(add_offset))
            if SCALE_FACTOR_STR in temp :
                scale_factor = temp[SCALE_FACTOR_STR]
                data_type = numpy.dtype(type(scale_factor))
            if SCALE_METHOD_STR in temp :
                scaling_method = temp[SCALE_METHOD_STR]
        SDS.endaccess(variable_object)

        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy

        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        """
        please see constant.f90 for a more up to date version of this information:
            INTEGER(kind=int1) :: NO_SCALE              ! 0
            INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
            INTEGER(kind=int1) :: LOG_SCALE             ! 2
            INTEGER(kind=int1) :: SQRT_SCALE            ! 3 
        """
        if (scaling_method == 0) :
            return raw_data_copy
        if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
            # fix: LOG.warn is a deprecated alias of LOG.warning
            LOG.warning ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of hdf standard method. '
                         + 'This may cause problems with data consistency')

        # if we don't have a data type something strange has gone wrong
        assert(not (data_type is None))

        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # fix: numpy.bool was removed in numpy 1.24; the builtin bool is the correct dtype here
        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None :
            missing_mask[raw_data_copy == missing_val] = True

        # create the scaled version of the data, leaving missing values untouched
        scaled_data_copy                = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?

        return scaled_data_copy

    def get_variable_object(self, name):
        """get the pyhdf SDS object for the named variable"""
        return self._hdf.select(name)

    def missing_value(self, name):
        """return the fill value (_FillValue attribute) for the named variable, or None"""

        return self.get_attribute(name, fillValConst1)

    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given

        the created variable will be returned, or None if a variable could not
        be created
        """

        raise IOUnimplimentedError('Unable to create variable in hdf file, this functionality is not yet available.')

        return None

    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """

        raise IOUnimplimentedError('Unable add attribute to hdf file, this functionality is not yet available.')

        return

    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """

        toReturn = None
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attributes()

        return toReturn

    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)

            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]

        return toReturn

    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """

        toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._hdf.attributes()

        return toReturn

    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """

        toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._hdf.attributes() :
                toReturn = self._hdf.attributes()[attributeName]

        return toReturn

    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """

        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
408

409
class nc (object):
    """wrapper for netcdf4-python data access for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """

    # the underlying netCDF4 Dataset object, set in __init__
    _nc = None
    # flat map from full variable path ("group/subgroup/var") to variable object
    _var_map = None

    # walk down through all groups and get variable names and objects
    def _walkgroups(self, start_at, prefix=None, ):
        """recursively yield (path, variable object) pairs for every variable
        in this group and all nested groups; paths are "/"-separated"""

        # look through the variables that are here
        for var_name in start_at.variables:
            temp_name = var_name if prefix is None or len(prefix) <= 0 else prefix + "/" + var_name
            yield temp_name, start_at[var_name]
        # look through the groups that are here
        for group_name in start_at.groups:
            grp_str = group_name if prefix is None or len(prefix) <= 0 else prefix + "/" + group_name
            for more_var_name, more_var_obj in self._walkgroups(start_at.groups[group_name], prefix=grp_str):
                yield more_var_name, more_var_obj

    def __init__(self, filename, allowWrite=False):
        """open the given netCDF file; allowWrite opens it for appending"""

        if netCDF4 is None:
            LOG.error('netCDF4 is not installed and is needed in order to read NetCDF files')
            assert(netCDF4 is not None)

        mode = 'r'
        if allowWrite :
            mode = 'a' # a is for append, if I use w it creates a whole new file, deleting the old one

        self._nc = netCDF4.Dataset(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        # build the flat variable map once so lookups by path are cheap
        self._var_map = { }
        for var_name, var_obj in self._walkgroups(self._nc,) :
            self._var_map[var_name] = var_obj

    def __call__(self):
        """
        yield names of variables in this file
        """

        return list(self._var_map)

    def __getitem__(self, name):
        """
        this returns a numpy array with a copy of the full, scaled
        data for this variable, if the data type must be changed to allow
        for scaling it will be (so the return type may not reflect the
        type found in the original file)
        """

        LOG.debug("loading variable data for: " + name)

        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)

        # get our data, save the dtype, and make sure it's a more flexible dtype for now
        variable_object.set_auto_maskandscale(False)  # for now just do the darn calculations ourselves
        temp_input_data = variable_object[:]
        LOG.debug("Native input dtype: " + str(temp_input_data.dtype))
        dtype_to_use = _get_data_uptype(temp_input_data.dtype)
        LOG.debug("Choosing dtype " + str(dtype_to_use) + " for our internal representation of this data.")
        scaled_data_copy = numpy.array(temp_input_data, dtype=dtype_to_use,)

        # get the attribute cache so we can check on loading related attributes
        temp = self.attributeCache.get_variable_attributes(name)

        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # fix: numpy.bool was removed in numpy 1.24; the builtin bool is the correct dtype here
        missing_mask = numpy.zeros(scaled_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[scaled_data_copy == missing_val] = True

        #***** just do the darn unsigned handling ourselves, ugh

        # if our data is labeled as being unsigned by the appropriately set attribute
        if UNSIGNED_ATTR_STR in temp and str(temp[UNSIGNED_ATTR_STR]).lower() == ("true"):
            LOG.debug("Correcting for unsigned values in variable data.")
            where_temp = (scaled_data_copy < 0.0) & ~missing_mask # where we have negative but not missing data
            scaled_data_copy[where_temp] += (numpy.iinfo(numpy.uint16).max + 1.0) # add the 2's complement

        #***** end of handling the unsigned attribute

        ###### the start of the scaling code
        # Note, I had to turn this back on because the netcdf4 library is behaving erratically when unsigned is set

        # get the scale factor and add offset from the attributes
        scale_factor = 1.0
        add_offset = 0.0
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]

        # don't do work if we don't need to unpack things
        if (scale_factor != 1.0) or (add_offset != 0.0) :
            LOG.debug("Manually applying scale (" + str(scale_factor) + ") and add offset (" + str(add_offset) + ").")
            # unpack the data, leaving missing values untouched
            scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset

        ###### end of the scaling code

        return scaled_data_copy

    # TODO, this hasn't been supported in other file types
    def close (self) :
        """close the underlying file and drop our references to it"""
        self._nc.close()
        self._nc = None
        self._var_map = None

    def get_variable_object(self, name):
        """get the netCDF4 variable object for the named variable (full group path)"""

        return self._var_map[name]

    def missing_value(self, name):
        """return the fill value for the named variable, or None

        checks the _FillValue attribute first, then falls back to missing_value
        """

        toReturn = None

        temp = self.attributeCache.get_variable_attribute(name, fillValConst1)
        if temp is not None :
            toReturn = temp
        else :
            temp = self.attributeCache.get_variable_attribute(name, fillValConst2)
            if temp is not None :
                toReturn = temp

        return toReturn

    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given

        the created variable will be returned, or None if a variable could not
        be created
        """

        # TODO, this will not work with groups
        #self._nc.nc_redef() # TODO?

        # if the variable already exists, stop with a warning
        if variablename in self._nc.variables :
            # fix: LOG.warn is a deprecated alias of LOG.warning
            LOG.warning("New variable name requested (" + variablename + ") is already present in file. " +
                        "Skipping generation of new variable.")
            return None
        # if we have no data we won't be able to determine the data type to create the variable
        if (data is None) or (len(data) <= 0) :
            LOG.warning("Data type for new variable (" + variablename + ") could not be determined. " +
                        "Skipping generation of new variable.")
            return None

        # TODO, the type managment here is going to cause problems with larger floats, review this
        # fix: numpy.int / numpy.float were removed from numpy; use the builtins instead
        dataType = None
        if numpy.issubdtype(data.dtype, int) :
            dataType = int
        # TODO, at the moment the fill type is forcing me to use a double, when sometimes I want a float
        elif numpy.issubdtype(data.dtype, float) :
            dataType = numpy.float64
        # what do we do if it's some other type?
        else :
            dataType = data.dtype

        # create and set all the dimensions
        dimensions = [ ]
        dimensionNum = 0
        for dimSize in data.shape :
            tempName = variablename + '-index' + str(dimensionNum)
            self._nc.createDimension(tempName, dimSize)
            dimensions.append(tempName)
            dimensionNum = dimensionNum + 1

        # create the new variable; if a missing value was given, use that as the fill value
        if missingvalue is None :
            newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions))
        else :
            newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensions), fill_value=missingvalue, )

        # if we have a variable to copy attributes from, do so
        if variabletocopyattributesfrom is not None :
            attributes = self.get_variable_attributes(variabletocopyattributesfrom, caseInsensitive=False)

            for attribute in attributes :
                # _FillValue can only be set at creation time, so skip it here
                if attribute.lower() != "_fillvalue" :
                    setattr(newVariable, attribute, attributes[attribute])

        #self._nc.nc_enddef() # TODO?

        # if data was given, use that
        if data is not None :

            newVariable[:] = data

        return newVariable

    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue, variableObject=None,) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        # TODO, this will not work with groups

        if variableObject is None :
            variableObject = self.get_variable_object(variableName)

        #self._nc.nc_redef() # TODO?

        setattr(variableObject, newAttributeName, newAttributeValue)

        #self._nc.nc_enddef() # TODO?

        # TODO, this will cause our attribute cache to be wrong!
        # TODO, for now, brute force clear the cache
        self.attributeCache = CaseInsensitiveAttributeCache(self)

        return

    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """

        toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = { }
            tempVarObj   = self.get_variable_object(variableName)
            tempAttrKeys = tempVarObj.ncattrs()
            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(tempVarObj, attrKey)

        return toReturn

    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)

            if attributeName in temp_attributes :
                # bug fix: this previously did getattr(self.get_variable_object, attributeName),
                # reading the attribute off the bound method instead of the variable
                toReturn = temp_attributes[attributeName]

        return toReturn

    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """

        toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = { }
            tempAttrKeys = self._nc.ncattrs()
            for attrKey in tempAttrKeys :
                toReturn[attrKey] = getattr(self._nc, attrKey)

        return toReturn

    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """

        toReturn = None

        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._nc.ncattrs() :
                toReturn = getattr(self._nc, attributeName)

        return toReturn

    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """

        return True
737

(no author)'s avatar
(no author) committed
738
739
740
nc4 = nc
cdf = nc

741
742
# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 
743

(no author)'s avatar
(no author) committed
744
class h5(object):
    """wrapper for HDF5 datasets

    Provides the same access interface as the other file-format wrappers in
    this module: call the instance to list variables, index it to read scaled
    data, and use the attribute/missing-value helpers for metadata.
    """
    # the open h5py.File handle (set in __init__)
    _h5 = None
    
    def __init__(self, filename, allowWrite=False):
        """open the given HDF5 file, read-only unless allowWrite is True

        Requires the h5py module; logs an error (and asserts) if it is absent.
        """
        
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        mode = 'r'
        if allowWrite :
            mode = 'r+'
        if h5py is None:
            LOG.error('h5py module is not installed and is needed in order to read h5 files')
            assert(h5py is not None)
        self._h5 = h5py.File(filename, mode)
    
    def __call__(self):
        """
        return a list of the names of all data sets in the file

        Group nodes are skipped; any data set whose dtype cannot be read
        (e.g. a closed/broken data set) is logged and ignored.
        """
        
        variableList = [ ]
        def testFn (name, obj) :
            
            if isinstance(obj, h5py.Dataset) :
                try :
                    tempType = obj.dtype # this is required to provoke a type error for closed data sets
                    
                    variableList.append(name)
                except TypeError :
                    LOG.debug('TypeError prevents the use of variable ' + name
                              + '. This variable will be ignored')
        
        self._h5.visititems(testFn)
        
        LOG.debug('variables from visiting h5 file structure: ' + str(variableList))
        
        return variableList
    
    @staticmethod
    def trav(h5,pth):
        # walk a '/'-separated path down through the group hierarchy;
        # empty segments (leading or doubled slashes) are skipped
        return reduce( lambda x,a: x[a] if a else x, pth.split('/'), h5)
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        
        # defaults used when no scaling attributes are present
        scale_factor = 1.0
        add_offset = 0.0
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        
        # pick a data type to use internally (wide enough to hold scaled values)
        data_type = _get_data_uptype(raw_data_copy.dtype)
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data is the missing value,
        # so that fill values are not altered by the scaling equation
        missing_val = self.missing_value(name)
        # NOTE: the builtin bool is used here because the numpy.bool alias
        # was deprecated in numpy 1.20 and removed in numpy 1.24
        missing_mask = numpy.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None:
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy = numpy.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self,name):
        """get the h5py Dataset object for the given '/'-separated name"""
        return h5.trav(self._h5, name)
    
    def missing_value(self, name):
        """
        return the fill value defined for the named variable,
        or None if no fill value has been set
        """
        
        toReturn = None
        
        # get the missing value if it has been set, using the low-level
        # dataset-creation property list to query the fill value status
        variableObject = self.get_variable_object(name)
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        if (h5d.FILL_VALUE_DEFAULT == fillValueStatus) or (h5d.FILL_VALUE_USER_DEFINED == fillValueStatus) :
            # get_fill_value fills a (0-d) array of the variable's dtype in place
            temp = numpy.array((1), dtype=variableObject.dtype)
            pListObj.get_fill_value(temp)
            toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # writing to HDF5 files is not supported by this wrapper yet
        raise IOUnimplimentedError('Unable to create variable in hdf 5 file, this functionality is not yet available.')
        
        return None
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        # writing to HDF5 files is not supported by this wrapper yet
        raise IOUnimplimentedError('Unable to add attribute to hdf 5 file, this functionality is not yet available.')
        
        return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attrs
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attrs = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if (attributeName in temp_attrs) :
                toReturn = temp_attrs[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._h5.attrs
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._h5.attrs :
                toReturn = self._h5.attrs[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
940

(no author)'s avatar
(no author) committed
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976

class aeri(object):
    """wrapper for AERI RNC/SUM/CXS/etc datasets
    """
    _dmv = None
    _vectors = { }
    _scalars = { }
    
    @staticmethod
    def _meta_mapping(fp):
        ids = fp.metaIDs()
        names = [fp.queryMetaDescString(1, id_, fp.SHORTNAME) for id_ in ids]
        assert len(ids) == len(names)
        return (dict((n, i) for n, i in zip(names, ids)))
    
    def _inventory(self):
        fp = self._dmv
        assert(fp is not None)
        # get list of vectors and scalars
        self._vectors = dict( (fp.queryVectorDescString(n,fp.SHORTNAME), n) for n in fp.vectorIDs() )
        self._scalars = self._meta_mapping(fp)

    def __init__(self, filename, allowWrite=False):
        """open the named AERI file via the dmv library

        Only read access is supported; on failure self._dmv is left as None.
        """
        assert allowWrite == False
        if dmvlib is None:
            LOG.error('cannot open AERI files without dmv module being available')
            return
        self._dmv = dmvlib.dmv()
        status = self._dmv.openFile(filename)
        if status != 0:
            LOG.error("unable to open file, rc=%d" % status)
            self._dmv = None
        else:
            self._inventory()
    
    def __call__(self):
Eva Schiffer's avatar
Eva Schiffer committed
977
        return list(self._vectors) + list(self._scalars)
(no author)'s avatar
(no author) committed
978
979
980
981
982
983
984
985
986
        
    def __getitem__(self, name):
        fp = self._dmv
        assert(fp is not None)
        if 'DMV_RECORDS' in os.environ:
            nrecs = int(os.environ['DMV_RECORDS'])
            LOG.warning('overriding dmv record count to %d' % nrecs)
        else:
            nrecs = self._dmv.recordCount()
987
        recrange = list(range(1, nrecs+1))
(no author)'s avatar
(no author) committed
988
989
990
        if name in self._vectors:
            vid = self._vectors[name]
            vdata = [ fp.vectorDepValues(rec, vid) for rec in recrange ]
991
            return numpy.array(vdata)
(no author)'s avatar
(no author) committed
992
993
        elif name in self._scalars:
            vdata = fp.metaValueMatrix(recrange, [self._scalars[name]])
994
            return numpy.array(vdata)
(no author)'s avatar
(no author) committed
995
996
997
998
999
1000
        else:
            raise LookupError('cannot find variable %s' % name)
       
    def get_variable_object(self,name):
        """AERI files do not expose per-variable objects, so this is always None"""
        return None
    
For faster browsing, not all history is shown. View entire blame