io.py 47.4 KB
Newer Older
(no author)'s avatar
(no author) committed
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.

Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

10
import os, logging
(no author)'s avatar
   
(no author) committed
11
12
13
import numpy as np

LOG = logging.getLogger(__name__)
(no author)'s avatar
(no author) committed
14

(no author)'s avatar
   
(no author) committed
15
16
17
18
19
20
21
22
23
try:
    import pyhdf
    from pyhdf.SD import SD,SDC, SDS, HDF4Error
except:
    LOG.info('no pyhdf module available for HDF4')
    pyhdf = None
    SD = SDC = SDS = object
    HDF4Error = EnvironmentError
    
24
25
try:
    import h5py
26
    from h5py import h5d
27
except ImportError:
(no author)'s avatar
   
(no author) committed
28
29
    LOG.info('no h5py module available for reading HDF5')
    h5py = None
(no author)'s avatar
(no author) committed
30

31
32
33
34
35
36
37
38
# the newer netCDF library that replaced pycdf
try:
    import netCDF4
except:
    LOG.info("unable to import netcdf4 library")
    netCDF4 = None

""" this is the previous netCDF library, remove this once the new one is fully tested
(no author)'s avatar
   
(no author) committed
39
40
41
42
43
44
45
46
47
try:    
    import pycdf
    from pycdf import CDF, NC, strerror
except:
    LOG.info('no pycdf module available')
    pycdf = None
    CDF = NC = object
    def strerror(*args):
        return 'no pycdf module installed'
48
"""
(no author)'s avatar
(no author) committed
49

(no author)'s avatar
(no author) committed
50
51
52
53
54
55
56
try:
    import dmv as dmvlib
    LOG.info('loaded dmv module for AERI data file access')
except ImportError:
    LOG.info('no AERI dmv data file format module')
    dmvlib = None

(no author)'s avatar
   
(no author) committed
57
58
59
60
61
62
63
try:
    import adl_blob
    LOG.info('adl_blob module found for JPSS ADL data file access')
except ImportError:
    LOG.info('no adl_blob format handler available')
    adl_blob = None

64
65
66
67
68
69
70
try :
    from osgeo import gdal
    LOG.info('loading osgeo module for GeoTIFF data file access')
except :
    LOG.info('no osgeo available for reading GeoTIFF data files')
    gdal = None

71
# attribute name presumably used to look up a variable's units
# (not referenced in the visible part of this file -- TODO confirm usage)
UNITS_CONSTANT = "units"
(no author)'s avatar
(no author) committed
72

73
74
75
# attribute names that may hold a variable's fill (missing) value;
# nc.missing_value checks them in this order, hdf.missing_value only uses the first
fillValConst1 = '_FillValue'
fillValConst2 = 'missing_value'

76
77
78
79
# (lower case) attribute names the __getitem__ methods look up in the
# case-insensitive attribute cache when deciding how to scale variable data
ADD_OFFSET_STR   = 'add_offset'
SCALE_FACTOR_STR = 'scale_factor'
SCALE_METHOD_STR = 'scaling_method'

80
81
82
83
84
85
86
87
88
89
90
class IOUnimplimentedError(Exception):
    """
    Raised when a requested io operation has not been implemented yet.
    
        msg  -- human readable explanation of what is unavailable
    """

    def __init__(self, msg):
        """remember the explanation so it can be reported by __str__"""
        self.msg = msg

    def __str__(self):
        """the string form of this error is just the stored explanation"""
        explanation = self.msg
        return explanation

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
class CaseInsensitiveAttributeCache (object) :
    """
    Lazily built, lower-cased cache of the attributes of one file object.

    Attribute names (and variable names) are stored in lower case and every
    lookup lower-cases the names it is given, so lookups ignore case.

    Nothing is read when the cache is created: the global attributes and each
    variable's attributes are fetched from the wrapped file object the first
    time something from that part of the file is requested.

    The wrapped file object must provide get_global_attributes and
    get_variable_attributes methods accepting caseInsensitive=False.
    """

    def __init__(self, fileObject) :
        """
        start with an empty cache and keep a reference to the file object
        whose attributes will be cached on demand
        """

        self.variableAttributesLower = { }
        self.globalAttributesLower   = None
        self.fileToCache             = fileObject

    def _load_global_attributes_if_needed (self) :
        """
        fetch and lower-case the global attributes unless already cached
        """

        if self.globalAttributesLower is not None :
            return

        LOG.debug ("Loading file global attributes into case-insensitive cache.")
        rawGlobals = self.fileToCache.get_global_attributes(caseInsensitive=False)
        self.globalAttributesLower = {key.lower(): value for key, value in rawGlobals.items()}

    def _load_variable_attributes_if_needed (self, variableName) :
        """
        fetch and lower-case the attributes of variableName unless they
        are already cached (the cache key is the lower-cased variable name)
        """

        cacheKey = variableName.lower()

        if cacheKey in self.variableAttributesLower :
            return

        LOG.debug ('Loading attributes for variable "' + variableName + '" into case-insensitive cache.')
        # the file itself is asked using the name exactly as the caller gave it
        rawAttrs = self.fileToCache.get_variable_attributes(variableName, caseInsensitive=False)
        self.variableAttributesLower[cacheKey] = {key.lower(): value for key, value in rawAttrs.items()}

    def get_variable_attribute (self, variableName, attributeName) :
        """
        return the value of one attribute of one variable, or None when the
        variable has no such attribute; the variable's attributes are loaded
        and cached first if that has not happened yet
        """

        self._load_variable_attributes_if_needed(variableName)

        variableKey  =  variableName.lower()
        attributeKey = attributeName.lower()

        try :
            return self.variableAttributesLower[variableKey][attributeKey]
        except KeyError :
            LOG.debug ('Attribute "' + attributeName + '" was not present for variable "' + variableName + '".')
            return None

    def get_variable_attributes (self, variableName) :
        """
        return the lower-cased attribute dictionary of the named variable
        """

        self._load_variable_attributes_if_needed(variableName)

        return self.variableAttributesLower.get(variableName.lower())

    def get_global_attribute (self, attributeName) :
        """
        return the value of the named global attribute, or None if absent
        """

        self._load_global_attributes_if_needed()

        return self.globalAttributesLower.get(attributeName.lower())

    def get_global_attributes (self) :
        """
        return the dictionary of lower-cased global attributes
        """

        self._load_global_attributes_if_needed()

        return self.globalAttributesLower

    def is_loadable_type (self, name) :
        """
        report whether the indicated variable is a type that can be loaded;
        currently every name is reported as loadable
        """

        # TODO, are there any bad types for these files?
        return True
197
198

class hdf (object):
    """wrapper for HDF4 dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    # the underlying pyhdf SD object; stays None until __init__ opens the file
    _hdf = None
    
    def __init__(self, filename, allowWrite=False):
        """
        open the HDF4 file at filename and set up an empty attribute cache
        
        the file is opened read-only unless allowWrite is True
        
        raises AssertionError if the pyhdf module could not be imported
        """
        
        if pyhdf is None:
            message = 'pyhdf is not installed and is needed in order to read hdf4 files'
            LOG.error(message)
            # raised explicitly (instead of using an assert statement) so the
            # check is not stripped when python runs with -O
            raise AssertionError(message)
        
        mode = SDC.READ
        if allowWrite:
            mode = mode | SDC.WRITE
        
        self._hdf = SD(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)

    def __call__(self):
        "yield names of variables to be compared"
        return self._hdf.datasets().keys()
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        """
        return the data of the named variable, scaled as
        (raw * scale_factor) + add_offset wherever the raw value is not
        the variable's fill value
        
        the raw data is returned untouched when no scaling is needed
        
        raises AssertionError if scaling is needed but no data type for
        the scaled data could be determined
        """
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = None 
        scaling_method = None
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        try :
            raw_data_copy = variable_object[:]
            try :
                # TODO, this currently won't work with geocat data, work around it for now
                scale_factor, _scale_error, add_offset, _offset_error, data_type = SDS.getcal(variable_object)
            except HDF4Error:
                # load just the scale factor and add offset information by hand
                temp = self.attributeCache.get_variable_attributes(name)
                if ADD_OFFSET_STR in temp :
                    add_offset = temp[ADD_OFFSET_STR]
                    data_type = np.dtype(type(add_offset))
                if SCALE_FACTOR_STR in temp :
                    scale_factor = temp[SCALE_FACTOR_STR]
                    data_type = np.dtype(type(scale_factor))
                if SCALE_METHOD_STR in temp :
                    scaling_method = temp[SCALE_METHOD_STR]
        finally :
            # release the variable even if reading it failed
            SDS.endaccess(variable_object)
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        # please see constant.f90 for a more up to date version of this information:
        #     INTEGER(kind=int1) :: NO_SCALE              ! 0
        #     INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
        #     INTEGER(kind=int1) :: LOG_SCALE             ! 2
        #     INTEGER(kind=int1) :: SQRT_SCALE            ! 3
        if (scaling_method == 0) :
            return raw_data_copy
        if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
            LOG.warning ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of hdf standard method. '
                         + 'This may cause problems with data consistency')
        
        # if we don't have a data type something strange has gone wrong
        if data_type is None :
            raise AssertionError('no data type could be determined for scaling variable ' + str(name))
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None :
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy                = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy 
    
    def get_variable_object(self, name):
        """
        return the object the underlying SD file selects for the given name
        """
        return self._hdf.select(name)
    
    def missing_value(self, name):
        """
        return the fill value attribute of the named variable, or None
        """
        
        return self.get_attribute(name, fillValConst1)
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        not yet available for hdf files: always raises IOUnimplimentedError
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        
        not yet available for hdf files: always raises IOUnimplimentedError
        """
        
        raise IOUnimplimentedError('Unable add attribute to hdf file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        
        with caseInsensitive the attribute names come back lower-cased
        from the attribute cache, otherwise they are read from the file as is
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attributes(variableName)
        
        return self.get_variable_object(variableName).attributes()
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attribute(variableName, attributeName)
        
        temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
        if attributeName in temp_attributes :
            return temp_attributes[attributeName]
        
        return None
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a dictionary of all the global attributes for this file
        
        with caseInsensitive the attribute names come back lower-cased
        from the attribute cache, otherwise they are read from the file as is
        """
        
        if caseInsensitive :
            # the cached dictionary must actually be handed back to the caller
            return self.attributeCache.get_global_attributes()
        
        return self._hdf.attributes()
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attribute(attributeName)
        
        # read the attributes once rather than once for the test and once for the lookup
        file_attributes = self._hdf.attributes()
        if attributeName in file_attributes :
            return file_attributes[attributeName]
        
        return None
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
380

381
class nc (object):
    """wrapper for netcdf4-python data access for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    # the underlying netCDF4 Dataset; None until opened and again after close()
    _nc = None
    
    def __init__(self, filename, allowWrite=False):
        """
        open the NetCDF file at filename and set up an empty attribute cache
        
        raises AssertionError if the netCDF4 module could not be imported
        """
        
        if netCDF4 is None:
            message = 'netCDF4 is not installed and is needed in order to read NetCDF files'
            LOG.error(message)
            # raised explicitly (instead of using an assert statement) so the
            # check is not stripped when python runs with -O
            raise AssertionError(message)
        
        # NOTE(review): netCDF4 documents mode 'w' as creating a new file
        # (clobbering an existing one by default); if allowWrite is meant to
        # modify an existing file this should probably be 'a' -- confirm with callers
        mode = 'w' if allowWrite else 'r'
        
        self._nc = netCDF4.Dataset(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)

    def __call__(self):
        "yield names of variables to be compared"
        return self._nc.variables.keys()
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        """
        return the data of the named variable, scaled as
        (raw * scale_factor) + add_offset wherever the raw value is not
        the variable's fill value
        
        the raw data is returned untouched when no scaling is needed
        """
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = np.float32 # TODO temporary
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        
        # netCDF4 by default applies scale_factor / add_offset and masks fill
        # values itself when data is read; turn that off so we get the raw
        # stored values and the scaling below is applied exactly once
        variable_object.set_auto_maskandscale(False)
        raw_data_copy = variable_object[:]
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None :
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy 
    
    # TODO, this hasn't been supported in other file types
    def close (self) :
        """
        close the underlying file; this object can not be used afterwards
        """
        self._nc.close()
        self._nc = None

    def get_variable_object(self, name):
        """
        return the variable object the underlying file holds for the given name
        """
        return self._nc.variables[name]
    
    def missing_value(self, name):
        """
        return the fill value of the named variable, or None
        
        the _FillValue attribute is preferred, missing_value is the fall back
        """
        
        toReturn = self.attributeCache.get_variable_attribute(name, fillValConst1)
        if toReturn is None :
            toReturn = self.attributeCache.get_variable_attribute(name, fillValConst2)
        
        return toReturn

    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        data is expected to be a numpy array; one new dimension per axis of
        data is created, named variablename + '-index' + axis number
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # if the variable already exists, stop with a warning
        if variablename in self._nc.variables :
            LOG.warning("New variable name requested (" + variablename + ") is already present in file. " +
                        "Skipping generation of new variable.")
            return None
        # if we have no data we won't be able to determine the data type to create the variable
        if (data is None) or (len(data) <= 0) :
            LOG.warning("Data type for new variable (" + variablename + ") could not be determined. " +
                        "Skipping generation of new variable.")
            return None
        
        # integers are stored as 32 bit ints and floats as doubles;
        # any other type is handed to netCDF4 unchanged
        # TODO, at the moment floats are always stored as doubles, when sometimes a float is wanted
        if np.issubdtype(data.dtype, np.integer) :
            dataType = 'i4'
        elif np.issubdtype(data.dtype, np.floating) :
            dataType = 'f8'
        else :
            dataType = data.dtype
        
        # create all the dimensions, netCDF4 refers to them by name
        dimensionNames = [ ]
        for dimensionNum, dimSize in enumerate(data.shape) :
            dimensionName = variablename + '-index' + str(dimensionNum)
            self._nc.createDimension(dimensionName, dimSize)
            dimensionNames.append(dimensionName)
        
        # gather any attributes we were asked to copy
        attributesToCopy = { }
        if variabletocopyattributesfrom is not None :
            tocopyfrom = self.get_variable_object(variabletocopyattributesfrom)
            for attribute in tocopyfrom.ncattrs() :
                attributesToCopy[attribute] = getattr(tocopyfrom, attribute)
        
        # netCDF4 only accepts the fill value while the variable is being
        # created, so it is taken out of the attributes copied afterwards;
        # as before, a copied fill value takes precedence over missingvalue
        copiedFillValue = attributesToCopy.pop(fillValConst1, None)
        fillValue = copiedFillValue if copiedFillValue is not None else missingvalue
        
        # create the new variable
        newVariable = self._nc.createVariable(variablename, dataType, tuple(dimensionNames), fill_value=fillValue)
        
        for attribute in attributesToCopy :
            newVariable.setncattr(attribute, attributesToCopy[attribute])
        
        # store the data that was given
        newVariable[:] = data
        
        return newVariable

    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        variableObject = self.get_variable_object(variableName)
        
        variableObject.setncattr(newAttributeName, newAttributeValue)
        
        # TODO, this will cause our attribute cache to be wrong!
        # TODO, for now, brute force clear the cache
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        
        with caseInsensitive the attribute names come back lower-cased
        from the attribute cache, otherwise they are read from the file as is
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attributes(variableName)
        
        tempVarObj = self.get_variable_object(variableName)
        return dict((attrKey, getattr(tempVarObj, attrKey)) for attrKey in tempVarObj.ncattrs())
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attribute(variableName, attributeName)
        
        temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
        if attributeName in temp_attributes :
            # the value was already read from the variable when the dictionary was built
            return temp_attributes[attributeName]
        
        return None
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a dictionary of all the global attributes for this file
        
        with caseInsensitive the attribute names come back lower-cased
        from the attribute cache, otherwise they are read from the file as is
        """
        
        if caseInsensitive :
            # the cached dictionary must actually be handed back to the caller
            return self.attributeCache.get_global_attributes()
        
        return dict((attrKey, getattr(self._nc, attrKey)) for attrKey in self._nc.ncattrs())
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        if caseInsensitive :
            return self.attributeCache.get_global_attribute(attributeName)
        
        if attributeName in self._nc.ncattrs() :
            return getattr(self._nc, attributeName)
        
        return None
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """

        return True
628

(no author)'s avatar
(no author) committed
629
630
631
# alternate names for the nc wrapper class
# (presumably so other NetCDF related file types resolve to the same reader -- TODO confirm)
nc4 = nc
cdf = nc

632
633
# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 
634

(no author)'s avatar
(no author) committed
635
class h5(object):
    """wrapper for HDF5 datasets
    
    __call__ returns the list of dataset names found in the file
    __getitem__ returns a numpy array holding the (scaled) data of one dataset
    """
    _h5 = None
    
    def __init__(self, filename, allowWrite=False):
        """
        open the HDF5 file at filename
        
        the file is opened read only ('r') unless allowWrite is True,
        in which case it is opened for reading and writing ('r+')
        """
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        mode = 'r+' if allowWrite else 'r'
        if h5py is None:
            LOG.error('h5py module is not installed and is needed in order to read h5 files')
            assert(h5py is not None)
        self._h5 = h5py.File(filename, mode)
    
    def __call__(self):
        """
        returns the names of all the datasets that can be found by
        visiting every item in the file
        """
        
        variableList = [ ]
        def testFn (name, obj) :
            # only datasets are variables, groups and other items are skipped
            if isinstance(obj, h5py.Dataset) :
                try :
                    obj.dtype # this is required to provoke a type error for closed data sets
                    variableList.append(name)
                except TypeError :
                    LOG.debug('TypeError prevents the use of variable ' + name
                              + '. This variable will be ignored')
        
        self._h5.visititems(testFn)
        
        LOG.debug('variables from visiting h5 file structure: ' + str(variableList))
        
        return(variableList)
    
    @staticmethod
    def trav(h5,pth): 
        """
        walk down from h5 by indexing with each '/' separated part of pth
        
        empty parts (from leading, trailing, or doubled slashes) are skipped,
        so '/a/b' and 'a/b' reach the same object
        """
        # reduce is not a builtin in python 3; functools provides it in
        # python 2.6+ and python 3 alike
        from functools import reduce
        return reduce( lambda x,a: x[a] if a else x, pth.split('/'), h5)
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        """
        load the data for the named variable
        
        when the variable has no scale factor or add offset (or they are
        1.0 and 0.0) the raw data is returned untouched; otherwise a float32
        copy is returned with the scaling applied everywhere except where
        the raw data equals the variable's missing value
        """
        
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = np.float32 # TODO temporary
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data is the missing value
        # (np.bool has been removed from numpy, the builtin bool is equivalent)
        missing_val = self.missing_value(name)
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        if missing_val is not None :
            # only compare when a fill value exists; with None nothing is missing
            missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self,name):
        """
        returns the object found in the file at the '/' separated path name
        """
        return h5.trav(self._h5, name)
    
    def missing_value(self, name):
        """
        returns the fill value of the named variable as a numpy array created
        with the variable's dtype, or None if the creation property list
        reports the fill value as neither default nor user defined
        """
        
        toReturn = None
        
        # get the missing value if it has been set
        variableObject = self.get_variable_object(name)
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        # compare by value; identity (is) tests on these constants only work
        # by accident of the interpreter's caching
        if fillValueStatus in (h5d.FILL_VALUE_DEFAULT, h5d.FILL_VALUE_USER_DEFINED) :
            temp = np.array((1), dtype=variableObject.dtype)
            pListObj.get_fill_value(temp) # fills temp in place
            toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        this is not yet available for hdf 5 files, so an
        IOUnimplimentedError is always raised
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf 5 file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        
        this is not yet available for hdf 5 files, so an
        IOUnimplimentedError is always raised
        """
        
        raise IOUnimplimentedError('Unable to add attribute to hdf 5 file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        
        the case insensitive form comes from the attribute cache, the case
        sensitive form is the attrs of the variable object itself
        """
        
        if caseInsensitive :
            return self.attributeCache.get_variable_attributes(variableName)
        
        return self.get_variable_object(variableName).attrs
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attrs = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if (attributeName in temp_attrs) :
                toReturn = temp_attrs[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get all the global attributes for this file
        
        the case insensitive form comes from the attribute cache, the case
        sensitive form is the attrs of the file object itself
        """
        
        if caseInsensitive :
            # the result of the cache lookup was previously thrown away,
            # which made this method return None by default
            return self.attributeCache.get_global_attributes()
        
        return self._h5.attrs
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._h5.attrs :
                toReturn = self._h5.attrs[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
828

(no author)'s avatar
(no author) committed
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898

class aeri(object):
    """wrapper for AERI RNC/SUM/CXS/etc datasets
    
    __call__ returns the names of the vectors and scalars in the file
    __getitem__ returns a numpy array of the data for one of those names
    """
    _dmv = None
    # name -> id look ups; _inventory replaces these with per instance
    # dictionaries once a file has been opened successfully
    _vectors = { }
    _scalars = { }
    
    @staticmethod
    def _meta_mapping(fp):
        """
        build a dictionary from the short name of each meta data entry
        in fp to the id of that entry
        """
        ids = fp.metaIDs()
        names = [fp.queryMetaDescString(1, id_, fp.SHORTNAME) for id_ in ids]
        return dict(zip(names, ids))
    
    def _inventory(self):
        """
        record the short names and ids of the vectors and scalars in the open file
        """
        fp = self._dmv
        assert(fp is not None)
        # get list of vectors and scalars
        self._vectors = dict( (fp.queryVectorDescString(n,fp.SHORTNAME), n) for n in fp.vectorIDs() )
        self._scalars = self._meta_mapping(fp)
    
    def __init__(self, filename, allowWrite=False):
        """
        open the AERI file at filename (writing is not supported)
        
        if the dmv module is missing or the file can not be opened an error
        is logged and the object is left without an open file
        """
        assert(allowWrite==False)
        if dmvlib is None:
            LOG.error('cannot open AERI files without dmv module being available')
            return
        self._dmv = dmvlib.dmv()
        rc = self._dmv.openFile(filename)
        if rc!=0:
            LOG.error("unable to open file, rc=%d" % rc)
            self._dmv = None        
        else:
            self._inventory()
    
    def __call__(self):
        """
        returns the list of vector names followed by the scalar names
        """
        return list(self._vectors.keys()) + list(self._scalars.keys())
        
    def __getitem__(self, name):
        """
        returns a numpy array with the data for the named vector or scalar
        
        the number of records read is the file's record count unless the
        DMV_RECORDS environment variable is set, in which case its integer
        value is used instead
        
        raises LookupError if name is neither a vector nor a scalar
        """
        fp = self._dmv
        assert(fp is not None)
        if 'DMV_RECORDS' in os.environ:
            nrecs = int(os.environ['DMV_RECORDS'])
            LOG.warning('overriding dmv record count to %d' % nrecs)
        else:
            nrecs = self._dmv.recordCount()
        # records are numbered from 1; hand the dmv library a real list as
        # python 2's range did, rather than a python 3 range object
        recrange = list(range(1, nrecs+1))
        if name in self._vectors:
            vid = self._vectors[name]
            vdata = [ fp.vectorDepValues(rec, vid) for rec in recrange ]
            return np.array(vdata)
        elif name in self._scalars:
            vdata = fp.metaValueMatrix(recrange, [self._scalars[name]])
            return np.array(vdata)
        else:
            raise LookupError('cannot find variable %s' % name)
       
    def get_variable_object(self,name):
        """
        there is no variable object for AERI files, None is always returned
        """
        return None
    
    def missing_value(self, name):
        """
        the missing value is NaN for every variable
        """
        return float('nan')
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        this is not yet available for aeri files, so an
        IOUnimplimentedError is always raised
        """
        
        raise IOUnimplimentedError('Unable to create variable in aeri file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        
        this is not yet available for aeri files, so an
        IOUnimplimentedError is always raised
        """
        
        raise IOUnimplimentedError('Unable to add attribute to aeri file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        
        attribute retrieval is not supported yet, so a warning is logged
        and an empty dictionary is returned
        """
        toReturn = { }
        
        # TODO
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        
        attribute retrieval is not supported yet, so a warning is logged
        and None is returned
        """
        toReturn = None
        
        # TODO
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        
        attribute retrieval is not supported yet, so a warning is logged
        and None is returned
        """
        
        toReturn = None
        
        # TODO
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        
        attribute retrieval is not supported yet, so a warning is logged
        and None is returned
        """
        
        toReturn = None
        
        # TODO
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
(no author)'s avatar
(no author) committed
968
969
970
971

# handle the variety of file suffixes by building aliases to aeri class
# NOTE: one of these aliases is named `sum`, which hides the builtin sum()
# from any code in this module that runs after this line
cxs = rnc = cxv = csv = spc = sum = uvs = aeri

972
973
974
975
976
977
978
979
class tiff (object):
    """wrapper for to open GeoTIFF data sets for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    _tiff = None
    
980
981
982
983
984
985
986
987
    GRAY_NAME  = "grayscale value"
    RED_NAME   = "red"
    GREEN_NAME = "green"
    BLUE_NAME  = "blue"
    IR_NAME    = "infrared"
    ALPHA_NAME = "alpha"
    
    
988
989
990
    # if we are using meaningful names, we will translate between
    # the band index numbers and these names (otherwise bands use generic names)
    EXPECTED_BAND_NAME_KEY = {
991
992
993
994
995
                                1: [GRAY_NAME],
                                2: [GRAY_NAME, ALPHA_NAME],
                                3: [RED_NAME, GREEN_NAME, BLUE_NAME],
                                4: [RED_NAME, GREEN_NAME, BLUE_NAME, ALPHA_NAME],
                                5: [RED_NAME, GREEN_NAME, BLUE_NAME, IR_NAME, ALPHA_NAME],
996
                             }
997
998
999
1000
    
    # a reverse look up to help disambigurate what meaningful name goes with
    # which number (one of these dictionaries will be selected based on the
    # number of bands in the geotiff)
For faster browsing, not all history is shown. View entire blame