io.py 37.6 KB
Newer Older
(no author)'s avatar
(no author) committed
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.

Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

import logging
import os

import numpy as np

LOG = logging.getLogger(__name__)

# Each format handler below is optional: if the library is missing the
# module still imports and the corresponding names fall back to harmless
# placeholders so the rest of this module can detect the absence.
# NOTE: `except Exception` (not bare `except:`) so Ctrl-C/SystemExit still
# propagate; not `except ImportError` alone because importing a
# python-2-only module on python 3 can raise SyntaxError instead.

try:
    import pyhdf
    from pyhdf.SD import SD,SDC, SDS, HDF4Error
except Exception:
    LOG.info('no pyhdf module available for HDF4')
    pyhdf = None
    SD = SDC = SDS = object
    HDF4Error = EnvironmentError

try:
    import h5py
    from h5py import h5d
except ImportError:
    LOG.info('no h5py module available for reading HDF5')
    h5py = None

try:
    import pycdf
    from pycdf import CDF, NC, strerror
except Exception:
    LOG.info('no pycdf module available')
    pycdf = None
    CDF = NC = object
    def strerror(*args):
        # stand-in for pycdf.strerror when pycdf is unavailable
        return 'no pycdf module installed'

try:
    import dmv as dmvlib
    LOG.info('loaded dmv module for AERI data file access')
except ImportError:
    LOG.info('no AERI dmv data file format module')
    dmvlib = None

try:
    import adl_blob
    LOG.info('adl_blob module found for JPSS ADL data file access')
except ImportError:
    LOG.info('no adl_blob format handler available')
    adl_blob = None

55
# attribute name used to look up the units of a variable
UNITS_CONSTANT = "units"

# attribute names checked (in this order) for a variable's fill value
fillValConst1 = '_FillValue'
fillValConst2 = 'missing_value'

# attribute names used for linear scaling of raw variable data;
# SCALE_METHOD_STR is a geocat-specific scaling-method selector
ADD_OFFSET_STR   = 'add_offset'
SCALE_FACTOR_STR = 'scale_factor'
SCALE_METHOD_STR = 'scaling_method'

64
65
66
67
68
69
70
71
72
73
74
class IOUnimplimentedError(Exception):
    """
    The exception raised when a requested io operation is not yet available.
    
        msg  -- explanation of the problem
    """
    def __init__(self, msg):
        # pass the message up to Exception so e.args / pickling / repr
        # behave normally; keep self.msg for existing callers
        super(IOUnimplimentedError, self).__init__(msg)
        self.msg = msg
    def __str__(self):
        return self.msg

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
class CaseInsensitiveAttributeCache (object) :
    """
    A cache of attributes for a single file and all of it's variables.
    This cache is considered uncased, it will store all attributes it caches
    in lower case and will lower case any strings it is asked to search for
    in the cache.
    When variable or global attribute sets are not yet loaded and something
    from that part of the file is requested the cache will transparently load
    attributes from the file behind the scenes and build the cache for that
    part of the file.
    """
    
    def __init__(self, fileObject) :
        """
        set up the empty cache and hang on to the file object we'll be caching
        
        fileObject must provide get_global_attributes(caseInsensitive=...)
        and get_variable_attributes(variableName, caseInsensitive=...)
        """
        
        self.fileToCache             = fileObject
        # dict of lower-cased global attribute name -> value; None until loaded
        self.globalAttributesLower   = None
        # dict of lower-cased variable name -> {lower-cased attr name -> value}
        self.variableAttributesLower = { }
    
    def _load_global_attributes_if_needed (self) :
        """
        load up the global attributes if they need to be cached
        """
        
        # load the attributes from the file if they aren't cached
        if self.globalAttributesLower is None :
            LOG.debug ("Loading file global attributes into case-insensitive cache.")
            tempAttrs                  = self.fileToCache.get_global_attributes(caseInsensitive=False)
            self.globalAttributesLower = dict((k.lower(), v) for k, v in tempAttrs.items())
    
    def _load_variable_attributes_if_needed (self, variableName) :
        """
        load up the variable attributes if they need to be cached
        """
        
        # make a lower cased version of the variable name
        tempVariableName = variableName.lower()
        
        # load the variable's attributes from the file if they aren't cached
        # (membership test directly on the dict, not on .keys())
        if tempVariableName not in self.variableAttributesLower :
            LOG.debug ("Loading attributes for variable \"" + variableName + "\" into case-insensitive cache.")
            tempAttrs = self.fileToCache.get_variable_attributes(variableName, caseInsensitive=False)
            # now if there are any attributes, make a case insensitive version
            self.variableAttributesLower[tempVariableName] = dict((k.lower(), v) for k, v in tempAttrs.items())
    
    def get_variable_attribute (self, variableName, attributeName) :
        """
        get the specified attribute for the specified variable,
        if this variable's attributes have not yet been loaded
        they will be loaded and cached
        
        returns None if the attribute is not present
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        toReturn = None
        tempVariableName  =  variableName.lower()
        tempAttributeName = attributeName.lower()
        if (tempVariableName in self.variableAttributesLower) and (tempAttributeName in self.variableAttributesLower[tempVariableName]) :
            toReturn = self.variableAttributesLower[tempVariableName][tempAttributeName]
        else:
            LOG.debug ("Attribute \"" + attributeName + "\" was not present for variable \"" + variableName + "\".")
        
        return toReturn
    
    def get_variable_attributes (self, variableName) :
        """
        get the variable attributes for the variable name given,
        as a dict with lower-cased keys, or None if the variable is unknown
        """
        
        self._load_variable_attributes_if_needed(variableName)
        
        toReturn = self.variableAttributesLower[variableName.lower()] if (variableName.lower() in self.variableAttributesLower) else None
        
        return toReturn
    
    def get_global_attribute (self, attributeName) :
        """
        get a global attribute with the given name, or None if not present
        """
        
        self._load_global_attributes_if_needed()
        
        toReturn = self.globalAttributesLower[attributeName.lower()] if (attributeName.lower() in self.globalAttributesLower) else None
        
        return toReturn
    
    def get_global_attributes (self) :
        """
        get all the global attributes as a dict with lower-cased keys
        """
        
        self._load_global_attributes_if_needed()
        
        toReturn = self.globalAttributesLower
        
        return toReturn

class hdf (object):
    """wrapper for HDF4 dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    # handle on the underlying pyhdf.SD.SD file object
    _hdf = None
    
    def __init__(self, filename, allowWrite=False):
        """open the given HDF4 file, read-only unless allowWrite is True"""
        
        if pyhdf is None:
            LOG.error('pyhdf is not installed and is needed in order to read hdf4 files')
            assert(pyhdf is not None)
        
        mode = SDC.READ
        if allowWrite:
            mode = mode | SDC.WRITE
        
        self._hdf = SD(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
    
    def __call__(self):
        "yield names of variables to be compared"
        return self._hdf.datasets().keys()
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = None
        scaling_method = None
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        try :
            # TODO, this currently won't work with geocat data, work around it for now
            scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
        except HDF4Error:
            # load just the scale factor and add offset information by hand
            temp = self.attributeCache.get_variable_attributes(name)
            if ADD_OFFSET_STR in temp :
                add_offset = temp[ADD_OFFSET_STR]
                data_type = np.dtype(type(add_offset))
            if SCALE_FACTOR_STR in temp :
                scale_factor = temp[SCALE_FACTOR_STR]
                data_type = np.dtype(type(scale_factor))
            if SCALE_METHOD_STR in temp :
                scaling_method = temp[SCALE_METHOD_STR]
        SDS.endaccess(variable_object)
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        # please see constant.f90 for a more up to date version of this information:
        #     INTEGER(kind=int1) :: NO_SCALE              ! 0
        #     INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
        #     INTEGER(kind=int1) :: LOG_SCALE             ! 2
        #     INTEGER(kind=int1) :: SQRT_SCALE            ! 3
        if (scaling_method == 0) :
            return raw_data_copy
        if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
            LOG.warning ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of hdf standard method. '
                      + 'This may cause problems with data consistency')
        
        # if we don't have a data type something strange has gone wrong
        assert(not (data_type is None))
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # np.bool was removed in numpy 1.24; the builtin bool is the correct dtype
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data, leaving missing values untouched
        scaled_data_copy                = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self, name):
        """return the raw pyhdf SDS object for the named variable"""
        return self._hdf.select(name)
    
    def missing_value(self, name):
        """return the fill value for the named variable, or None if not set"""
        
        return self.get_attribute(name, fillValConst1)
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        
        raises IOUnimplimentedError: writing hdf4 is not supported yet
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        
        raises IOUnimplimentedError: writing hdf4 is not supported yet
        """
        
        raise IOUnimplimentedError('Unable to add attribute to hdf file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attributes()
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            # bug fix: the cached result used to be discarded here, so the
            # case-insensitive path always returned None
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._hdf.attributes()
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._hdf.attributes() :
                toReturn = self._hdf.attributes()[attributeName]
        
        return toReturn
(no author)'s avatar
(no author) committed
348

349
class nc (object):
    """wrapper for NetCDF3/4/opendap dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    # handle on the underlying pycdf.CDF file object
    _nc = None
    
    def __init__(self, filename, allowWrite=False):
        """open the given NetCDF file, read-only unless allowWrite is True"""
        
        if pycdf is None:
            LOG.error('pycdf is not installed and is needed in order to read NetCDF files')
            assert(pycdf is not None)
        
        mode = NC.NOWRITE
        if allowWrite :
            mode = NC.WRITE
        
        self._nc = CDF(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)
    
    def __call__(self):
        "yield names of variables to be compared"
        return self._nc.variables().keys()
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = np.float32 # TODO temporary
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if SCALE_FACTOR_STR in temp :
            scale_factor = temp[SCALE_FACTOR_STR]
        if ADD_OFFSET_STR in temp :
            add_offset = temp[ADD_OFFSET_STR]
        # todo, does cdf have an equivalent of endaccess to close the variable?
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # np.bool was removed in numpy 1.24; the builtin bool is the correct dtype
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data, leaving missing values untouched
        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self, name):
        """return the raw pycdf variable object for the named variable"""
        return self._nc.var(name)
    
    def missing_value(self, name):
        """return the missing/fill value for the named variable or None
        
        the _FillValue attribute is checked first, then missing_value
        """
        
        toReturn = None
        
        temp = self.attributeCache.get_variable_attribute(name, fillValConst1)
        if temp is not None :
            toReturn = temp
        else :
            temp = self.attributeCache.get_variable_attribute(name, fillValConst2)
            if temp is not None :
                toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # if the variable already exists, stop with a warning
        if variablename in self._nc.variables().keys() :
            LOG.warning("New variable name requested (" + variablename + ") is already present in file. " +
                     "Skipping generation of new variable.")
            return None
        
        # if we have no data we won't be able to determine the data type to create the variable
        if (data is None) or (len(data) <= 0) :
            LOG.warning("Data type for new variable (" + variablename + ") could not be determined. " +
                     "Skipping generation of new variable.")
            return None
        
        # enter define mode only after the guard checks above, so an early
        # return cannot leave the file stuck in define mode
        self._nc.redef()
        
        dataType = None
        if np.issubdtype(data.dtype, np.integer) :
            dataType = NC.INT
        # TODO, at the moment the fill type is forcing me to use a double, when sometimes I want a float
        elif np.issubdtype(data.dtype, np.floating) :
            dataType = NC.DOUBLE
        # what do we do if it's some other type?
        
        # create and set all the dimensions
        dimensions = [ ]
        for dimensionNum, dimSize in enumerate(data.shape) :
            dimensions.append(self._nc.def_dim(variablename + '-index' + str(dimensionNum), dimSize))
        
        # create the new variable
        newVariable = self._nc.def_var(variablename, dataType, tuple(dimensions))
        
        # if a missing value was given, use that
        if missingvalue is not None :
            newVariable._FillValue = missingvalue
        
        # if we have a variable to copy attributes from, do so
        if variabletocopyattributesfrom is not None :
            tocopyfrom = self.get_variable_object(variabletocopyattributesfrom)
            attributes = tocopyfrom.attributes()
            for attribute in attributes.keys() :
                setattr(newVariable, attribute, attributes[attribute])
        
        self._nc.enddef()
        
        # if data was given, use that
        if data is not None :
            newVariable.put(data.tolist())
        
        return newVariable
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        variableObject = self.get_variable_object(variableName)
        
        self._nc.redef()
        
        setattr(variableObject, newAttributeName, newAttributeValue)
        
        # TODO, this will cause our attribute cache to be wrong!
        # TODO, for now, brute force clear the cache
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        self._nc.enddef()
        
        return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attributes()
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            # bug fix: the cached result used to be discarded here, so the
            # case-insensitive path always returned None
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._nc.attributes()
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._nc.attributes() :
                toReturn = self._nc.attributes()[attributeName]
        
        return toReturn
583

(no author)'s avatar
(no author) committed
584
585
586
# backwards-compatible aliases: NetCDF4 and generic CDF files are handled
# by the same wrapper class as NetCDF3
nc4 = nc
cdf = nc

587
588
# TODO remove
#FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 
589

(no author)'s avatar
(no author) committed
590
class h5(object):
    """wrapper for HDF5 datasets
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    # handle on the underlying h5py.File object
    _h5 = None
    
    def __init__(self, filename, allowWrite=False):
        """open the given HDF5 file, read-only unless allowWrite is True"""
        
        self.attributeCache = CaseInsensitiveAttributeCache(self)
        
        mode = 'r'
        if allowWrite :
            mode = 'r+'
        if h5py is None:
            LOG.error('h5py module is not installed and is needed in order to read h5 files')
            assert(h5py is not None)
        self._h5 = h5py.File(filename, mode)
    
    def __call__(self):
        "yield names of variables to be compared"
        
        variableList = [ ]
        def testFn (name, obj) :
            # only Dataset nodes are variables; groups are skipped
            if isinstance(obj, h5py.Dataset) :
                try :
                    tempType = obj.dtype # this is required to provoke a type error for closed data sets
                    variableList.append(name)
                except TypeError :
                    LOG.debug('TypeError prevents the use of variable ' + name
                              + '. This variable will be ignored')
        
        self._h5.visititems(testFn)
        
        LOG.debug('variables from visiting h5 file structure: ' + str(variableList))
        
        return(variableList)
    
    @staticmethod
    def trav(h5,pth):
        """walk a '/'-separated path down from the given h5 node,
        ignoring empty path components (such as a leading '/')
        """
        # bug fix: reduce is not a builtin on python 3
        from functools import reduce
        return reduce( lambda x,a: x[a] if a else x, pth.split('/'), h5)
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = np.float32 # TODO temporary
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        
        # load the scale factor and add offset
        temp = self.attributeCache.get_variable_attributes(name)
        if (SCALE_FACTOR_STR in temp) :
            scale_factor = temp[SCALE_FACTOR_STR]
        if (ADD_OFFSET_STR in temp) :
            add_offset = temp[ADD_OFFSET_STR]
        # todo, does cdf have an equivalent of endaccess to close the variable?
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        # np.bool was removed in numpy 1.24; the builtin bool is the correct dtype
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data, leaving missing values untouched
        scaled_data_copy = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy
    
    def get_variable_object(self,name):
        """return the h5py Dataset object for the named variable"""
        return h5.trav(self._h5, name)
    
    def missing_value(self, name):
        """return the fill value defined for the named variable, or None"""
        
        toReturn = None
        
        # get the missing value if it has been set
        variableObject = self.get_variable_object(name)
        pListObj = variableObject.id.get_create_plist()
        fillValueStatus = pListObj.fill_value_defined()
        if (h5d.FILL_VALUE_DEFAULT is fillValueStatus) or (h5d.FILL_VALUE_USER_DEFINED is fillValueStatus) :
            temp = np.array((1), dtype=variableObject.dtype)
            pListObj.get_fill_value(temp)
            toReturn = temp
        
        return toReturn
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        
        raises IOUnimplimentedError: writing hdf5 is not supported yet
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf 5 file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        
        raises IOUnimplimentedError: writing hdf5 is not supported yet
        """
        
        raise IOUnimplimentedError('Unable to add attribute to hdf 5 file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attrs
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attrs = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if (attributeName in temp_attrs) :
                toReturn = temp_attrs[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            # bug fix: the cached result used to be discarded here, so the
            # case-insensitive path always returned None
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._h5.attrs
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._h5.attrs :
                toReturn = self._h5.attrs[attributeName]
        
        return toReturn
(no author)'s avatar
(no author) committed
775

(no author)'s avatar
(no author) committed
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845

class aeri(object):
    """wrapper for AERI RNC/SUM/CXS/etc datasets

    Data access goes through the optional dmv module; when it is missing or
    the file cannot be opened, self._dmv stays None and the instance is
    effectively empty.
    """
    # class-level defaults; __init__ replaces the dicts with per-instance
    # copies so separate files never share inventory state
    _dmv = None
    _vectors = { }
    _scalars = { }
    
    @staticmethod
    def _meta_mapping(fp):
        """build a mapping of scalar short-names to their meta ids for an open dmv file"""
        ids = fp.metaIDs()
        names = [fp.queryMetaDescString(1, id_, fp.SHORTNAME) for id_ in ids]
        assert len(ids) == len(names)
        return (dict((n, i) for n, i in zip(names, ids)))
    
    def _inventory(self):
        """populate self._vectors and self._scalars from the open dmv file"""
        fp = self._dmv
        assert(fp is not None)
        # get list of vectors and scalars
        self._vectors = dict( (fp.queryVectorDescString(n,fp.SHORTNAME), n) for n in fp.vectorIDs() )
        self._scalars = self._meta_mapping(fp)

    def __init__(self, filename, allowWrite=False):
        """open the named AERI file read-only and inventory its variables

        writing is not supported, so allowWrite must be False
        """
        assert(allowWrite==False)
        # give this instance its own (empty) inventory so a failed open does
        # not share or pollute the class-level default dicts
        self._vectors = { }
        self._scalars = { }
        if dmvlib is None:
            LOG.error('cannot open AERI files without dmv module being available')
            return
        self._dmv = dmvlib.dmv()
        rc = self._dmv.openFile(filename)
        if rc!=0:
            LOG.error("unable to open file, rc=%d" % rc)
            self._dmv = None        
        else:
            self._inventory()
    
    def __call__(self):
        """return the list of all variable names (vectors followed by scalars)"""
        return list(self._vectors.keys()) + list(self._scalars.keys())
        
    def __getitem__(self, name):
        """read all records of the named vector or scalar as a numpy array

        raises LookupError when the name is not in the file's inventory
        """
        fp = self._dmv
        assert(fp is not None)
        if 'DMV_RECORDS' in os.environ:
            # the record count can be overridden via the environment, e.g.
            # to limit how much data is read while testing
            nrecs = int(os.environ['DMV_RECORDS'])
            LOG.warning('overriding dmv record count to %d' % nrecs)
        else:
            nrecs = self._dmv.recordCount()
        recrange = range(1, nrecs+1)
        if name in self._vectors:
            vid = self._vectors[name]
            vdata = [ fp.vectorDepValues(rec, vid) for rec in recrange ]
            return np.array(vdata)
        elif name in self._scalars:
            vdata = fp.metaValueMatrix(recrange, [self._scalars[name]])
            return np.array(vdata)
        else:
            raise LookupError('cannot find variable %s' % name)
       
    def get_variable_object(self,name):
        # AERI files expose no richer variable object; callers get None
        return None
    
    def missing_value(self, name):
        # NaN is used as the universal missing-value sentinel for AERI data
        return float('nan')
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # writing is unsupported; the unreachable `return None` that used to
        # follow this raise has been removed
        raise IOUnimplimentedError('Unable to create variable in aeri file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        # writing is unsupported; the unreachable `return` that used to
        # follow this raise has been removed
        raise IOUnimplimentedError('Unable to add attribute to aeri file, this functionality is not yet available.')
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name

        attribute retrieval is not implemented for AERI files, so this is
        always an empty dict
        """
        toReturn = { }
        
        # TODO
        # (LOG.warn is deprecated in the stdlib; use LOG.warning)
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None

        attribute retrieval is not implemented for AERI files, so this is
        always None
        """
        toReturn = None
        
        # TODO
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None

        attribute retrieval is not implemented for AERI files, so this is
        always None
        """
        
        toReturn = None
        
        # TODO
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None

        attribute retrieval is not implemented for AERI files, so this is
        always None
        """
        
        toReturn = None
        
        # TODO
        LOG.warning('Glance does not yet support attribute retrieval in AERI files. None will be used.')
        
        return toReturn
(no author)'s avatar
(no author) committed
907
908
909
910
911

# handle the variety of file suffixes by building aliases to aeri class
# NOTE: these module-level aliases deliberately shadow the builtin `sum` and
# the stdlib `csv` module name so that FORMAT-suffix lookup resolves them;
# within this module use builtins.sum / import csv under another name if needed
cxs = rnc = cxv = csv = spc = sum = uvs = aeri


(no author)'s avatar
   
(no author) committed
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
def _search_xml(pathname):
    xs = '.xml'
    yield pathname + xs
    yield os.path.splitext(pathname)[0] + xs
    yield pathname.replace('-', '_') + xs
    yield os.path.splitext(pathname)[0].replace('-', '_') + xs

class jpss_adl(object):
    """wrapper for JPSS ADL BLOBs 
    This is a somewhat unique case in that the BLOB loader requires both an XML path and a BLOB path.
    In this case, it is assumed that a softlinked pathname.xml exists for a given pathname.
    FORMAT=jpss_adl glance stats truth/ATMS-FSDR.BE ATMS-FSDR
    """
    _blob = None

    def __init__(self, filename, allowWrite=False):
        """map the named JPSS ADL BLOB read-only, locating its XML layout

        The layout XML is searched for next to the blob (see _search_xml);
        endianness is inferred from a .be/.le filename suffix, defaulting to
        native. On any failure self._blob stays None.
        """
        assert(allowWrite==False)
        for xmlname in _search_xml(filename):
            if not os.path.exists(xmlname): 
                continue
            LOG.info('using %s for %s' % (xmlname, filename))
            break
        if not os.path.exists(xmlname):
            LOG.error(xmlname + ' needs to provide layout for ' + filename)
            return            
        if adl_blob is None:
            LOG.error('cannot open JPSS ADL files without adl_blob module in $PYTHONPATH')
            return
        if filename.lower().endswith('.be'):
            endian = adl_blob.BIG_ENDIAN
        elif filename.lower().endswith('.le'):
            # BUG FIX: this branch previously re-tested '.be', making the
            # little-endian case unreachable
            endian = adl_blob.LITTLE_ENDIAN
        else:
            endian = adl_blob.NATIVE_ENDIAN
        LOG.debug('endianness of %s is %s' % (filename, endian))
        self._blob = adl_blob.map(xmlname, filename, writable=False, endian=endian)        
    
    def __call__(self):
        fieldnames = [name for name,field in self._blob._fields_]
        return fieldnames
        
    def __getitem__(self, name):
        field = getattr(self._blob, name)
        if not hasattr(field,'_length_'): # FUTURE: is this rigorous? 
            LOG.info('creating numpy array out of singleton value for %s' % name)
            return np.array([field])
        return np.array(field)
       
    def get_variable_object(self,name):
        # ADL blobs expose raw fields only; there is no richer variable
        # object to hand back, so callers always receive None here
        return None
    
    def missing_value(self, name):
        # ADL blobs carry no per-variable fill-value metadata; NaN is used
        # as the universal missing-value sentinel for every variable
        return float('nan')
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        # writing is unsupported for ADL blobs; removed the unreachable
        # `return None` that previously followed this raise
        raise IOUnimplimentedError('Unable to create variable in JPSS ADL file, this functionality is not yet available.')
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        # writing is unsupported for ADL blobs; removed the unreachable
        # `return` that previously followed this raise
        raise IOUnimplimentedError('Unable to add attribute to JPSS ADL file, this functionality is not yet available.')
    
989
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name

        attribute retrieval is not implemented for ADL blobs, so this is
        always an empty dict
        """
        toReturn = { }
        
        # TODO
        # (LOG.warn is deprecated in the stdlib logging module; use warning)
        LOG.warning('Glance does not yet support attribute retrieval in JPSS ADL files. None will be used.')
        
        return toReturn
    
1000
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
For faster browsing, not all history is shown. View entire blame