Skip to content
Snippets Groups Projects
Commit 0db6a136 authored by (no author)'s avatar (no author)
Browse files

moved from rayg tree

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@2 8a9318a1-56ba-4d59-b755-99d26321be01
parent e2e02039
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
# encoding: utf-8
"""
Top-level routines to compare two files.
Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""
import os, sys, logging, re
from pprint import pprint, pformat
import glance.io as io
import glance.delta as delta
LOG = logging.getLogger(__name__)
def _cvt_names(namelist, epsilon, missing):
    """Yield (name, epsilon, missing) triples for a list of variable specs.

    Each entry of namelist is either a bare variable name or a
    colon-separated spec of the form name:epsilon:missing. A field that is
    absent or empty falls back to the default passed in as epsilon or
    missing; a non-empty field is converted with float().

    Raises ValueError if a spec has more than three colon-separated fields
    or if a non-empty epsilon/missing field is not a valid float.

    >>> list(_cvt_names(['foo', 'bar:0.5:-999', 'baz::-1', 'zap:0.25'], 0.0, None))
    [('foo', 0.0, None), ('bar', 0.5, -999.0), ('baz', 0.0, -1.0), ('zap', 0.25, None)]
    """
    for spec in namelist:
        fields = spec.split(':')
        if len(fields) > 3:
            raise ValueError("expected name[:epsilon[:missing]], got %r" % (spec,))
        # Pad with empty fields so "name" and "name:eps" take the same path
        # as a fully specified "name:eps:missing" and always yield a triple.
        name, eps, mis = fields + [''] * (3 - len(fields))
        yield (name,
               float(eps) if eps else epsilon,
               float(mis) if mis else missing)
def _parse_varnames(names, terms, epsilon=0.0, missing=None):
    """Select variable names by pattern and attach epsilon/missing settings.

    names   -- iterable of candidate variable names
    terms   -- iterable of 'regex[:epsilon[:missing]]' strings; the regex is
               applied with re.match, so it is anchored at the start of the
               name only
    epsilon -- default used when a term has no (or an empty) epsilon field
    missing -- default used when a term has no (or an empty) missing field

    Returns a set of (name, epsilon, missing) triples, one for every
    (name, term) pair whose regex matches. A name matched by several terms
    therefore appears once per distinct epsilon/missing combination.

    The example compares against a set rather than printing one, so it does
    not depend on set ordering or on the interpreter's float repr.

    >>> got = _parse_varnames(['foo', 'bar', 'baz', 'zoom', 'cat'],
    ...                       ['f..:0.5:-999', 'ba.*:0.001', 'c.t::-9999'], 1e-7)
    >>> got == set([('foo', 0.5, -999.0), ('cat', 1e-7, -9999.0),
    ...             ('bar', 0.001, None), ('baz', 0.001, None)])
    True
    """
    # Compile each pattern once; keep the trailing (epsilon, missing) fields raw
    # so they are only converted for terms that actually match something.
    matchers = []
    for term in terms:
        fields = term.split(':')
        matchers.append((re.compile(fields[0]).match, fields[1:]))

    def _cvt_em(eps=None, mis=None):
        # empty or absent fields fall back to the caller's defaults
        return (float(eps) if eps else epsilon,
                float(mis) if mis else missing)

    selected = set()
    for name in names:
        for matches, em in matchers:
            if matches(name):
                selected.add((name,) + _cvt_em(*em))
    return selected
def main():
    """Parse the command line and run the requested glance sub-command.

    Global options select the logging level and the default epsilon and
    missing value. The first positional argument names a sub-command
    (info, stats or help); the remaining positional arguments are handed
    to it unchanged.

    Returns 0 after a sub-command has run, and 9 when no sub-command or an
    unknown one was given (the option help and the command list are printed
    in that case). With --test the module doctests are run and the process
    exits with status 2. Usage errors detected by a sub-command are reported
    through the option parser, which exits with status 2.
    """
    import optparse
    usage = """
%prog [options]
run "%prog help" to list commands
examples:
python -m glance.compare info A.hdf
python -m glance.compare stats A.hdf B.hdf '.*_prof_retr_.*:1e-4' 'nwp_._index:0'
"""
    parser = optparse.OptionParser(usage)
    parser.add_option('-t', '--test', dest="self_test",
                      action="store_true", default=False, help="run internal unit tests")
    parser.add_option('-q', '--quiet', dest="quiet",
                      action="store_true", default=False, help="only error output")
    parser.add_option('-v', '--verbose', dest="verbose",
                      action="store_true", default=False, help="enable more informational output")
    parser.add_option('-w', '--debug', dest="debug",
                      action="store_true", default=False, help="enable debug output")
    parser.add_option('-e', '--epsilon', dest="epsilon", type='float', default=0.0,
                      help="set default epsilon value for comparison threshold")
    parser.add_option('-m', '--missing', dest="missing", type='float', default=None,
                      help="set default missing-value")
    options, args = parser.parse_args()

    if options.self_test:
        import doctest
        doctest.testmod()
        # NOTE(review): exits with status 2 whether or not the doctests
        # passed -- confirm this is intended.
        sys.exit(2)

    # most verbose flag wins: debug > verbose > quiet > default
    if options.debug:
        lvl = logging.DEBUG
    elif options.verbose:
        lvl = logging.INFO
    elif options.quiet:
        lvl = logging.ERROR
    else:
        lvl = logging.WARNING
    logging.basicConfig(level=lvl)

    # sub-command name -> callable; filled in once the commands are defined
    commands = {}

    def info(*args):
        """list information about a list of files
        List available variables for comparison.
        """
        for fn in args:
            lal = sorted(io.open(fn)())
            # continuation lines are padded with one space per character of fn
            print(fn + ': ' + ('\n ' + ' ' * len(fn)).join(lal))

    def stats(*args):
        """create statistics summary of variables
        Summarize difference statistics between listed variables.
        If no variable names are given, summarize all common variables.
        Variable names can be of the form varname:epsilon:missing to use non-default epsilon or missing value.
        Variable names can be regular expressions, e.g. 'image.*' or '.*prof_retr.*::-999'
        Either epsilon or missing can be empty to stay with default.
        If _FillValue is an attribute of a variable, that will be used to find missing values where no value is given.
        Run with -v to get more detailed information on statistics.
        Examples:
        python -m glance.compare stats hdffile1 hdffile2
        python -m glance.compare stats --epsilon=0.00001 A.hdf B.hdf baseline_cmask_seviri_cloud_mask:0.002:
        python -m glance.compare -w stats --epsilon=0.00001 A.hdf A.hdf imager_prof_retr_abi_total_precipitable_water_low::-999
        """
        if len(args) < 2:
            # report a usage error instead of failing on tuple unpacking
            parser.error("stats requires two file names: A B [variable patterns...]")
        afn, bfn = args[:2]
        LOG.info("opening %s" % afn)
        a = io.open(afn)
        LOG.info("opening %s" % bfn)
        b = io.open(bfn)
        # only variables present in both files can be compared
        cnames = set(a()).intersection(set(b()))
        pats = args[2:] or ['.*']
        names = _parse_varnames(cnames, pats, options.epsilon, options.missing)
        LOG.debug(str(names))
        wordy = options.verbose or options.debug
        doc_each = wordy and len(names) == 1   # explain each statistic inline
        doc_atend = wordy and len(names) != 1  # explain all statistics once, at the end
        # names is a set; order by variable name so the report is reproducible
        for name, epsilon, missing in sorted(names, key=lambda spec: spec[0]):
            avar = a[name]
            bvar = b[name]
            if missing is None:
                # no explicit missing value: ask each file for its own
                amiss = a.missing_value(name)
                bmiss = b.missing_value(name)
            else:
                amiss, bmiss = missing, missing
            LOG.debug('comparing %s with epsilon %s and missing %s,%s' % (name, epsilon, amiss, bmiss))
            aval = avar[:]
            bval = bvar[:]
            print('-' * 32)
            print(name)
            lal = sorted(delta.statistics(aval, bval, epsilon, (amiss, bmiss)).items())
            for each in lal:
                print(' %s: %s' % each)
                if doc_each:
                    print(' ' + delta.STATISTICS_DOC[each[0]])
        if doc_atend:
            print('\n\n' + delta.STATISTICS_DOC_STR)

    def show_help(command=None):
        """print help for a specific command or list of commands
        e.g. help stats
        """
        if command is None:
            # one line per command: name plus first line of its docstring
            for cmd in sorted(commands):
                ds = commands[cmd].__doc__.split('\n')[0]
                print("%-16s %s" % (cmd, ds))
        elif command not in commands:
            parser.error("unknown command %r; run \"help\" to list commands" % (command,))
        else:
            print(commands[command].__doc__)

    # explicit dispatch table rather than introspecting locals()
    commands.update({'info': info, 'stats': stats, 'help': show_help})

    if (not args) or (args[0] not in commands):
        parser.print_help()
        show_help()
        return 9
    commands[args[0]](*args[1:])
    return 0
if __name__ == '__main__':
    # hand main()'s return value to the shell as the exit status
    status = main()
    sys.exit(status)
\ No newline at end of file
#!/usr/bin/env python
# encoding: utf-8
"""
Routines to do assorted difference and comparison calculations and statistics
Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""
import os, sys, logging
from numpy import *
LOG = logging.getLogger(__name__)
def _missing(x, missing_value=None):
    """Return a boolean mask that is true where x is considered missing.

    An element is missing when it is NaN or, if missing_value is given
    (not None), when it compares equal to missing_value.
    """
    nan_mask = isnan(x)
    if missing_value is None:
        return nan_mask
    return nan_mask | (x == missing_value)
def _nan_masked_flat(values, missing_value):
    """Return (flat floating copy of values with missing entries set to NaN, missing count)."""
    flat = values.flatten()
    # compare in the original dtype so the equality test itself is unchanged
    mask = None if missing_value is None else (flat == missing_value)
    if flat.dtype.kind != 'f':
        # non-float data cannot hold NaN, and unsigned differences would wrap
        flat = flat.astype(float64)
    if mask is None:
        return flat, 0
    flat[mask] = nan
    return flat, mask.sum()


def statistics(a, b, epsilon=0., missing=(None, None)):
    """Return a dictionary of similarity statistics between arrays a and b.

    a, b    -- arrays of identical shape (anything numpy.asarray accepts)
    epsilon -- absolute differences larger than this count as "outside epsilon"
    missing -- pair (amiss, bmiss): values in a / b equal to these are treated
               as missing (replaced by NaN before differencing); None disables
               the check for that array. Passed positionally by callers.

    Differences are computed as b - a. Statistics whose name does not contain
    'nan' are computed only over elements where the difference is finite.
    The keys of the result are described in STATISTICS_DOC.

    Percent changes divide by the values of a, so elements of a equal to zero
    produce infinite or NaN percentages. When no finite difference exists the
    mean/max/std/rms/percent statistics are NaN, and for empty input
    outside_epsilon_fraction is NaN as well.
    """
    amiss, bmiss = missing
    a = asarray(a)
    b = asarray(b)
    shape = a.shape

    # NaN bookkeeping is done on the data as given, before missing-value masking
    a_nans = isnan(a).ravel()
    b_nans = isnan(b).ravel()

    aflat, n_a_missing = _nan_masked_flat(a, amiss)
    bflat, n_b_missing = _nan_masked_flat(b, bmiss)

    dflat = bflat - aflat
    a_xor_b_finite = (isfinite(aflat) ^ isfinite(bflat)).sum()
    n = len(dflat)

    dflat = array(dflat, float64)
    fin = isfinite(dflat)
    dflat = dflat[fin]
    a_fin = aflat[fin]  # reference values matching the finite differences
    del aflat
    del bflat

    n_finite = len(dflat)
    if n_finite:
        perc = dflat / a_fin * 100.
        mean_perc = perc.mean()
        max_perc = abs(perc).max()
        mean_diff = dflat.mean()
        std_diff = dflat.std()
        max_diff = abs(dflat).max()
        rms = sqrt((dflat * dflat).sum() / float(n_finite))
    else:
        # nothing comparable: reductions over empty arrays would raise
        mean_perc = max_perc = mean_diff = std_diff = max_diff = rms = nan

    n_o_e = (abs(dflat) > epsilon).sum()
    if n:
        fraction_o_e = n_o_e / float(n)
    else:
        fraction_o_e = nan

    return {'mean_percent_change': mean_perc,
            'max_percent_change': max_perc,
            'a_xor_b_finite_count': a_xor_b_finite,
            'mean_diff': mean_diff,
            'std_diff': std_diff,
            'max_diff': max_diff,
            'rms_diff': rms,
            'finite_count': n_finite,
            'a_missing_count': n_a_missing,
            'b_missing_count': n_b_missing,
            'outside_epsilon_count': n_o_e,
            'outside_epsilon_fraction': fraction_o_e,
            'max_count': n,
            'a_nan_count': a_nans.sum(),
            'b_nan_count': b_nans.sum(),
            'a_and_b_nan_count': (a_nans & b_nans).sum(),
            'shape': shape,
            }
# Human-readable description of every key returned by statistics(), plus a
# 'general' entry explaining what "finite" means in those descriptions.
STATISTICS_DOC = {
    'general': "Finite values are non-missing and finite (not NaN or +-Inf)",

    # differences and percent changes
    'mean_percent_change': "Percent change from A to B for finite values, averaged",
    'max_percent_change': "Percent change from A to B for finite values, maximum value",
    'mean_diff': "Mean difference of finite values",
    'std_diff': "Stdev of difference of finite values",
    'max_diff': "Maximum difference of finite values",
    'rms_diff': "RMS difference of finite values",

    # counts
    'a_xor_b_finite_count': "number of values that changed finite-ness between A and B",
    'finite_count': "number of finite values in common between A and B",
    'outside_epsilon_count': "number of finite differences falling outside epsilon",
    'outside_epsilon_fraction': "fraction of values falling outside epsilon (outside_epsilon_count/max_count)",
    'max_count': "number of values (cumprod(shape))",
    'a_missing_count': "number of values flagged missing in A",
    'b_missing_count': "number of values flagged missing in B",
    'a_nan_count': "number of NaNs in A",
    'b_nan_count': "number of NaNs in B",
    'a_and_b_nan_count': "number of NaNs in common between A and B",

    'shape': "shape of A",
}

# All descriptions as one text block, one "key:\n description" entry per
# statistic, ordered alphabetically by key.
STATISTICS_DOC_STR = '\n'.join(
    '%s:\n %s' % pair for pair in sorted(STATISTICS_DOC.items())
) + '\n'
if __name__ == '__main__':
    # run directly: execute the doctests embedded in this module
    from doctest import testmod
    testmod()
#!/usr/bin/env python
# encoding: utf-8
"""
I/O routines supporting reading a number of file formats.
Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""
import os, sys, logging
from pyhdf.SD import SD,SDC
LOG = logging.getLogger(__name__)
class hdf(SD):
    """Wrapper around an HDF4 dataset (pyhdf SD) opened read-only for comparison.

    Calling an instance returns the names of the variables in the file;
    indexing an instance by name returns the selected variable.
    """

    def __init__(self, filename):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(hdf, self).__init__(filename, SDC.READ)

    def __call__(self):
        "return the names of the variables available for comparison"
        return list(self.datasets().keys())

    def __getitem__(self, name):
        "return the variable called name, as given by select()"
        return self.select(name)

    def missing_value(self, name):
        "return the _FillValue attribute of variable name, or None if it has none"
        return getattr(self.select(name), '_FillValue', None)
class h5(object):
    """Empty class; it defines no attributes or methods of its own.

    NOTE(review): presumably a stub for a future HDF5 reader -- confirm.
    """
def open(pathname):
    """Open pathname with the reader class named after its file extension.

    The extension (text after the last dot, compared case-insensitively)
    must be one of the reader classes defined in this module: 'hdf' or 'h5'.
    The matching class is instantiated with pathname and returned.

    Raises KeyError when the extension is missing or has no reader. The
    extension is checked against an explicit list so that it can never
    resolve to an unrelated module-level name.
    """
    supported = ('hdf', 'h5')
    ext = os.path.splitext(pathname)[1][1:].lower()
    if ext not in supported:
        raise KeyError("no reader for file extension %r (file %s); supported: %s"
                       % (ext, pathname, ', '.join(supported)))
    cls = globals()[ext]
    return cls(pathname)
if __name__ == '__main__':
    # run directly: execute the doctests embedded in this module
    from doctest import testmod
    testmod()
#!/usr/bin/env python
# encoding: utf-8
"""
Plotting routines for difference values using matplotlib
Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""
import os, sys, logging
LOG = logging.getLogger(__name__)
if __name__ == '__main__':
    # run directly: execute the doctests embedded in this module
    from doctest import testmod
    testmod()
#!/usr/bin/env python
# encoding: utf-8
"""
PDF/HTML report generation routines
Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""
import os, sys, logging
LOG = logging.getLogger(__name__)
if __name__ == '__main__':
    # This module defines no main(); calling it raised NameError. Run the
    # module doctests instead, as the sibling modules do.
    import doctest
    doctest.testmod()
\ No newline at end of file
#!/usr/bin/env python
"""
$Id: setup.py 66 2009-04-24 18:47:55Z rayg $
see http://peak.telecommunity.com/DevCenter/setuptools
distribution:
python setup.py develop --install-dir=$HOME/Library/Python
python setup.py sdist
python setup.py bdist_egg
(cd dist; rsync -Cuav * larch.ssec.wisc.edu:/home/httpd/html/eggs/repos/glance/)
use:
python setup.py install --install-dir=$HOME/Library/Python
easy_install -d $HOME/Library/Python -vi http://larch.ssec.wisc.edu/eggs/repos glance
"""
# changed to support egg distribution
from setuptools import setup, find_packages
# NOTE(review): glance.io imports pyhdf, which is not listed in
# install_requires -- confirm whether that omission is deliberate.
setup(
    name="glance",
    version="0.2.2",
    zip_safe=True,
    # installs a "glance" command that calls glance.compare.main()
    entry_points={'console_scripts': ['glance = glance.compare:main']},
    packages=find_packages('.'),
    install_requires=['numpy'],
)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment