Skip to content
Snippets Groups Projects
Commit d807a36c authored by (no author)'s avatar (no author)
Browse files

work on correlation and grafiir comparisons

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@5 8a9318a1-56ba-4d59-b755-99d26321be01
parent e61d252f
No related branches found
No related tags found
No related merge requests found
......@@ -120,7 +120,46 @@ python -m glance.compare stats A.hdf B.hdf '.*_prof_retr_.*:1e-4' 'nwp_._index:0
bspc = get(b)
plot.compare_spectra(bspc,aspc)
plot.show()
def noisecheck(*args):
"""gives statistics for dataset comparisons against truth with and without noise
usage: noisecheck truth-file noise-file actual-file variable1{:epsilon{:missing}} {variable2...}
glance noisecheck /Volumes/snaapy/data/justins/abi_graffir/coreg/pure/l2_data/geocatL2.GOES-R.2005155.220000.hdf.gz /Volumes/snaapy/data/justins/abi_graffir/noise/noise1x/l2_data/geocatL2.GOES-R.2005155.220000.hdf
"""
afn,noizfn,bfn = args[:3]
LOG.info("opening truth file %s" % afn)
a = io.open(afn)
LOG.info("opening actual file %s" % noizfn)
noiz = io.open(noizfn)
LOG.info("opening noise file %s" % bfn)
b = io.open(bfn)
anames = set(a())
bnames = set(b())
cnames = anames.intersection(bnames) # common names
pats = args[3:] or ['.*']
names = _parse_varnames( cnames, pats, options.epsilon, options.missing )
for name,epsilon,missing in names:
avar = a[name]
bvar = b[name]
nvar = noiz[name]
if missing is None:
amiss = a.missing_value(name)
bmiss = b.missing_value(name)
else:
amiss,bmiss = missing,missing
x = avar[:]
y = bvar[:]
z = nvar[:]
def scat(x,xn,y):
from pylab import plot,show,scatter
scatter(x,y)
show()
nfo = delta.rms_corr_withnoise(x,y,z,epsilon,(amiss,bmiss),plot=scat)
print '-'*32
print name
for kv in sorted(nfo.items()):
print ' %s: %s' % kv
def stats(*args):
"""create statistics summary of variables
......
......@@ -9,6 +9,9 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
import os, sys, logging
from numpy import *
from scipy.stats import pearsonr, spearmanr, pointbiserialr
compute_r = pointbiserialr
LOG = logging.getLogger(__name__)
......@@ -44,6 +47,48 @@ def diff(a, b, epsilon=0., (amissing,bmissing)=(None,None)):
trouble = (anfin ^ bnfin) | (amis ^ bmis) | (abs(d)>epsilon)
return d, mask, trouble, (anfin, bnfin), (amis, bmis)
def corr(x,y,mask):
"compute correlation coefficient"
gf = mask.flatten()
xf = x.flatten()[gf]
yf = y.flatten()[gf]
assert(sum(~isfinite(yf))==0)
assert(sum(~isfinite(xf))==0)
return compute_r(xf,yf)[0]
def rms_corr_withnoise(truth, actual, noiz, epsilon=0., (amissing,bmissing)=(None,None), plot=None):
""" compute RMS and R statistics for truth vs actual and truth+noiz vs actual
"""
x=truth
y=actual
z=noiz
d,good,bad,_,_ = diff(x,y,epsilon,(amissing,bmissing))
# compute RMS error
rmse = sqrt(sum(d[good]**2)) / d.size
gf = good.flatten()
# raise NotImplementedError # FIXME we're getting NaN for R
xf = x.flatten()[gf]
yf = y.flatten()[gf]
assert(sum(~isfinite(yf))==0)
assert(sum(~isfinite(xf))==0)
# compute correlation coefficient
r = compute_r(xf,yf)[0]
# create xpn, x plus noise
xpn = array(x)
xpn[good] += z[good]
xpnf = xpn.flatten()[gf]
# compute RMS error versus noise
dpn,good,bad,_,_ = diff(xpn,y,epsilon,(amissing,bmissing))
rmsepn = sqrt(sum(dpn[good]**2)) / d.size
assert(sum(~isfinite(xpnf))==0)
rpn = compute_r(xpnf,yf)[0]
if plot: plot(xf,xpnf,yf)
return { 'rms_error': rmse,
'correlation': r,
'rms_error_with_noise': rmsepn,
'correlation_with_noise': rpn,
}
def stats(dif, mask, bad, *etc):
rms = sum(abs(dif[mask] ** 2)) / dif.size
return { 'rms_diff': rms,
......@@ -52,7 +97,6 @@ def stats(dif, mask, bad, *etc):
'median_diff': median(dif[mask])
}
def summarize(a, b, epsilon=0., (amiss,bmiss)=(None,None)):
"""return dictionary of similarity statistics
stats not including 'nan' in name exclude nans in either arrays
......@@ -64,6 +108,7 @@ def summarize(a, b, epsilon=0., (amiss,bmiss)=(None,None)):
a_nans = isnan(a)
b_nans = isnan(b)
n = a.size
r = corr(a,b,mask)
out = { 'a_xor_b_finite_count': a_xor_b_finite,
'finite_count': sum(mask),
......@@ -75,7 +120,8 @@ def summarize(a, b, epsilon=0., (amiss,bmiss)=(None,None)):
'a_nan_count': sum(a_nans),
'b_nan_count': sum(b_nans),
'a_and_b_nan_count': sum(a_nans & b_nans),
'shape': a.shape
'shape': a.shape,
'correlation': r
}
out.update(stadic)
return out
......
......@@ -37,7 +37,7 @@ class hdf(SD):
return getattr(self.select(name),'_FillValue',None)
FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imag'] for band in ['LW','MW','SW'] ]
FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ]
class h5(object):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment