work on correlation and grafiir comparisons

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@5 8a9318a1-56ba-4d59-b755-99d26321be01

work on correlation and grafiir comparisons
git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@5 8a9318a1-56ba-4d59-b755-99d26321be01
d807a36c · (no author) · e61d252f · d807a36c · d807a36c · d807a36c
Commit d807a36c authored 15 years ago by (no author)
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -120,7 +120,46 @@ python -m glance.compare stats A.hdf B.hdf '.*_prof_retr_.*:1e-4' 'nwp_._index:0
        bspc = get(b)
        plot.compare_spectra(bspc,aspc)
        plot.show()
+    
+    def noisecheck(*args):
+        """gives statistics for dataset comparisons against truth with and without noise
+        usage: noisecheck truth-file noise-file actual-file variable1{:epsilon{:missing}} {variable2...}
+        glance noisecheck /Volumes/snaapy/data/justins/abi_graffir/coreg/pure/l2_data/geocatL2.GOES-R.2005155.220000.hdf.gz /Volumes/snaapy/data/justins/abi_graffir/noise/noise1x/l2_data/geocatL2.GOES-R.2005155.220000.hdf 
+        """
+        afn,noizfn,bfn = args[:3]
+        LOG.info("opening truth file %s" % afn)
+        a = io.open(afn)
+        LOG.info("opening actual file %s" % noizfn)
+        noiz = io.open(noizfn)
+        LOG.info("opening noise file %s" % bfn)
+        b = io.open(bfn)
        
+        anames = set(a())
+        bnames = set(b()) 
+        cnames = anames.intersection(bnames) # common names
+        pats = args[3:] or ['.*']
+        names = _parse_varnames( cnames, pats, options.epsilon, options.missing )
+        for name,epsilon,missing in names:
+            avar = a[name]
+            bvar = b[name]
+            nvar = noiz[name]
+            if missing is None:
+                amiss = a.missing_value(name)
+                bmiss = b.missing_value(name)
+            else:
+                amiss,bmiss = missing,missing
+            x = avar[:]
+            y = bvar[:]
+            z = nvar[:]
+            def scat(x,xn,y):
+                from pylab import plot,show,scatter
+                scatter(x,y)
+                show()
+            nfo = delta.rms_corr_withnoise(x,y,z,epsilon,(amiss,bmiss),plot=scat)
+            print '-'*32
+            print name
+            for kv in sorted(nfo.items()):
+                print '  %s: %s' % kv
    
    def stats(*args):
        """create statistics summary of variables

--- a/pyglance/glance/delta.py
+++ b/pyglance/glance/delta.py
@@ -9,6 +9,9 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.

 import os, sys, logging
 from numpy import *
+from scipy.stats import pearsonr, spearmanr, pointbiserialr
+
+compute_r = pointbiserialr

 LOG = logging.getLogger(__name__)

@@ -44,6 +47,48 @@ def diff(a, b, epsilon=0., (amissing,bmissing)=(None,None)):
    trouble = (anfin ^ bnfin) | (amis ^ bmis) | (abs(d)>epsilon)
    return d, mask, trouble, (anfin, bnfin), (amis, bmis)

+def corr(x,y,mask):
+    "compute correlation coefficient"
+    gf = mask.flatten()
+    xf = x.flatten()[gf]
+    yf = y.flatten()[gf]
+    assert(sum(~isfinite(yf))==0)
+    assert(sum(~isfinite(xf))==0)
+    return compute_r(xf,yf)[0]
+
+def rms_corr_withnoise(truth, actual, noiz, epsilon=0., (amissing,bmissing)=(None,None), plot=None):
+    """ compute RMS and R statistics for truth vs actual and truth+noiz vs actual
+    """
+    x=truth
+    y=actual
+    z=noiz
+    d,good,bad,_,_ = diff(x,y,epsilon,(amissing,bmissing))
+    # compute RMS error
+    rmse = sqrt(sum(d[good]**2)) / d.size
+    gf = good.flatten()
+    # raise NotImplementedError # FIXME we're getting NaN for R
+    xf = x.flatten()[gf]
+    yf = y.flatten()[gf]
+    assert(sum(~isfinite(yf))==0)
+    assert(sum(~isfinite(xf))==0)
+    # compute correlation coefficient
+    r = compute_r(xf,yf)[0]
+    # create xpn, x plus noise
+    xpn = array(x)
+    xpn[good] += z[good]
+    xpnf = xpn.flatten()[gf]
+    # compute RMS error versus noise
+    dpn,good,bad,_,_ = diff(xpn,y,epsilon,(amissing,bmissing))
+    rmsepn = sqrt(sum(dpn[good]**2)) / d.size
+    assert(sum(~isfinite(xpnf))==0)
+    rpn = compute_r(xpnf,yf)[0]
+    if plot: plot(xf,xpnf,yf)
+    return { 'rms_error': rmse,
+             'correlation': r,
+             'rms_error_with_noise': rmsepn,
+             'correlation_with_noise': rpn,
+             }
+
 def stats(dif, mask, bad, *etc):
    rms = sum(abs(dif[mask] ** 2)) / dif.size    
    return {    'rms_diff': rms, 
@@ -52,7 +97,6 @@ def stats(dif, mask, bad, *etc):
                'median_diff': median(dif[mask]) 
                }

-
 def summarize(a, b, epsilon=0., (amiss,bmiss)=(None,None)):
    """return dictionary of similarity statistics
    stats not including 'nan' in name exclude nans in either arrays
@@ -64,6 +108,7 @@ def summarize(a, b, epsilon=0., (amiss,bmiss)=(None,None)):
    a_nans = isnan(a)
    b_nans = isnan(b)
    n = a.size
+    r = corr(a,b,mask)
        
    out = { 'a_xor_b_finite_count': a_xor_b_finite,
             'finite_count': sum(mask),
@@ -75,7 +120,8 @@ def summarize(a, b, epsilon=0., (amiss,bmiss)=(None,None)):
             'a_nan_count': sum(a_nans),
             'b_nan_count': sum(b_nans),
             'a_and_b_nan_count': sum(a_nans & b_nans),
-             'shape': a.shape
+             'shape': a.shape,
+             'correlation': r
             }
    out.update(stadic)
    return out

--- a/pyglance/glance/io.py
+++ b/pyglance/glance/io.py
@@ -37,7 +37,7 @@ class hdf(SD):
        return getattr(self.select(name),'_FillValue',None)
        

-FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imag'] for band in ['LW','MW','SW'] ] 
+FIXME_IDPS = [ '/All_Data/CrIS-SDR_All/ES' + ri + band for ri in ['Real','Imaginary'] for band in ['LW','MW','SW'] ] 


 class h5(object):