diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index 65ad4c8f4b2d94755270ca4046e790e1f7856098..08b279ba4231054520ebb5f70a55824831ab9724 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -135,6 +135,8 @@ def _resolve_names(fileAObject, fileBObject, defaultValues, # but override the values that would have been determined by _parse_varnames finalNames[name]['variable_name'] = name finalNames[name]['epsilon'] = epsilon + + # load the missing value if it was not provided missing_b = missing if missing is None: missing = fileAObject.missing_value(name) @@ -155,7 +157,12 @@ def _resolve_names(fileAObject, fileBObject, defaultValues, finalNames[name] = defaultValues.copy() finalNames[name]['variable_name'] = name finalNames[name].update(requestedNames[name]) - # TODO what's the correct behavior here for missing? + + # load the missing value if it was not provided + if finalNames[name]['missing_value'] is None : + finalNames[name]['missing_value'] = fileAObject.missing_value(name) + if not('missing_value_alt_in_b' in finalNames[name]) or (finalNames[name]['missing_value_alt_in_b'] is None) : + finalNames[name]['missing_value_alt_in_b'] = fileBObject.missing_value(name) else: # format command line input similarly to the stuff from the config file print (requestedNames) @@ -169,7 +176,14 @@ def _resolve_names(fileAObject, fileBObject, defaultValues, # but override the values that would have been determined by _parse_varnames finalNames[name]['variable_name'] = name finalNames[name]['epsilon'] = epsilon - finalNames[name]['missing_value'] = missing # TODO, what's the correct behavior here? + + # load the missing value if it was not provided + missing_b = missing + if missing is None: + missing = fileAObject.missing_value(name) + missing_b = fileBObject.missing_value(name) + finalNames[name]['missing_value'] = missing + finalNames[name]['missing_value_alt_in_b'] = missing_b LOG.debug("Final selected set of variables to analyze:") LOG.debug(str(finalNames)) @@ -381,7 +395,6 @@ def _compare_spatial_invalidity(invalid_in_a_mask, invalid_in_b_mask, spatial_in # so how many do they have together? spatial_info['perInvPtsInBoth'] = _get_percentage_from_mask(invalid_in_common_mask)[0] - #spatial_info['perInvPtsInBoth'], totalNumSpaciallyInvPts = _get_percentage_from_mask(invalid_in_common_mask) todo, remove? # make a "clean" version of the lon/lat longitude_common[valid_only_in_mask_a] = longitude_a[valid_only_in_mask_a] longitude_common[valid_only_in_mask_b] = longitude_b[valid_only_in_mask_b] @@ -439,7 +452,9 @@ examples: python -m glance.compare info A.hdf python -m glance.compare stats A.hdf B.hdf '.*_prof_retr_.*:1e-4' 'nwp_._index:0' -python -m glance.compare plotDiffs A.hdf B.hdf [optional output path] +python -m glance.compare plotDiffs A.hdf B.hdf +python -m glance compare reportGen A.hdf B.hdf +python -m glance """ parser = optparse.OptionParser(usage) @@ -803,7 +818,7 @@ python -m glance.compare plotDiffs A.hdf B.hdf [optional output path] runInfo['time'] = datetime.datetime.ctime(datetime.datetime.now()) #get info on the variable variableStats = delta.summarize(aData, bData, varRunInfo['epsilon'], - (varRunInfo['missing_value'], varRunInfo['missing_value']), + (varRunInfo['missing_value'], varRunInfo['missing_value_alt_in_b']), spaciallyInvalidMaskA, spaciallyInvalidMaskB) # hang on to our good % and our epsilon value to describe our comparison passedFraction = (1.0 - variableStats['Numerical Comparison Statistics']['diff_outside_epsilon_fraction']) @@ -828,7 +843,11 @@ python -m glance.compare plotDiffs A.hdf B.hdf [optional output path] else : nonfiniteTolerance = defaultValues['nonfinite_data_tolerance'] if not (nonfiniteTolerance is None) : - passedNonFinite = (passedFraction >= (1.0 - nonfiniteTolerance)) + non_finite_pts = variableStats['Finite Data Statistics']['finite_in_only_one_count'] + non_finite_pts = non_finite_pts + variableStats['Missing Value Statistics']['common_missing_count'] + non_finite_pts = non_finite_pts + variableStats['NaN Statistics']['common_nan_count'] + non_finite_fraction = float(non_finite_pts) / float(variableStats['General Statistics']['num_data_points']) + passedNonFinite = non_finite_fraction <= nonfiniteTolerance if (didPass is None) : didPass = passedNonFinite else : diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py index 71d75155f3f3d085100caac244efe54e41a11d5e..1732dfa80389ee9107307f8bd3d49c49d169b929 100644 --- a/pyglance/glance/delta.py +++ b/pyglance/glance/delta.py @@ -11,7 +11,7 @@ import os, sys, logging from numpy import * from scipy.stats import pearsonr, spearmanr, pointbiserialr -compute_r = spearmanr +compute_r = pearsonr #spearmanr LOG = logging.getLogger(__name__) @@ -53,14 +53,14 @@ def diff(a, b, epsilon=0., (amissing,bmissing)=(None,None), ignoreMask=None): # build the comparison data that includes the "good" values d = empty_like(a) - mask = ~(anfin | bnfin | amis | bmis | ignoreMask) - d[~mask] = nan + mask = ~(anfin | bnfin | amis | bmis | ignoreMask) # mask to get just the "valid" data + d[~mask] = nan # throw away invalid data d[mask] = b[mask] - a[mask] - # trouble areas - mismatched nans, mismatched missing-values, differences > epsilon - trouble = (anfin ^ bnfin) | (amis ^ bmis) | (abs(d)>epsilon) - # the outside epsilon mask + # the valid data that's outside epsilon outeps = (abs(d) > epsilon) & mask + # trouble areas - mismatched nans, mismatched missing-values, differences > epsilon + trouble = (anfin ^ bnfin) | (amis ^ bmis) | outeps return d, mask, trouble, (anfin, bnfin), (amis, bmis), outeps @@ -279,13 +279,11 @@ def summarize(a, b, epsilon=0., (a_missing_value, b_missing_value)=(None,None), # build some other finite data masks that we'll need finite_a_mask = ~(anfin | amis) finite_b_mask = ~(bnfin | bmis) - finite_mask = finite_a_mask & finite_b_mask - if not (ignoreInAMask is None) : - finite_a_mask = finite_a_mask & (~ ignoreInAMask) - if not (ignoreInBMask is None) : - finite_b_mask = finite_b_mask & (~ ignoreInBMask) - if not (ignoreMask is None) : - finite_mask = finite_mask & (~ ignoreMask) + finite_mask = finite_a_mask & finite_b_mask + # also factor in the ignore masks + finite_a_mask = finite_a_mask & (~ ignoreInAMask) + finite_b_mask = finite_b_mask & (~ ignoreInBMask) + finite_mask = finite_mask & (~ ignoreMask) general_stats = _get_general_data_stats(a_missing_value, b_missing_value, epsilon, trouble, ignoreInAMask, ignoreInBMask) additional_statistics = stats(*nfo) # grab some additional comparison statistics diff --git a/pyglance/glance/plot.py b/pyglance/glance/plot.py index 078ec1f6f346136a9a67f6ed4cc896de2c8eb8d9..b93924cca3234d068046446efbd71e72d45613a4 100644 --- a/pyglance/glance/plot.py +++ b/pyglance/glance/plot.py @@ -137,7 +137,7 @@ def _create_histogram(data, bins, title, xLabel, yLabel, displayStats=False) : medianVal = tempStats['median_diff'] meanVal = tempStats['mean_diff'] stdVal = tempStats['std_diff'] - numPts = len(data.ravel()) + numPts = data.size # info on the display of our statistics xbounds = axes.get_xbound() @@ -207,7 +207,9 @@ def _create_mapped_figure(data, latitude, longitude, boundingAxes, title, kwargs['projection'] = 'mill' # use a miller cylindrical projection to show the whole world elif (longitudeRange > 100) or (latitudeRange > 70) : kwargs['projection'] = 'ortho' # use an orthographic projection to show half the globe - # otherwise the default is just fine! + else : + # TODO figure out why the default is cutting off the field of view, until then, use miller + kwargs['projection'] = 'mill' # draw our data placed on a map bMap, x, y = maps.mapshow(longitudeCleaned, latitudeCleaned, data, boundingAxes, **kwargs) @@ -494,14 +496,16 @@ def plot_and_save_figure_comparison(aData, bData, LOG.info("\t\tsaving smaller versions of images") figureA.savefig(outputPath + "/" + variableName + ".A.small.png", dpi=50) figureB.savefig(outputPath + "/" + variableName + ".B.small.png", dpi=50) - if not shortCircuitComparisons : - figureAbsDiff.savefig(outputPath + "/" + variableName + ".AbsDiff.small.png", dpi=50) - figureDiff.savefig(outputPath + "/" + variableName + ".Diff.small.png", dpi=50) - figureBadDataInDiff.savefig(outputPath + "/" + variableName + ".Trouble.small.png", dpi=50) - diffHistogramFigure.savefig(outputPath + "/" + variableName + ".Hist.small.png", dpi=50) - if not (imperfectHistogramFigure is None) : - imperfectHistogramFigure.savefig(outputPath + "/" + variableName + ".ImpHist.small.png", dpi=50) - diffScatterPlot.savefig(outputPath + "/" + variableName + ".Scatter.small.png", dpi=50) + #if not shortCircuitComparisons : + figureAbsDiff.savefig(outputPath + "/" + variableName + ".AbsDiff.small.png", dpi=50) + figureDiff.savefig(outputPath + "/" + variableName + ".Diff.small.png", dpi=50) + figureBadDataInDiff.savefig(outputPath + "/" + variableName + ".Trouble.small.png", dpi=50) + diffHistogramFigure.savefig(outputPath + "/" + variableName + ".Hist.small.png", dpi=50) + ''' + if not (imperfectHistogramFigure is None) : + imperfectHistogramFigure.savefig(outputPath + "/" + variableName + ".ImpHist.small.png", dpi=50) + ''' + diffScatterPlot.savefig(outputPath + "/" + variableName + ".Scatter.small.png", dpi=50) return diff --git a/pyglance/glance/variablereport.txt b/pyglance/glance/variablereport.txt index 5c1dd34ad947c0f117a41e1e76ba929bb25b40aa..f4c91c25cca581ddc0bc264a08d48ab28dbeefd6 100644 --- a/pyglance/glance/variablereport.txt +++ b/pyglance/glance/variablereport.txt @@ -101,19 +101,23 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved. ## comparison parameters <p> + ## display the variable names % if 'alternate_name_in_B' in runInfo : variable name in A: ${variableName} <br> variable name in B: ${runInfo['alternate_name_in_B']} <br> % else : variable name: ${variableName} <br> % endif + + ## display the epsilon epsilon value: ${runInfo['epsilon']} <br> - "missing" data value: - <% missingDataValue = runInfo['missing_value'] %> - % if missingDataValue is None : - None + + ## display the missing value + % if ('missing_value_alt_in_b' in runInfo) and (not (runInfo['missing_value_alt_in_b'] is runInfo['missing_value'])) : + "missing" data value in A: ${str(runInfo['missing_value'])}<br> + "missing" data value in B: ${str(runInfo['missing_value_alt_in_b'])}<br> % else : - ${missingDataValue} + "missing" data value: ${str(runInfo['missing_value'])} % endif </p>