diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index 1783e571dc4f12036edf7e8afb5112c42725711f..2b0464bd2fabbafd59238584789dd71d0927a488 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -48,11 +48,14 @@ glance_lon_lat_defaults = {'longitude': 'pixel_longitude',
 
 # these are the built in default settings for the variable analysis
 glance_analysis_defaults = {'epsilon': 0.0,
+                            'epsilon_percent': None,
                             'missing_value': None,
-                            'epsilon_failure_tolerance': 0.0, 
-                            'nonfinite_data_tolerance':  0.0 
+                            'epsilon_failure_tolerance': 0.0,
+                            'nonfinite_data_tolerance':  0.0,
+                            'minimum_acceptable_squared_correlation_coefficient': None
                             }
 
+# TODO, remove?
 def _cvt_names(namelist, epsilon, missing):
     """"if variable names are of the format name:epsilon, yield name,epsilon, missing
         otherwise yield name,default-epsilon,default-missing
@@ -78,6 +81,7 @@ def _clean_path(string_path) :
         clean_path = string_path
     return clean_path
 
+# TODO comment more clearly
 def _parse_varnames(names, terms, epsilon=0.0, missing=None):
     """filter variable names and substitute default epsilon and missing settings if none provided
     returns name,epsilon,missing triples
@@ -362,8 +366,8 @@ def _load_config_or_options(aPath, bPath, optionsSet, requestedVars = [ ]) :
         requestedNames = requestedVars or ['.*'] 
         
         # user selected defaults
-        defaultsToUse['epsilon'] = optionsSet['epsilon']
-        defaultsToUse['missing_value'] = optionsSet['missing']
+        defaultsToUse['epsilon']         = optionsSet['epsilon']
+        defaultsToUse['missing_value']   = optionsSet['missing']
         
         # note: there is no way to set the tolerances from the command line
     
@@ -439,7 +443,8 @@ def _get_percentage_from_mask(dataMask) :
 def _check_lon_lat_equality(longitudeA, latitudeA,
                             longitudeB, latitudeB,
                             ignoreMaskA, ignoreMaskB,
-                            llepsilon, doMakeImages, outputPath) :
+                            llepsilon, doMakeImages,
+                            outputPath) :
     """
     check to make sure the longitude and latitude are equal everywhere that's not in the ignore masks
     if they are not and doMakeImages was passed as True, generate appropriate figures to show where
@@ -684,45 +689,67 @@ def _check_pass_or_fail(varRunInfo, variableStats, defaultValues) :
     
     also returns information about the fractions of failure
     """
-    didPass = None
     
-    # get our tolerance values
+    passValues = [ ]
+    
+    # test the epsilon value tolerance
     
-    # get the tolerance for failures in comparison compared to epsilon
+    # get the tolerance for failures compared to epsilon
     epsilonTolerance = None
     if ('epsilon_failure_tolerance' in varRunInfo) :
         epsilonTolerance = varRunInfo['epsilon_failure_tolerance']
     else :
         epsilonTolerance = defaultValues['epsilon_failure_tolerance']
-    # get the tolerance for failures in amount of nonfinite data
-    # found in spatially valid areas
+    
+    # did we fail based on the epsilon?
+    failed_fraction = variableStats['Numerical Comparison Statistics']['diff_outside_epsilon_fraction']
+    passed_epsilon  = None
+    if epsilonTolerance is not None :
+        passed_epsilon = failed_fraction <= epsilonTolerance
+    passValues.append(passed_epsilon)
+    
+    # test the nonfinite tolerance
+    
+    # get the tolerance for failures in amount of nonfinite data (in spatially valid areas)
     nonfiniteTolerance = None
     if ('nonfinite_data_tolerance'  in varRunInfo) :
         nonfiniteTolerance = varRunInfo['nonfinite_data_tolerance']
     else :
         nonfiniteTolerance = defaultValues['nonfinite_data_tolerance']
     
-    # test to see if we passed or failed
+    # did we fail based on nonfinite data?
+    non_finite_diff_fraction = variableStats['Finite Data Statistics']['finite_in_only_one_fraction']
+    passed_nonfinite         = None
+    if nonfiniteTolerance is not None :
+        passed_nonfinite = non_finite_diff_fraction <= nonfiniteTolerance
+    passValues.append(passed_nonfinite)
     
-    # check for our epsilon tolerance
-    failed_fraction = 0.0
-    if not (epsilonTolerance is None) :
-        failed_fraction = variableStats['Numerical Comparison Statistics']['diff_outside_epsilon_fraction']
-        didPass = failed_fraction <= epsilonTolerance
+    # test the r-squared correlation coefficient
     
-    # check to see if it failed on nonfinite data
-    non_finite_diff_fraction = 0.0
-    if not (nonfiniteTolerance is None) :
-        non_finite_diff_fraction = variableStats['Finite Data Statistics']['finite_in_only_one_fraction']
-        passedNonFinite = non_finite_diff_fraction <= nonfiniteTolerance
-        
-        # combine the two test results
-        if (didPass is None) :
-            didPass = passedNonFinite
+    # get the minimum acceptable r-squared correlation coefficient
+    min_r_squared = None
+    if ('minimum_acceptable_squared_correlation_coefficient' in varRunInfo) :
+        min_r_squared = varRunInfo['minimum_acceptable_squared_correlation_coefficient']
+    else :
+        min_r_squared = defaultValues['minimum_acceptable_squared_correlation_coefficient']
+    
+    # did we fail based on the r-squared correlation coefficient?
+    r_squared_value  = None
+    passed_r_squared = None
+    if min_r_squared is not None :
+        r_squared_value  = variableStats['Numerical Comparison Statistics']['r-squared correlation']
+        passed_r_squared = r_squared_value >= min_r_squared
+    passValues.append(passed_r_squared)
+    
+    # figure out the overall pass/fail result
+    didPass = None
+    for passValue in passValues :
+        if (passValue is None) or (didPass is None) :
+            didPass = didPass or  passValue
         else :
-            didPass = didPass and passedNonFinite
+            didPass = didPass and passValue
     
-    return didPass, failed_fraction, non_finite_diff_fraction
+    return didPass, failed_fraction, non_finite_diff_fraction, r_squared_value
 
 def _get_run_identification_info( ) :
     """
@@ -1158,11 +1185,14 @@ def reportGen_library_call (a_path, b_path, var_list=[ ],
                                                   varRunInfo['epsilon'],
                                                   (varRunInfo['missing_value'],
                                                    varRunInfo['missing_value_alt_in_b']),
-                                                  mask_a_to_use, mask_b_to_use)
+                                                  mask_a_to_use, mask_b_to_use,
+                                                  varRunInfo['epsilon_percent'])
             
             # add a little additional info to our variable run info before we squirrel it away
             varRunInfo['time'] = datetime.datetime.ctime(datetime.datetime.now())  # todo is this needed?
-            didPass, epsilon_failed_fraction, non_finite_fail_fraction = _check_pass_or_fail(varRunInfo,
+            didPass, epsilon_failed_fraction, \
+                     non_finite_fail_fraction, \
+                     r_squared_value = _check_pass_or_fail(varRunInfo,
                                                                                      variable_stats,
                                                                                      defaultValues)
             varRunInfo['did_pass'] = didPass
@@ -1263,6 +1293,7 @@ def reportGen_library_call (a_path, b_path, var_list=[ ],
                 finitePassedPercent  = (1.0 - non_finite_fail_fraction) * 100.0 
                 variableComparisons[displayName] = {'pass_epsilon_percent':   epsilonPassedPercent,
                                                     'finite_similar_percent': finitePassedPercent,
+                                                    'r_squared_correlation':  r_squared_value,
                                                     'variable_run_info':      varRunInfo
                                                     }
                 
diff --git a/pyglance/glance/data.py b/pyglance/glance/data.py
index 885319af18f564a9050c6a2a4a9b14802a45648d..988b60e254f3f23a39a2333e98ec09ad287c8f1f 100644
--- a/pyglance/glance/data.py
+++ b/pyglance/glance/data.py
@@ -249,7 +249,7 @@ class DiffInfoObject (object) :
                                   aDataObject.data[valid_in_both].astype(sharedType)
         
         # the valid data which is too different between the two sets according to the given epsilon
-        outside_epsilon_mask = (abs(raw_diff) > epsilonValue) & valid_in_both
+        outside_epsilon_mask = (abs(raw_diff) > epsilonValue) & valid_in_both # TODO also use epsilon percent
         # trouble points = mismatched nans, mismatched missing-values, differences that are too large 
         trouble_pt_mask = ( (aDataObject.masks.non_finite_mask ^ bDataObject.masks.non_finite_mask) |
                             (aDataObject.masks.missing_mask    ^ bDataObject.masks.missing_mask)    |
diff --git a/pyglance/glance/mainreport.txt b/pyglance/glance/mainreport.txt
index 7681a7714903dcdc541d834e0e85d9f8d2b23317..9de0b50083701fe31b716f02a24a65c29d4acb6a 100644
--- a/pyglance/glance/mainreport.txt
+++ b/pyglance/glance/mainreport.txt
@@ -177,6 +177,7 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
                             if 'display_name' in tempVarRunInfo :
                                 varDisplayName = tempVarRunInfo['display_name']
                             
+                            rSquaredCorr         = tempVariableInfo['r_squared_correlation']
                             passPercent          = tempVariableInfo['pass_epsilon_percent']
                             finiteSimilarPercent = tempVariableInfo['finite_similar_percent']
                             didPass              = tempVarRunInfo['did_pass']
@@ -194,6 +195,9 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
                             <td>
                                 Variable: <a href="${tempVarRunInfo['variable_report_path_escaped']}">${varDisplayName}</a> <br>
                                 Epsilon used: ${tempVarRunInfo['epsilon']} <br>
+                                % if rSquaredCorr is not None :
+                                    R-squared Correlation Coefficient: ${rSquaredCorr}<br>
+                                % endif
                                 Finite values within one epsilon of difference:
                                 ${report.make_formatted_display_string(passPercent, '%.6g')}%<br>
                                 Data that matched in finite-ness between the files:
diff --git a/pyglance/glance/stats.py b/pyglance/glance/stats.py
index f5df9f5cee2fd559a50807119cc072a0ec6400d2..3a7ab36e88c62672db2c339228fcd588216998dc 100644
--- a/pyglance/glance/stats.py
+++ b/pyglance/glance/stats.py
@@ -13,16 +13,85 @@ import glance.delta as delta
 
 import numpy as np
 
+# TODO, finish transitioning to classes
+
+# TODO, I don't like this design, but it's what I could come up
+# with for now. Reconsider this again later.
+class StatisticalData (object) :
+    """
+    This class represents a set of statistical data generated from
+    the examination of two data sets. This representation is
+    deliberately abstract.
+    
+    All Statistics Data objects should have a title and be able to provide
+    a dictionary of their statistics (see dictionary_form function) and
+    a dictionary documenting their statistics.
+    
+    Child classes can include whatever actual statistics they like.
+    """
+    
+    def __init__ (self) :
+        """
+        a minimal constructor that only sets the title
+        """
+        
+        self.title = None
+
+class MissingValueStatistics (StatisticalData) :
+    """
+    A class representing information about where fill values are found
+    in a pair of data sets.
+    
+    includes the following statistics:
+    
+    a_missing_count         -    count of points that are missing in the a data set
+    a_missing_fraction      - fraction of points that are missing in the a data set
+    b_missing_count         -    count of points that are missing in the b data set
+    b_missing_fraction      - fraction of points that are missing in the b data set
+    common_missing_count    -    count of points that are missing in both data sets
+    common_missing_fraction - fraction of points that are missing in both data sets
+    """
+    
+
 # --------------------- general statistics methods ------------------
 
-def summarize(a, b, epsilon=0., (a_missing_value, b_missing_value)=(None,None), ignoreInAMask=None, ignoreInBMask=None):
+def _get_missing_value_stats(a_missing_mask, b_missing_mask) :
+    """
+    Get a list of statistics about missing data values in data sets a and b,
+    given masks describing the positions of the missing data (such masks
+    can be gotten from the diff function), 
+    the return value will be a dictionary of statistics
+    """
+    # calculate information about the missing values
+    num_a_missing = np.sum(a_missing_mask)
+    num_b_missing = np.sum(b_missing_mask)
+    num_common_missing = np.sum(a_missing_mask & b_missing_mask)
+    
+    # make the assumption that a and b are the same size and only use the size of a's mask
+    total_num_values = a_missing_mask.size
+    
+    missing_value_stats = {'a_missing_count': num_a_missing,
+                           'a_missing_fraction': (float(num_a_missing) / float(total_num_values)),
+                           'b_missing_count': num_b_missing,
+                           'b_missing_fraction': (float(num_b_missing) / float(total_num_values)),
+                           'common_missing_count': num_common_missing,
+                           'common_missing_fraction': (float(num_common_missing) / float(total_num_values))
+                           }
+    
+    return missing_value_stats
+
+def summarize(a, b, epsilon=0.,
+              (a_missing_value, b_missing_value)=(None,None),
+              ignoreInAMask=None, ignoreInBMask=None,
+              epsilonPercent=None):
     """return dictionary of statistics dictionaries
     stats not including 'nan' in name exclude nans in either arrays
     """
     # diff our two data sets
     aDataObject = dataobj.DataObject(a, fillValue=a_missing_value, ignoreMask=ignoreInAMask)
     bDataObject = dataobj.DataObject(b, fillValue=b_missing_value, ignoreMask=ignoreInBMask)
-    diffInfo = dataobj.DiffInfoObject(aDataObject, bDataObject, epsilonValue=epsilon) #TODO, needs epsilon percent
+    diffInfo = dataobj.DiffInfoObject(aDataObject, bDataObject,
+                                      epsilonValue=epsilon, epsilonPercent=epsilonPercent) 
     #TODO, for the moment, unpack these values into local variables
     diffData = diffInfo.diff_data_object.data
     finite_mask    = diffInfo.diff_data_object.masks.valid_mask
@@ -104,6 +173,7 @@ def _get_numerical_data_stats(a, b, diff_data,  data_is_finite_mask,
         fraction_perfect = num_perfect / float(total_num_finite_values)
     
     comparison = {  'correlation': r_corr,
+                    'r-squared correlation': r_corr * r_corr,
                     'diff_outside_epsilon_count': num_finite_values_too_different,
                     'diff_outside_epsilon_fraction': fraction_too_different,
                     'perfect_match_count': num_perfect,
@@ -176,30 +246,7 @@ def _get_finite_data_stats(a_is_finite_mask, b_is_finite_mask, common_ignore_mas
     
     return finite_value_stats
 
-def _get_missing_value_stats(a_missing_mask, b_missing_mask) :
-    """
-    Get a list of statistics about missing data values in data sets a and b,
-    given masks describing the positions of the missing data (such masks
-    can be gotten from the diff function), 
-    the return value will be a dictionary of statistics
-    """
-    # calculate information about the missing values
-    num_a_missing = np.sum(a_missing_mask)
-    num_b_missing = np.sum(b_missing_mask)
-    num_common_missing = np.sum(a_missing_mask & b_missing_mask)
-    
-    # make the assumption that a and b are the same size and only use the size of a's mask
-    total_num_values = a_missing_mask.size
-    
-    missing_value_stats = {'a_missing_count': num_a_missing,
-                           'a_missing_fraction': (float(num_a_missing) / float(total_num_values)),
-                           'b_missing_count': num_b_missing,
-                           'b_missing_fraction': (float(num_b_missing) / float(total_num_values)),
-                           'common_missing_count': num_common_missing,
-                           'common_missing_fraction': (float(num_common_missing) / float(total_num_values))
-                           }
-    
-    return missing_value_stats
+
 
 def _get_nan_stats(a_nan_mask, b_nan_mask) :
     """
@@ -284,6 +331,7 @@ STATISTICS_DOC = {  'general': "Finite values are non-missing and finite (not Na
                     'perfect_match_count': "number of perfectly matched finite data points between A and B",
                     'perfect_match_fraction': "fraction of finite values perfectly matching between A and B (out of common_finite_count)",
                     'rms_diff': "root mean square (RMS) difference of finite values",
+                    'r-squared correlation': "the square of the r correlation (see correlation)",
                     'std_diff': "standard deviation of difference of finite values",
                     'trouble_points_count': 'number of points that differ in finite/missing status between the input data sets A and B,' +
                                             ' or are unacceptable when compared according to the current epsilon value',