From 67581437634d12d91464e0f0a76f805f392814fb Mon Sep 17 00:00:00 2001
From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01>
Date: Thu, 14 Oct 2010 23:16:56 +0000
Subject: [PATCH] adding new scatter plot colored by bin to the bin/tuple
 plots, also fixed link to config file on the variable reports

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@136 8a9318a1-56ba-4d59-b755-99d26321be01
---
 pyglance/glance/compare.py       |  12 ++-
 pyglance/glance/figures.py       | 122 ++++++++++++++++++++++---------
 pyglance/glance/plotcreatefns.py |  23 +++++-
 3 files changed, 114 insertions(+), 43 deletions(-)

diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index 8d80ade..a93f00f 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -1330,15 +1330,13 @@ def reportGen_library_call (a_path, b_path, var_list=[ ],
                 LOG.debug("Creating variable directory.")
                 os.makedirs(variableDir)
             
-            # form the doc path relative to where the variable is
-            docPath = './'
+            # form the doc and config paths relative to where the variable is
+            upwardPath = './'
             for number in range(len(displayName.split('/'))) : # TODO this is not general to windows
-                docPath = os.path.join(docPath, '../')
-            varRunInfo['doc_path'] = quote(os.path.join(docPath, 'doc.html')) # should this be somewhere else?
-            
-            # hang onto the config file path as well
+                upwardPath = os.path.join(upwardPath, '../')
+            varRunInfo['doc_path'] = quote(os.path.join(upwardPath, 'doc.html'))
             if 'config_file_name' in runInfo :
-                varRunInfo['config_file_path'] = quote(os.path.join(pathsTemp['out'], './' + runInfo['config_file_name']))
+                varRunInfo['config_file_path'] = quote(os.path.join(upwardPath, runInfo['config_file_name']))
             
             # figure out the masks we want, and then do our statistical analysis
             mask_a_to_use = None
diff --git a/pyglance/glance/figures.py b/pyglance/glance/figures.py
index 05d5615..715eece 100644
--- a/pyglance/glance/figures.py
+++ b/pyglance/glance/figures.py
@@ -180,45 +180,66 @@ def _plot_tag_data_mapped(bMap, tagData, x, y, addExplinationLabel=True) :
 
 # build a scatter plot of the x,y points
 def create_scatter_plot(dataX, dataY, title, xLabel, yLabel, badMask=None, epsilon=None) :
+    """
+    build a scatter plot of the data
+    if a bad mask is given the points selected by that mask will be plotted in a different color
+    if an epsilon is given the lines for +/- epsilon will be drawn on the plot
+    
+    by default this plot uses blue for data points and red for data marked by the bad mask
+    """
+    
+    return create_complex_scatter_plot ([(dataX, dataY, badMask,
+                                          'b', 'r',
+                                          'within\nepsilon', 'outside\nepsilon')],
+                                        title, xLabel, yLabel, epsilon=epsilon)
+
+def create_complex_scatter_plot(dataList, title, xLabel, yLabel, epsilon=None) :
+    """
+    build a scatter plot with multiple data sets in different colors
+    the dataList parameter should be in the form:
+    [(set1), (set2), ... , (setN)]
+    
+    where a set looks like:
+    (x data, y data, mask of bad points or None, matplotlib color for display (e.g. 'b' or a hex string), matplotlib color for 'bad' points, good label, bad label)
+    
+    if a mask of bad points is given, it will be applied to both the x and y data
+    
+    at least one data set must be given; otherwise an empty figure with no plotted data is returned.
+    """
     
     # make the figure
     figure = plt.figure()
     axes = figure.add_subplot(111)
     
-    # if we have "bad" data to plot, pull it out
-    badX = None
-    badY = None
-    if (badMask != None) :
-        badX = dataX[badMask]
-        badY = dataY[badMask]
-        dataX = dataX[~badMask]
-        dataY = dataY[~badMask]
-    
-    # the scatter plot of the good data 
-    axes.plot(dataX, dataY, 'b,', label='within\nepsilon')
-    
-    # plot the bad data
-    numTroublePts = 0
-    if (badX is not None) and (badY is not None) and (badMask is not None) :
-        numTroublePts = badX.shape[0]
-        LOG.debug('\t\tnumber of trouble points in scatter plot: ' + str(badX.shape[0]))
-        if numTroublePts > 0 :
-            axes.plot(badX, badY, 'r,', label='outside\nepsilon')
-    
-    # draw the line for the "perfect fit" 
-    xbounds = axes.get_xbound()
-    xrange = xbounds[1] - xbounds[0]
-    ybounds = axes.get_ybound()
-    yrange = ybounds[1] - ybounds[0]
-    perfect = [max(xbounds[0], ybounds[0]), min(xbounds[1], ybounds[1])]
-    axes.plot(perfect, perfect, 'k--', label='A = B')
+    # if we have no data, stop now
+    if (dataList is None) or (len(dataList) <= 0) :
+        return figure;
     
-    # now draw the epsilon bound lines if they are visible and the lines won't be the same as A = B
-    if (not (epsilon is None)) and (epsilon > 0.0) and (epsilon < xrange) and (epsilon < yrange):
-        # plot the top line
-        axes.plot([perfect[0], perfect[1] - epsilon], [perfect[0] + epsilon, perfect[1]], '--', color='#00FF00', label='+/-epsilon')
-        # plot the bottom line
-        axes.plot([perfect[0] + epsilon, perfect[1]], [perfect[0], perfect[1] - epsilon], '--', color='#00FF00')
+    # look at the stuff in each of the data sets and plot that set
+    for dataX, dataY, badMask, goodColor, badColor, goodLabel, badLabel in dataList :
+        
+        # if we have "bad" data to plot, pull it out
+        badX = None
+        badY = None
+        if (badMask != None) :
+            badX  = dataX[badMask]
+            badY  = dataY[badMask]
+            dataX = dataX[~badMask]
+            dataY = dataY[~badMask]
+        
+        # the scatter plot of the good data
+        axes.plot(dataX, dataY, ',', color=goodColor, label=goodLabel)
+        
+        # plot the bad data
+        numTroublePts = 0
+        if (badX is not None) and (badY is not None) and (badMask is not None) :
+            numTroublePts = badX.size
+            LOG.debug('\t\tplotting ' + str(numTroublePts) + ' trouble points in scatter plot.' )
+            if numTroublePts > 0 :
+                axes.plot(badX, badY, ',', color=badColor, label=badLabel)
+    
+    # draw some extra informational lines
+    _draw_x_equals_y_line(axes, epsilon=epsilon)
     
     # make a key to explain our plot
     # as long as things have been plotted with proper labels they should show up here
@@ -238,7 +259,7 @@ def create_scatter_plot(dataX, dataY, title, xLabel, yLabel, badMask=None, epsil
     return figure
 
 # build a hexbin plot of the x,y points and show the density of the point distribution
-def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel) :
+def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel, epsilon=None) :
     
     # make the figure
     figure = plt.figure()
@@ -252,6 +273,9 @@ def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel) :
     cb = plt.colorbar()
     cb.set_label('log10 (count + 1)')
     
+    # draw some extra informational lines
+    _draw_x_equals_y_line(axes, color='w', epsilon=epsilon, epsilonColor='k')
+    
     # and some informational stuff
     axes.set_title(title)
     plt.xlabel(xLabel)
@@ -265,6 +289,36 @@ def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel) :
     
     return figure
 
+def _draw_x_equals_y_line(axes, color='k', style='--', epsilon=None, epsilonColor='#00FF00', epsilonStyle='--') :
+    """
+    Draw the x = y line using the axes and color/style given
+    If epsilon is not None, also draw the +/- epsilon lines,
+    if they fall in the graph
+    """
+    
+    # get the bounds for our calculations and so we can reset the viewing window later
+    xbounds = axes.get_xbound()
+    ybounds = axes.get_ybound()
+    
+    # figure out the size of the ranges
+    xrange = xbounds[1] - xbounds[0]
+    yrange = ybounds[1] - ybounds[0]
+    
+    # draw the x=y line
+    perfect = [max(xbounds[0], ybounds[0]), min(xbounds[1], ybounds[1])]
+    axes.plot(perfect, perfect, style, color=color, label='A = B')
+    
+    # now draw the epsilon bound lines if they are visible and the lines won't be the same as A = B
+    if (not (epsilon is None)) and (epsilon > 0.0) and (epsilon < xrange) and (epsilon < yrange):
+        # plot the top line
+        axes.plot([perfect[0], perfect[1] - epsilon], [perfect[0] + epsilon, perfect[1]], epsilonStyle, color=epsilonColor, label='+/-epsilon')
+        # plot the bottom line
+        axes.plot([perfect[0] + epsilon, perfect[1]], [perfect[0], perfect[1] - epsilon], epsilonStyle, color=epsilonColor)
+    
+    # reset the bounds
+    axes.set_xbound(xbounds)
+    axes.set_ybound(ybounds)
+
 # build a histogram figure of the given data with the given title and number of bins
 def create_histogram(data, bins, title, xLabel, yLabel, displayStats=False) :
     
diff --git a/pyglance/glance/plotcreatefns.py b/pyglance/glance/plotcreatefns.py
index 9da0654..2d9367e 100644
--- a/pyglance/glance/plotcreatefns.py
+++ b/pyglance/glance/plotcreatefns.py
@@ -14,6 +14,7 @@ matplotlib.use('Agg') # use the Anti-Grain Geometry rendering engine
 from pylab import *
 
 import matplotlib.colors as colors
+import matplotlib.cm     as colormapinfo
 
 import logging
 import random as random
@@ -281,7 +282,7 @@ class BasicComparisonPlotsFunctionFactory (PlottingFunctionFactory) :
             
             functionsToReturn['scatterD']  = ((lambda : figures.create_hexbin_plot(aData[goodInBothMask], bData[goodInBothMask],
                                                                                    "Value in File A vs Value in File B",
-                                                                                   "File A Value", "File B Value")),
+                                                                                   "File A Value", "File B Value", epsilon)),
                                               "density of file a values vs file b values for " + variableDisplayName,
                                               "Hex.png", compared_fig_list)
         
@@ -820,6 +821,24 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) :
         # our list of functions that will later create the plots
         functionsToReturn = { }
         
+        
+        # create the scatter plot with colors for each section
+        scatterPlotList = [ ]
+        tempColorMap = colormapinfo.get_cmap('jet', rawDiffData.shape[0])
+        for binNumber in range(rawDiffData.shape[0]) :
+            tempColor = tempColorMap(binNumber)
+            if len(tempColor) > 3 :
+                tempColor = tempColor[:3]
+            scatterPlotList.append(((aData[binNumber][goodInBothMask[binNumber]]).ravel(),
+                                    (bData[binNumber][goodInBothMask[binNumber]]).ravel(), None,
+                                    colors.rgb2hex(tempColor), None, 'bin ' + str(binNumber + 1), None))
+        functionsToReturn['multi-scatter'] = ((lambda : figures.create_complex_scatter_plot(scatterPlotList,
+                                                                        "Value in File A vs Value in File B, Colored by Bin",
+                                                                        "File A Value", "File B Value",
+                                                                        epsilon)),
+                                          "scatter plot of file a values vs file b values for " + variableDisplayName + " by bin",
+                                          "MultiScatter.png", compared_fig_list)
+        
         # for each of the bins, make the rms histogram data
         numHistogramSections = 7 # TODO at some point make this a user controlled setting
         for binNumber in range(rawDiffData.shape[0]) :
@@ -865,7 +884,7 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) :
                 for limitIndex in range(histogramSectionLimits.size - 1) :
                     
                     # if it falls in this section, add it's case number index to the list for this section
-                    if ( (rmsDiffValues[caseNumber] > histogramSectionLimits[limitIndex]) and
+                    if ( (rmsDiffValues[caseNumber] >  histogramSectionLimits[limitIndex]) and
                          (rmsDiffValues[caseNumber] <= histogramSectionLimits[limitIndex + 1]) ) :
                         
                         if limitIndex not in histogramSections :
-- 
GitLab