diff --git a/.gitignore b/.gitignore index 64cc5de48fd5d0937b395c907063174eb0c3e2fc..7beb617ffcb82d8b2f70cdcf493e80fe7dd0757a 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,6 @@ nosetests.xml ._.DS_Store Thumbs.db +### ide files ### +.idea + diff --git a/pyglance/glance/constants.py b/pyglance/glance/constants.py index 1232cae14514dc3164494207b3a43d448c8a108e..abdc8cb39652c72a893f4efa66c237b992f66d2f 100644 --- a/pyglance/glance/constants.py +++ b/pyglance/glance/constants.py @@ -142,6 +142,7 @@ THUMBNAIL_DPI_KEY = 'thumb_DPI' HIST_FUNCTION_KEY = 'histogram' SCATTER_FUNCTION_KEY = 'scatter' +DENSITY_SCATTER_FN_KEY = 'density-scatter' MULTI_SCATTER_FUNCTION_KEY = 'multi-scatter' HEX_PLOT_FUNCTION_KEY = 'scatterD' ORIG_FUNCTION_KEY = 'original' diff --git a/pyglance/glance/figures.py b/pyglance/glance/figures.py index fac8226583276a5f13c2dd546225328909dd0b88..835fc637ff8c84919936c91482dc6f86f37a60a1 100644 --- a/pyglance/glance/figures.py +++ b/pyglance/glance/figures.py @@ -29,6 +29,10 @@ LOG = logging.getLogger(__name__) # and how it handles range boundaries. Find a better solution if at all possible. offsetToRange = 0.0000000000000000001 +# how much data are we willing to put into the matplotlib functions? 
+MAX_SCATTER_PLOT_DATA = 1e6 # FUTURE: this limit was determined experimentally on Eva's laptop, may need to revisit this +MAX_HEX_PLOT_DATA = 1e7 # FUTURE: this limit was determined experimentally on Eva's laptop, may need to revisit this + # make a custom medium grayscale color map for putting our bad data on top of mediumGrayColorMapData = { 'red' : ((0.0, 1.00, 1.00), @@ -219,11 +223,22 @@ def create_scatter_plot(dataX, dataY, title, xLabel, yLabel, badMask=None, epsil by default this plot uses blue for data points and red for data marked by the bad mask """ - - return create_complex_scatter_plot ([(dataX, dataY, badMask, - 'b', 'r', - 'within\nepsilon', 'outside\nepsilon')], - title, xLabel, yLabel, epsilon=epsilon, units_x=units_x, units_y=units_y) + + to_return = None + + # make a regular scatter plot if we don't have too much data + if dataX.size < MAX_SCATTER_PLOT_DATA : + to_return = create_complex_scatter_plot ([(dataX, dataY, badMask, + 'b', 'r', + 'within\nepsilon', 'outside\nepsilon')], + title, + xLabel, yLabel, + epsilon=epsilon, + units_x=units_x, units_y=units_y) + else : + LOG.warn("Too much data present to allow creation of scatter plot for \"" + title + "\". Plot will not be created.") + + return to_return def create_complex_scatter_plot(dataList, title, xLabel, yLabel, epsilon=None, units_x=None, units_y=None) : """ @@ -234,10 +249,12 @@ def create_complex_scatter_plot(dataList, title, xLabel, yLabel, epsilon=None, u where a set looks like: (x data, y data, mask of bad points or None, matlab color code for display, matlab color code for 'bad' points, good label, bad label) - if a mask of bad points is given, it will be applyed to both the x and y data + if a mask of bad points is given, it will be applied to both the x and y data at least one data set must be given or no image will be created. 
""" + + # TODO, there is currently no cutoff at this level for data size, this should only affect the bin-tuple analysis # make the figure figure = plt.figure() @@ -253,7 +270,7 @@ def create_complex_scatter_plot(dataList, title, xLabel, yLabel, epsilon=None, u # if we have "bad" data to plot, pull it out badX = None badY = None - if (badMask != None) : + if (badMask is not None) : badX = dataX[badMask] badY = dataY[badMask] dataX = dataX[~badMask] @@ -298,13 +315,88 @@ def create_complex_scatter_plot(dataList, title, xLabel, yLabel, epsilon=None, u return figure +def create_density_scatter_plot(dataX, dataY, + title, + xLabel, yLabel, + epsilon=None, + units_x=None, units_y=None, + num_bins=200) : + """ + build a density scatter plot of the X data vs the Y data + """ + + # make the figure + figure = plt.figure() + axes = figure.add_subplot(111) + + # if we have no data, stop now + if (dataX is None) or (dataY is None) or (dataX.size <= 0) or (dataY.size <= 0) : + LOG.warn ("Insufficient data present to create density scatter plot.") + return figure + # if our data sizes don't match, warn and stop + if (dataX.size != dataY.size) : + LOG.warn ("The X and Y data given to create scatter plot \"" + "\" were different sizes and could not be compared." 
) + return figure + + # figure out the range of the data + min_value = min(np.min(dataX), np.min(dataY)) + max_value = max(np.max(dataX), np.max(dataY)) + # bounds should be defined in the form [[xmin, xmax], [ymin, ymax]] + bounds = [[min_value, max_value], [min_value, max_value]] + + # make our data flat if needed (np.histogram2d requires 1-D input) + dataX = dataX.ravel() if len(dataX.shape) > 1 else dataX + dataY = dataY.ravel() if len(dataY.shape) > 1 else dataY + + # make the binned density map for this data set + density_map, _, _ = np.histogram2d(dataX, dataY, bins=num_bins, range=bounds) + # mask out zero counts; flip because y goes the opposite direction in an imshow graph + density_map = np.flipud(np.transpose(np.ma.masked_array(density_map, mask=density_map == 0))) + + # display the density map data + imshow(density_map, extent=[min_value, max_value, min_value, max_value], + interpolation='nearest', norm=matplotlib.colors.LogNorm()) + + # draw some extra informational lines + _draw_x_equals_y_line(axes, epsilon=epsilon) + + # show a color bar + cb = plt.colorbar() + cb.set_label('log(count of data points)') + + # add the units to the x and y labels + tempXLabel = xLabel + tempYLabel = yLabel + if str.lower(str(units_x)) != "none" : + tempXLabel = tempXLabel + " in " + units_x + if str.lower(str(units_y)) != "none" : + tempYLabel = tempYLabel + " in " + units_y + + # and some informational stuff + axes.set_title(title) + plt.xlabel(tempXLabel) + plt.ylabel(tempYLabel) + + # format our axes so they display gracefully + yFormatter = FormatStrFormatter("%4.4g") + axes.yaxis.set_major_formatter(yFormatter) + xFormatter = FormatStrFormatter("%4.4g") + axes.xaxis.set_major_formatter(xFormatter) + + return figure + # build a hexbin plot of the x,y points and show the density of the point distribution def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel, epsilon=None, units_x=None, units_y=None) : - + + # if we have too much data, stop now (a None input falls through to the no-data check below) + if (dataX is not None) and (dataX.size > MAX_HEX_PLOT_DATA) : + LOG.warn("Too 
much data present to allow creation of hex plot for \"" + title + "\". Plot will not be created.") + return None + # make the figure figure = plt.figure() axes = figure.add_subplot(111) - + # for some reason, if you give the hexplot a data set that's all the same number it dies horribly if ( ((dataX is None) or (len(dataX) <= 0)) or ((dataY is None) or (len(dataY) <= 0)) or ((dataX.max() == dataX.min()) and (dataY.max() == dataY.min())) ): diff --git a/pyglance/glance/gui_constants.py b/pyglance/glance/gui_constants.py index de3144ed68aeccf6774850485964eecb83925b58..123d3ff0af7e652aad7b1e791d939cf8f312ab54 100644 --- a/pyglance/glance/gui_constants.py +++ b/pyglance/glance/gui_constants.py @@ -31,6 +31,7 @@ HISTOGRAM_A = "Historgram of A Data" HISTOGRAM_B = "Historgram of B Data" MISMATCH = "Mismatch Areas" SCATTER = "Scatter Plot" +D_SCATTER = "Density Scatter Plot" HEX_PLOT = "Hex Plot" # a list of all the image types, for convenience @@ -43,6 +44,7 @@ IMAGE_TYPES = [ORIGINAL_A, HISTOGRAM, MISMATCH, SCATTER, + D_SCATTER, HEX_PLOT ] @@ -52,6 +54,7 @@ COMPARISON_IMAGES = [ABS_DIFF, HISTOGRAM, MISMATCH, SCATTER, + D_SCATTER, HEX_PLOT ] diff --git a/pyglance/glance/gui_figuremanager.py b/pyglance/glance/gui_figuremanager.py index 2cc91435331d47cd8e1ee770067b492e9a51457d..f31e45355aa6012758ca4c4020cc724afd72e2b4 100644 --- a/pyglance/glance/gui_figuremanager.py +++ b/pyglance/glance/gui_figuremanager.py @@ -55,6 +55,7 @@ CAN_BE_MAPPED = { HISTOGRAM : False, MISMATCH : True, SCATTER : False, + D_SCATTER : False, HEX_PLOT : False, } @@ -70,6 +71,7 @@ NEEDED_DATA_PER_PLOT = \ HISTOGRAM : set([A_CONST, B_CONST]), MISMATCH : set([A_CONST, B_CONST]), SCATTER : set([A_CONST, B_CONST]), + D_SCATTER : set([A_CONST, B_CONST]), HEX_PLOT : set([A_CONST, B_CONST]), } @@ -556,7 +558,7 @@ class GlanceGUIFigures (object) : tempFigure = figures.create_histogram(rawDiffDataClean, DEFAULT_NUM_BINS, titleText, "Value of (B - A) at each data point", "Number of points with a given 
difference", units=aUnitsText) - elif (imageType == SCATTER) or (imageType == HEX_PLOT) : + elif (imageType == SCATTER) or (imageType == D_SCATTER) or (imageType == HEX_PLOT) : # Note: scatter and hex plots don't care about data format requested, they're scatter or hex plots @@ -567,18 +569,29 @@ class GlanceGUIFigures (object) : if imageType == SCATTER : cleanMismatchMask = diffData.diff_data_object.masks.mismatch_mask[tempCleanMask] - figures.create_scatter_plot(aDataClean, bDataClean, "Value in File A vs Value in File B", - "File A Value in " + aVarName, - "File B Value in " + bVarName, - badMask=cleanMismatchMask, epsilon=self.dataModel.getEpsilon(), - units_x=aUnitsText, units_y=bUnitsText) - + tempFigure = figures.create_scatter_plot(aDataClean, bDataClean, + "Value in File A vs Value in File B", + "File A Value for " + aVarName, + "File B Value for " + bVarName, + badMask=cleanMismatchMask, + epsilon=self.dataModel.getEpsilon(), + units_x=aUnitsText, units_y=bUnitsText) + + elif imageType == D_SCATTER : + + tempFigure = figures.create_density_scatter_plot(aDataClean, bDataClean, + "Density of Value in File A vs Value in File B", + "File A Value for " + aVarName, + "File B Value for " + bVarName, + epsilon=self.dataModel.getEpsilon(), + units_x=aUnitsText, units_y=bUnitsText) + else: tempFigure = figures.create_hexbin_plot(aDataClean, bDataClean, "Value in File A vs Value in File B", - "File A Value in " + aVarName, - "File B Value in " + bVarName, + "File A Value for " + aVarName, + "File B Value for " + bVarName, epsilon=self.dataModel.getEpsilon(), units_x=aUnitsText, units_y=bUnitsText) diff --git a/pyglance/glance/gui_view.py b/pyglance/glance/gui_view.py index ad3c987d8e35e3fcb5051724fd80d8b3c8a093ea..255fc4763d41fdfba5fdd8cf5a1e6e3c0f3c15a6 100644 --- a/pyglance/glance/gui_view.py +++ b/pyglance/glance/gui_view.py @@ -1136,14 +1136,22 @@ class NumpyArrayTableModel (QtCore.QAbstractTableModel) : return self.np_array.shape[0] def columnCount(self, 
parent=None): - return self.np_array.shape[1] + to_return = 1 + if len(self.np_array.shape) > 1 : + to_return = self.np_array.shape[1] + + return to_return def data(self, index, role=QtCore.Qt.DisplayRole): if index.isValid(): if role == QtCore.Qt.DisplayRole: row = index.row() col = index.column() - return QtCore.QVariant("%.5f"%self.np_array[row, col]) + if len(self.np_array.shape) > 1 : + return QtCore.QVariant("%.5f"%self.np_array[row, col]) + else : + return QtCore.QVariant("%.5f"%self.np_array[row]) + return QtCore.QVariant() class RawDataDisplayWindow (QtGui.QWidget) : diff --git a/pyglance/glance/plot.py b/pyglance/glance/plot.py index 0db58aaa1ffd8e144489a4b769523f98bdfb1a4b..d65f9de08aa9b7cb9d84c992c43c7417c0bacee6 100644 --- a/pyglance/glance/plot.py +++ b/pyglance/glance/plot.py @@ -59,19 +59,22 @@ def _handle_fig_creation_task(child_figure_function, log_message, plt.ioff() figure = child_figure_function() LOG.info(log_message) - figure.savefig(os.path.join(outputPath, fullFigName), dpi=fullDPI) - if (shouldMakeSmall) : - - tempImage = Image.open(os.path.join(outputPath, fullFigName)) - scaleFactor = float(thumbDPI) / float(fullDPI) - originalSize = tempImage.size - newSize = (int(originalSize[0] * scaleFactor), int(originalSize[1] * scaleFactor)) - tempImage = tempImage.resize(newSize, Image.ANTIALIAS) - tempImage.save(os.path.join(outputPath, 'small.' + fullFigName)) - - # get rid of the figure - plt.close(figure) - del(figure) + if figure is not None : + figure.savefig(os.path.join(outputPath, fullFigName), dpi=fullDPI) + if (shouldMakeSmall) : + + tempImage = Image.open(os.path.join(outputPath, fullFigName)) + scaleFactor = float(thumbDPI) / float(fullDPI) + originalSize = tempImage.size + newSize = (int(originalSize[0] * scaleFactor), int(originalSize[1] * scaleFactor)) + tempImage = tempImage.resize(newSize, Image.ANTIALIAS) + tempImage.save(os.path.join(outputPath, 'small.' 
+ fullFigName)) + + # get rid of the figure + plt.close(figure) + del(figure) + else: + LOG.warn("Unable to create plot.") # if we've reached this point and we did fork, # then we're the child process and we should stop now diff --git a/pyglance/glance/plotcreatefns.py b/pyglance/glance/plotcreatefns.py index eb1ccff537dd36ebbe2a7af078e42e13c85cf6b0..c2f7011a9af4ea6625cadc4388c122d3dd59af50 100644 --- a/pyglance/glance/plotcreatefns.py +++ b/pyglance/glance/plotcreatefns.py @@ -273,26 +273,46 @@ class BasicComparisonPlotsFunctionFactory (PlottingFunctionFactory) : assert(goodInBothMask.shape == outsideEpsilonMask.shape) # TODO, if there's an epsilon percent, how should the epsilon lines be drawn? - functionsToReturn[SCATTER_FUNCTION_KEY] = ((lambda : figures.create_scatter_plot(aData[goodInBothMask], bData[goodInBothMask], - "Value in File A vs Value in File B", - "File A Value", "File B Value", - outsideEpsilonMask[goodInBothMask], - epsilon, units_x=units_a, units_y=units_b)), - "scatter plot of file a values vs file b values for " + variableDisplayName, - "Scatter.png", compared_fig_list) + + good_a_data = aData[goodInBothMask] + good_b_data = bData[goodInBothMask] + + if good_a_data.size <= figures.MAX_SCATTER_PLOT_DATA : + # make a basic scatter plot + functionsToReturn[SCATTER_FUNCTION_KEY] = ((lambda : figures.create_scatter_plot(good_a_data, good_b_data, + "Value in File A vs Value in File B", + "File A Value", "File B Value", + outsideEpsilonMask[goodInBothMask], + epsilon, units_x=units_a, units_y=units_b)), + "scatter plot of file a values vs file b values for " + variableDisplayName, + "Scatter.png", compared_fig_list) + else : + LOG.warn("Too much data to allow creation of scatter plot for " + variableDisplayName + ".") + + # make a density scatter plot as well + functionsToReturn[DENSITY_SCATTER_FN_KEY] = ((lambda : figures.create_density_scatter_plot(good_a_data, good_b_data, + "Density of Value in File A vs Value in File B", + "File A Value", "File 
B Value", + epsilon=epsilon, + units_x=units_a, units_y=units_b)), + "density scatter plot of file a values vs file b values for " + variableDisplayName, + "DensityScatter.png", compared_fig_list) # make a hexplot, which is like a scatter plot with density if (DO_PLOT_HEX_KEY not in doPlotSettingsDict) or (doPlotSettingsDict[DO_PLOT_HEX_KEY]) : assert(aData.shape == bData.shape) assert(bData.shape == goodInBothMask.shape) - - functionsToReturn[HEX_PLOT_FUNCTION_KEY] = ((lambda : figures.create_hexbin_plot(aData[goodInBothMask], bData[goodInBothMask], - "Value in File A vs Value in File B", - "File A Value", "File B Value", epsilon, - units_x=units_a, units_y=units_b)), - "density of file a values vs file b values for " + variableDisplayName, - "Hex.png", compared_fig_list) + + if np.sum(goodInBothMask) <= figures.MAX_HEX_PLOT_DATA : + functionsToReturn[HEX_PLOT_FUNCTION_KEY] = ((lambda : figures.create_hexbin_plot(aData[goodInBothMask], bData[goodInBothMask], + "Value in File A vs Value in File B", + "File A Value", "File B Value", epsilon, + units_x=units_a, units_y=units_b)), + "density of file a values vs file b values for " + variableDisplayName, + "Hex.png", compared_fig_list) + else : + LOG.warn("Too much data to allow creation of hex plot for " + variableDisplayName + ".") return functionsToReturn diff --git a/pyglance/glance/variablereport.txt b/pyglance/glance/variablereport.txt index 6c48cbb9539988c4a5d9ef8d863c75f2821db99c..85ff1da45b99afc72c840f6eb556d29a6cfc0bd0 100644 --- a/pyglance/glance/variablereport.txt +++ b/pyglance/glance/variablereport.txt @@ -149,7 +149,7 @@ Copyright (c) 2011 University of Wisconsin SSEC. All rights reserved. 
<% inSubSet = False %> % endif <a href="./${image}"><img src="./small.${image}"></a> - + ## if we have a subset of images, separate it from the rest % elif (type(image) is list) : <% inSubSet = True %> diff --git a/pyglance/setup.py b/pyglance/setup.py index 9fe104fd7d4f8f288cb4d0b86b0b2fe8f4c3661d..de8f7d2fcbe5c7a375c41fd1f67283683b513393 100644 --- a/pyglance/setup.py +++ b/pyglance/setup.py @@ -22,7 +22,7 @@ easy_install -d $HOME/Library/Python -vi http://larch.ssec.wisc.edu/eggs/repos u from setuptools import setup, find_packages setup( name="uwglance", - version="0.3.1.7", + version="0.3.1.8", zip_safe = False, entry_points = { 'console_scripts': [ 'glance = glance.compare:main' ] }, packages = ['glance'], #find_packages('.'),