diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index 75711978ba931ac80001721c35dcc81584d8470e..5b1424ecba8ec73eaaa5ccd51eda091f3e3f50ba 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -19,6 +19,7 @@ from pycdf import CDFError import glance.io as io import glance.delta as delta import glance.plot as plot +import glance.plotcreatefns as plotcreate import glance.report as report from urllib import quote @@ -854,6 +855,9 @@ def reportGen_library_call (a_path, b_path, var_list=[ ], LOG.debug ("do_not_test_with_lon_lat = " + str(do_not_test_with_lon_lat)) LOG.debug ("include_images_for_this_variable = " + str(include_images_for_this_variable)) + # handle vector data + isVectorData = False # TODO actually figure out if we have vector data from user inputted settings + # check if this data can be displayed but # don't compare lon/lat sizes if we won't be plotting if ( (aData.shape == bData.shape) @@ -895,7 +899,7 @@ def reportGen_library_call (a_path, b_path, var_list=[ ], # based on the settings and whether the variable passsed or failed, # should we include images for this variable? if ('only_plot_on_fail' in varRunInfo) and varRunInfo['only_plot_on_fail'] : - include_images_for_this_variable = include_images_for_this_variable and didPass + include_images_for_this_variable = include_images_for_this_variable and (not didPass) varRunInfo['shouldIncludeImages'] = include_images_for_this_variable # to hold the names of any images created @@ -908,78 +912,52 @@ def reportGen_library_call (a_path, b_path, var_list=[ ], # TODO, will need to handle averaged/sliced 3D data at some point if (include_images_for_this_variable) : - # if we only have 1D data, we will want a line plot - if (len(aData.shape) is 1) : - image_names['original'], image_names['compared'] = \ - plot.plot_and_save_comparison_figures \ - (aData, bData, - plot.make_line_plot_plotting_functions, - varRunInfo['variable_dir'], - displayName, - varRunInfo['epsilon'], - varRunInfo['missing_value'], - missingValueAltInB = varRunInfo['missingValueAltInB'] if 'missingValueAltInB' in varRunInfo else None, - makeSmall=True, - doFork=runInfo['doFork'], - shouldClearMemoryWithThreads=runInfo['useThreadsToControlMemory'], - shouldUseSharedRangeForOriginal=runInfo['useSharedRangeForOriginal'], - doPlotOriginal = varRunInfo['do_plot_originals'] if 'do_plot_originals' in varRunInfo else True, - doPlotAbsDiff = varRunInfo['do_plot_abs_diff'] if 'do_plot_abs_diff' in varRunInfo else True, - doPlotSubDiff = varRunInfo['do_plot_sub_diff'] if 'do_plot_sub_diff' in varRunInfo else True, - doPlotTrouble = varRunInfo['do_plot_trouble'] if 'do_plot_trouble' in varRunInfo else True, - doPlotHistogram= varRunInfo['do_plot_histogram'] if 'do_plot_histogram' in varRunInfo else True, - doPlotScatter = varRunInfo['do_plot_scatter'] if 'do_plot_scatter' in varRunInfo else True) - + plotFunctionGenerationObjects = [ ] + + # if the data is the same size, we can always make our basic statistical comparison plots + if (aData.shape == bData.shape) : + plotFunctionGenerationObjects.append(plotcreate.BasicComparisonPlotsFunctionFactory()) + + # TODO, need a way to match data if it is not the same shape + + # if it's vector data with longitude and latitude, quiver plot it on the Earth + if isVectorData and (not do_not_test_with_lon_lat) : + plotFunctionGenerationObjects.append(plotcreate.MappedQuiverPlotFunctionFactory()) + + # if the data is one dimensional we can plot it as lines + elif (len(aData.shape) is 1) : + plotFunctionGenerationObjects.append(plotcreate.LinePlotsFunctionFactory()) + + # if the data is 2D we have some options based on the type of data elif (len(aData.shape) is 2) : - # create 2D images comparing the variable - print("\tcreating figures for: " + explanationName) - if do_not_test_with_lon_lat : - # plot our non lon/lat info - image_names['original'], image_names['compared'] = \ - plot.plot_and_save_comparison_figures \ - (aData, bData, - plot.make_pure_data_plotting_functions, - varRunInfo['variable_dir'], - displayName, - varRunInfo['epsilon'], - varRunInfo['missing_value'], - missingValueAltInB = varRunInfo['missingValueAltInB'] if 'missingValueAltInB' in varRunInfo else None, - makeSmall=True, - doFork=runInfo['doFork'], - shouldClearMemoryWithThreads=runInfo['useThreadsToControlMemory'], - shouldUseSharedRangeForOriginal=runInfo['useSharedRangeForOriginal'], - doPlotOriginal = varRunInfo['do_plot_originals'] if 'do_plot_originals' in varRunInfo else True, - doPlotAbsDiff = varRunInfo['do_plot_abs_diff'] if 'do_plot_abs_diff' in varRunInfo else True, - doPlotSubDiff = varRunInfo['do_plot_sub_diff'] if 'do_plot_sub_diff' in varRunInfo else True, - doPlotTrouble = varRunInfo['do_plot_trouble'] if 'do_plot_trouble' in varRunInfo else True, - doPlotHistogram= varRunInfo['do_plot_histogram'] if 'do_plot_histogram' in varRunInfo else True, - doPlotScatter = varRunInfo['do_plot_scatter'] if 'do_plot_scatter' in varRunInfo else True) + # if the data is not mapped to a longitude and latitude, just show it as an image + if (do_not_test_with_lon_lat) : + plotFunctionGenerationObjects.append(plotcreate.IMShowPlotFunctionFactory()) + + # if it's 2D and mapped to the Earth, contour plot it on the earth else : - # plot our lon/lat related info - image_names['original'], image_names['compared'] = \ - plot.plot_and_save_comparison_figures \ - (aData, bData, - plot.make_geolocated_plotting_functions, - varRunInfo['variable_dir'], - displayName, - varRunInfo['epsilon'], - varRunInfo['missing_value'], - missingValueAltInB = varRunInfo['missingValueAltInB'] if 'missingValueAltInB' in varRunInfo else None, - lonLatDataDict=lon_lat_data, - dataRanges = varRunInfo['display_ranges'] if 'display_ranges' in varRunInfo else None, - dataRangeNames = varRunInfo['display_range_names'] if 'display_range_names' in varRunInfo else None, - dataColors = varRunInfo['display_colors'] if 'display_colors' in varRunInfo else None, - makeSmall=True, - doFork=runInfo['doFork'], - shouldClearMemoryWithThreads=runInfo['useThreadsToControlMemory'], - shouldUseSharedRangeForOriginal=runInfo['useSharedRangeForOriginal'], - doPlotOriginal = varRunInfo['do_plot_originals'] if 'do_plot_originals' in varRunInfo else True, - doPlotAbsDiff = varRunInfo['do_plot_abs_diff'] if 'do_plot_abs_diff' in varRunInfo else True, - doPlotSubDiff = varRunInfo['do_plot_sub_diff'] if 'do_plot_sub_diff' in varRunInfo else True, - doPlotTrouble = varRunInfo['do_plot_trouble'] if 'do_plot_trouble' in varRunInfo else True, - doPlotHistogram= varRunInfo['do_plot_histogram'] if 'do_plot_histogram' in varRunInfo else True, - doPlotScatter = varRunInfo['do_plot_scatter'] if 'do_plot_scatter' in varRunInfo else True) + plotFunctionGenerationObjects.append(plotcreate.MappedContourPlotFunctionFactory()) + + # plot our lon/lat related info + image_names['original'], image_names['compared'] = \ + plot.plot_and_save_comparison_figures \ + (aData, bData, + plotFunctionGenerationObjects, + varRunInfo['variable_dir'], + displayName, + varRunInfo['epsilon'], + varRunInfo['missing_value'], + missingValueAltInB = varRunInfo['missingValueAltInB'] if 'missingValueAltInB' in varRunInfo else None, + lonLatDataDict=lon_lat_data, + dataRanges = varRunInfo['display_ranges'] if 'display_ranges' in varRunInfo else None, + dataRangeNames = varRunInfo['display_range_names'] if 'display_range_names' in varRunInfo else None, + dataColors = varRunInfo['display_colors'] if 'display_colors' in varRunInfo else None, + makeSmall=True, + doFork=runInfo['doFork'], + shouldClearMemoryWithThreads=runInfo['useThreadsToControlMemory'], + shouldUseSharedRangeForOriginal=runInfo['useSharedRangeForOriginal'], + doPlotSettingsDict = varRunInfo) print("\tfinished creating figures for: " + explanationName) diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py index 6169b9f73531be1c027d4b0613dd2bf099eecb84..aac25e4c5e228f6e75a7f03e277867a9c00a943b 100644 --- a/pyglance/glance/delta.py +++ b/pyglance/glance/delta.py @@ -124,6 +124,7 @@ def corr(x,y,mask): return nan return compute_r(xf,yf)[0] +# TODO, should this ultimately be removed? def rms_corr_withnoise(truth, actual, noiz, epsilon=0., (amissing,bmissing)=(None,None), plot=None): """ compute RMS and R statistics for truth vs actual and truth+noiz vs actual """ @@ -168,7 +169,7 @@ def stats(diffData, mask, *etc): return { } absDiffData = abs(diffData) - rms = sqrt( sum(diffData[mask] ** 2) / sum(mask) ) + rms = calculate_root_mean_square(diffData, mask) return { 'rms_diff': rms, 'std_diff': std(absDiffData[mask]), 'mean_diff': mean(absDiffData[mask]), @@ -176,6 +177,143 @@ def stats(diffData, mask, *etc): 'max_diff': max(absDiffData[mask]) } +def convert_mag_dir_to_U_V_vector(magnitude_data, direction_data, invalidMask=None): + """ + This method is intended to convert magnitude and direction data into (U, V) vector data. + An invalid mask may be given if some of the points in the set should be masked out. + + TODO, this method is not fully tested + """ + + if invalidMask is None : + invalidMask = zeros(magnitude_data.shape, dtype=bool) + + uData = zeros(magnitude_data.shape, dtype=float) + uData[invalidMask] = nan + uData[~invalidMask] = magnitude_data[~invalidMask] * cos (direction_data[~invalidMask]) + + vData = zeros(magnitude_data.shape, dtype=float) + vData[invalidMask] = nan + vData[~invalidMask] = magnitude_data[~invalidMask] * sin (direction_data[~invalidMask]) + + return uData, vData + +def calculate_root_mean_square (data, goodMask=None) : + """ + calculate the root mean square of the data, + possibly selecting only the points in the given + goodMask, if no mask is given, all points will + be used + """ + if goodMask is None: + goodMask = np.ones(data.shape, dtype=bool) + + rootMeanSquare = sqrt( sum( data[goodMask] ** 2 ) / sum( goodMask ) ) + + return rootMeanSquare + +def organize_dimensions_and_produce_rms_info(dataA, dataB, binDimensionIndexNum, tupleDimensionIndexNum, + epsilon=0., (a_missing_value, b_missing_value)=(None, None), + (ignore_mask_a, ignore_mask_b)=(None, None)) : + """ + reorganize the dimensions in the given data sets in order to put the bin dimension first + and the tuple dimension last + collapse all other dimensions into a single "case" dimension, but preserve information on their + previous shape so that the coresponding index of a given case can be recovered + + once the data is reshaped to the desired ordering with the appopriate set of cases, diff the + data and calculate the rms on a per case basis + + return reshaped original and diffrence data sets, masks as corresponding to the output of diff and + matching the shape of the reshaped data, and information on the shape of the collapsed case dimensions + + TODO, this method has not yet been tested or integrated with the rest of glance + """ + + # make sure we meet the minimal requirement for compatable shapes/index sizes + assert(dataA.shape is dataB.shape) + assert(binDimensionIndexNum < len(dataA.shape)) + assert(tupleDimensionIndexNum < len(dataA.shape)) + assert(tupleDimensionIndexNum is not binDimensionIndexNum) + + # figure out the order to put the index we want first + new_index_order = range(len(dataA.shape)) + smaller = tupleDimensionIndexNum + larger = binDimensionIndexNum + if (tupleDimensionIndexNum > binDimensionIndexNum) : + larger = tupleDimensionIndexNum + smaller = binDimensionIndexNum + del(new_index_order[larger]) + del(new_index_order[smaller]) + new_index_order = [binDimensionIndexNum] + new_index_order + [tupleDimensionIndexNum] + + # copy our data, then put the indexes into the new order + new_a_data = dataA.copy() + new_a_data = new_a_data.transpose(new_index_order) + new_b_data = dataB.copy() + new_b_data = new_b_data.transpose(new_index_order) + # if our masks need to be reordered, do that + new_a_missing_mask = a_missing_mask + if new_a_missing_mask is not None : + new_a_missing_mask = new_a_missing_mask.copy() + new_a_missing_mask = new_a_missing_mask.transpose(new_index_order) + new_b_missing_mask = b_missing_mask + if new_b_missing_mask is not None : + new_b_missing_mask = new_b_missing_mask.copy() + new_b_missing_mask = new_b_missing_mask.transpose(new_index_order) + new_a_ignore_mask = ignore_mask_a + if new_a_ignore_mask is not None : + new_a_ignore_mask = new_a_ignore_mask.copy() + new_a_ignore_mask = new_a_ignore_mask.transpose(new_index_order) + new_b_ignore_mask = ignore_mask_b + if new_b_ignore_mask is not None : + new_b_ignore_mask = new_b_ignore_mask.copy() + new_b_ignore_mask = new_b_ignore_mask.transpose(new_index_order) + + # get the shape information on the internal dimensions we're going to combine + case_dimension_original_shape = new_b_data.shape[1:-1] + + # combine the internal dimensions, to figure out what shape things + # will be with the flattened cases + numDimensionsToFlatten = len(case_dimension_original_shape) + sizeAfterFlattened = np.multiply.accumulate(case_dimension_original_shape)[-1] + newShape = (new_a_data.shape[0], sizeAfterFlattened, new_a_data.shape[-1]) + + # flatten the case dimensions + new_a_data = new_a_data.reshape(newShape) + new_b_data = new_b_data.reshape(newShape) + if new_a_missing_mask is not None : + new_a_missing_mask = new_a_missing_mask.reshape(newShape) + if new_b_missing_mask is not None : + new_b_missing_mask = new_b_missing_mask.reshape(newShape) + if new_a_ignore_mask is not None : + new_a_ignore_mask = new_a_ignore_mask.reshape(newShape) + if new_b_ignore_mask is not None : + new_b_ignore_mask = new_b_ignore_mask.reshape(newShape) + + # diff our data + rawDiffData, goodInBothMask, (goodInAMask, goodInBMask), troubleMask, outsideEpsilonMask, \ + (aNotFiniteMask, bNotFiniteMask), (aMissingMask, bMissingMask), (finalAIgnoreMask, finalBIgnoreMask) = diffOutput = \ + diff(new_a_data, new_b_data, epsilon, (new_a_missing_mask, new_b_missing_mask), (new_a_ignore_mask, new_b_ignore_mask)) + + # calculate the rms diffs for all the cases + caseRMSInfo = np.zeros(rawDiffData.shape[:-1], dtype=np.float64) + # TODO, how to do this without a loop? + for caseNum in range(sizeAfterFlattened) : + caseRMSInfo[caseNum] = calculate_root_mean_square(rawDiffData[caseNum], goodMask=goodInBothMask[caseNum]) + + # return the reshaped original data, + # information on the original shape of the case dimension that was flattened, + # the rms diff info, + # and then the full set of info that came out of the diff between the reshaped a and b data + return (new_a_data, new_b_data), case_dimension_original_shape, caseRMSInfo, diffOutput + """ + diffOutput is in the form: + + rawDiffData, goodInBothMask, (goodInAMask, goodInBMask), troubleMask, outsideEpsilonMask, \ + (aNotFiniteMask, bNotFiniteMask), (aMissingMask, bMissingMask), (finalAIgnoreMask, finalBIgnoreMask) + """ + def _get_num_perfect(a, b, ignoreMask=None): numPerfect = 0 if not (ignoreMask is None) : diff --git a/pyglance/glance/figures.py b/pyglance/glance/figures.py new file mode 100644 index 0000000000000000000000000000000000000000..4228719e7acf2336c4f3c9f03485ac77b4eeb435 --- /dev/null +++ b/pyglance/glance/figures.py @@ -0,0 +1,544 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +Plotting routines for different types of figures using matplotlib + +Created by evas Dec 2009. +Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved. +""" + +# these first two lines must stay before the pylab import +import matplotlib +matplotlib.use('Agg') # use the Anti-Grain Geometry rendering engine + +from pylab import * + +from mpl_toolkits.mplot3d import axes3d +import matplotlib.pyplot as plt +from matplotlib import cm +import matplotlib.colors as colors +from matplotlib.ticker import FormatStrFormatter + +from PIL import Image + +import os, sys, logging +import numpy as np +from numpy import ma + +import glance.graphics as maps +import glance.delta as delta +import glance.report as report + +LOG = logging.getLogger(__name__) + +# TODO this value is being used to work around a problem with the contourf +# and how it handles range boundaries. Find a better solution if at all possible. +offsetToRange = 0.0000000000000000001 + +# make an all green color map +greenColorMapData = { + 'red' : ((0.0, 0.00, 0.00), + (1.0, 0.00, 0.00)), + 'green' : ((0.0, 1.00, 1.00), + (1.0, 1.00, 1.00)), + 'blue' : ((0.0, 0.00, 0.00), + (1.0, 0.00, 0.00)) +} +greenColorMap = colors.LinearSegmentedColormap('greenColorMap', greenColorMapData, 256) + +# todo, the use off the offset here is covering a problem with +# contourf hiding data exactly at the end of the range and should +# be removed if a better solution can be found +def _make_range(data_a, invalid_a_mask, num_intervals, offset_to_range=0.0, data_b=None, invalid_b_mask=None) : + """ + get an array with numbers representing the bounds of a set of ranges + that covers all the data present in data_a + (these may be used for plotting the data) + if an offset is passed, the outtermost range will be expanded by that much + if the b data is passed, a total range that encompasses both sets of + data will be used + """ + minVal = delta.min_with_mask(data_a, invalid_a_mask) + maxVal = delta.max_with_mask(data_a, invalid_a_mask) + + # if we have a second set of data, include it in the min/max calculations + if (data_b is not None) : + minVal = min(delta.min_with_mask(data_b, invalid_b_mask), minVal) + maxVal = max(delta.max_with_mask(data_b, invalid_b_mask), maxVal) + + + minVal = minVal - offset_to_range + maxVal = maxVal + offset_to_range + + return np.linspace(minVal, maxVal, num_intervals) + +def _plot_tag_data_simple(tagData) : + """ + This method will plot tag data listed as true in the + tagData mask on the current figure. It is assumed that + the correlation between the mask and the pixel coordinates + is exact (ie. no translation is needed). + + The return will be the number of points plotted or + -1 if no valid tagData was given. + """ + + numTroublePoints = -1 + + # if there are "tag" masks, plot them over the existing map + if not (tagData is None) : + + numTroublePoints = sum(tagData) + + # if we have trouble points, we need to show them + if numTroublePoints > 0: + + # figure out how many bad points there are + totalNumPoints = tagData.size # the number of points + percentBad = (float(numTroublePoints) / float(totalNumPoints)) * 100.0 + LOG.debug('\t\tnumber of trouble points: ' + str(numTroublePoints)) + LOG.debug('\t\tpercent of trouble points: ' + str(percentBad)) + + new_kwargs = {} + new_kwargs['cmap'] = greenColorMap + cleanTagData = ma.array(tagData, mask=~tagData) + p = contourf(cleanTagData, **new_kwargs) + # TODO, need to incorporate plot for small numbers of pts + + # display the number of trouble points on the report if we were passed a set of tag data + troublePtString = '\n\nShowing ' + str(numTroublePoints) + ' Trouble Points' + # if our plot is more complex, add clarification + if numTroublePoints > 0 : + troublePtString = troublePtString + ' in Green' + plt.xlabel(troublePtString) + + return numTroublePoints + +def _plot_tag_data_mapped(bMap, tagData, x, y, addExplinationLabel=True) : + """ + This method will plot the tagged data listed as true in the tagData mask + on the current figure using the given basemap. + + A message will also be added below the map describing the number of + points plotted, unless the addExplinationLabel variable is passed as False. + + The return will be the number of points plotted or + -1 if no valid tagData was given. + + numTroublePoints = _plot_tag_data_mapped(bMap, tagData, x, y) + """ + + numTroublePoints = -1 + + # if there are "tag" masks, plot them over the existing map + if (tagData is not None) and (tagData.size > 0) : + + # look at how many trouble points we have + numTroublePoints = sum(tagData) + neededHighlighting = False + + if numTroublePoints > 0 : + + # pick out the cooridinates of the points we want to plot + newX = np.array(x[tagData]) + newY = np.array(y[tagData]) + + print ("****newX type: " + str(type(newX))) + print ("****newY type: " + str(type(newY))) + + # figure out how many bad points there are + totalNumPoints = x.size # the number of points + percentBad = (float(numTroublePoints) / float(totalNumPoints)) * 100.0 + LOG.debug('\t\tnumber of trouble points: ' + str(numTroublePoints)) + LOG.debug('\t\tpercent of trouble points: ' + str(percentBad)) + + # if there are very few points, make them easier to notice + # by plotting some colored circles underneath them + if (percentBad < 0.25) or (totalNumPoints < 20) : + neededHighlighting = True + p = bMap.plot(newX, newY, 'o', color='#993399', markersize=5) + elif (percentBad < 1.0) or (totalNumPoints < 200) : + neededHighlighting = True + p = bMap.plot(newX, newY, 'o', color='#993399', markersize=3) + + # if there are way too many trouble points, we can't use plot for this + if (numTroublePoints > 1000000) : + new_kwargs = {} + new_kwargs['cmap'] = greenColorMap + p = maps.show_x_y_data(x, y, bMap, data=tagData, **new_kwargs) + else : + # plot our point on top of the existing figure + p = bMap.plot(newX, newY, '.', color='#00FF00', markersize=1) + + if addExplinationLabel : + # display the number of trouble points on the report if we were passed a set of tag data + # I'm not thrilled with this solution for getting it below the labels drawn by the basemap + # but I don't think there's a better one at the moment given matplotlib's workings + troublePtString = '\n\nShowing ' + str(numTroublePoints) + ' Trouble Points' + # if our plot is more complex, add clarification + if numTroublePoints > 0 : + troublePtString = troublePtString + ' in Green' + if neededHighlighting : + troublePtString = troublePtString + '\nwith Purple Circles for Visual Clarity' + plt.xlabel(troublePtString) + + return numTroublePoints + +# build a scatter plot of the x,y points +def create_scatter_plot(dataX, dataY, title, xLabel, yLabel, badMask=None, epsilon=None) : + + # make the figure + figure = plt.figure() + axes = figure.add_subplot(111) + + # if we have "bad" data to plot, pull it out + badX = None + badY = None + if (badMask != None) : + badX = dataX[badMask] + badY = dataY[badMask] + dataX = dataX[~badMask] + dataY = dataY[~badMask] + + # the scatter plot of the good data + axes.plot(dataX, dataY, 'b,', label='within\nepsilon') + + # plot the bad data + numTroublePts = 0 + if (badX is not None) and (badY is not None) and (badMask is not None) : + numTroublePts = badX.shape[0] + LOG.debug('\t\tnumber of trouble points in scatter plot: ' + str(badX.shape[0])) + if numTroublePts > 0 : + axes.plot(badX, badY, 'r,', label='outside\nepsilon') + + # draw the line for the "perfect fit" + xbounds = axes.get_xbound() + xrange = xbounds[1] - xbounds[0] + ybounds = axes.get_ybound() + yrange = ybounds[1] - ybounds[0] + perfect = [max(xbounds[0], ybounds[0]), min(xbounds[1], ybounds[1])] + axes.plot(perfect, perfect, 'k--', label='A = B') + + # now draw the epsilon bound lines if they are visible and the lines won't be the same as A = B + if (not (epsilon is None)) and (epsilon > 0.0) and (epsilon < xrange) and (epsilon < yrange): + # plot the top line + axes.plot([perfect[0], perfect[1] - epsilon], [perfect[0] + epsilon, perfect[1]], '--', color='#00FF00', label='+/-epsilon') + # plot the bottom line + axes.plot([perfect[0] + epsilon, perfect[1]], [perfect[0], perfect[1] - epsilon], '--', color='#00FF00') + + # make a key to explain our plot + # as long as things have been plotted with proper labels they should show up here + axes.legend(loc=0, markerscale=3.0) # Note: at the moment markerscale doesn't seem to work + + # and some informational stuff + axes.set_title(title) + plt.xlabel(xLabel) + plt.ylabel(yLabel) + + # format our axes so they display gracefully + yFormatter = FormatStrFormatter("%4.4g") + axes.yaxis.set_major_formatter(yFormatter) + xFormatter = FormatStrFormatter("%4.4g") + axes.xaxis.set_major_formatter(xFormatter) + + return figure + +# build a histogram figure of the given data with the given title and number of bins +def create_histogram(data, bins, title, xLabel, yLabel, displayStats=False) : + + # make the figure + figure = plt.figure() + axes = figure.add_subplot(111) + + if (data is None) or (len(data) <= 0) : + return figure + + # the histogram of the data + n, bins, patches = plt.hist(data, bins) + + # format our axes so they display gracefully + yFormatter = FormatStrFormatter("%3.3g") + axes.yaxis.set_major_formatter(yFormatter) + xFormatter = FormatStrFormatter("%.4g") + axes.xaxis.set_major_formatter(xFormatter) + + # and some informational stuff + axes.set_title(title) + plt.xlabel(xLabel) + plt.ylabel(yLabel) + + # if stats were passed in, put some of the information on the graph + # the location is in the form x, y (I think) + if displayStats : + # info on the basic stats + tempMask = ones(data.shape,dtype=bool) + tempStats = delta.stats(data, tempMask, None) + medianVal = tempStats['median_diff'] + meanVal = tempStats['mean_diff'] + stdVal = tempStats['std_diff'] + numPts = data.size + + # info on the display of our statistics + xbounds = axes.get_xbound() + numBinsToUse = len(bins) + xrange = xbounds[1] - xbounds[0] + binSize = xrange / float(numBinsToUse) + + # build the display string + statText = ('%1.2e' % numPts) + ' data points' + statText = statText + '\n' + 'mean: ' + report.make_formatted_display_string(meanVal) + statText = statText + '\n' + 'median: ' + report.make_formatted_display_string(medianVal) + statText = statText + '\n' + 'std: ' + report.make_formatted_display_string(stdVal) + statText = statText + '\n\n' + 'bins: ' + report.make_formatted_display_string(numBinsToUse) + statText = statText + '\n' + 'bin size ' + report.make_formatted_display_string(binSize) + + # figure out where to place the text and put it on the figure + centerOfDisplay = xbounds[0] + (float(xrange) / 2.0) + xValToUse = 0.67 + # if most of the values will be on the right, move our text to the left... + if (medianVal > centerOfDisplay) : + xValToUse = 0.17 + figtext(xValToUse, 0.60, statText) + + return figure + +# create a figure including our data mapped onto a map at the lon/lat given +# the colorMap parameter can be used to control the colors the figure is drawn in +# if any masks are passed in the tagData list they will be plotted as an overlays +# set on the existing image +def create_mapped_figure(data, latitude, longitude, baseMapInstance, boundingAxes, title, + invalidMask=None, colorMap=None, tagData=None, + dataRanges=None, dataRangeNames=None, dataRangeColors=None, **kwargs) : + + # make a clean version of our lon/lat + latitudeClean = ma.array(latitude, mask=~invalidMask) + longitudeClean = ma.array(longitude, mask=~invalidMask) + + # build the plot + figure = plt.figure() + axes = figure.add_subplot(111) + + # build extra info to go to the map plotting function + kwargs = { } + + # figure the range for the color bars + # this is controllable with the "dataRanges" parameter for discrete data display + if not (data is None) : + if dataRanges is None : + dataRanges = _make_range(data, invalidMask, 50, offset_to_range=offsetToRange) + else: # make sure the user range will not discard data TODO, find a better way to handle this + dataRanges[0] = dataRanges[0] - offsetToRange + dataRanges[len(dataRanges) - 1] = dataRanges[len(dataRanges) - 1] + offsetToRange + kwargs['levelsToUse'] = dataRanges + if dataRangeColors is not None : + kwargs['colors'] = dataRangeColors # add in the list of colors (may be None) + + # if we've got a color map, pass it to the list of things we want to tell the plotting function + if not (colorMap is None) : + kwargs['cmap'] = colorMap + + # draw our data placed on a map + #bMap, x, y = maps.mapshow(longitudeClean, latitudeClean, data, boundingAxes, **kwargs) + maps.draw_basic_features(baseMapInstance, boundingAxes) + bMap, x, y = maps.show_lon_lat_data(longitudeClean, latitudeClean, baseMapInstance, data=data, **kwargs) + + # and some informational stuff + axes.set_title(title) + # show a generic color bar + doLabelRanges = False + if not (data is None) : + cbar = colorbar(format='%.3f') + # if there are specific requested labels, add them + if not (dataRangeNames is None) : + + # if we don't have exactly the right number of range names to label the ranges + # then label the tick marks + if not (len(dataRangeNames) is (len(dataRanges) - 1)) : + cbar.ax.set_yticklabels(dataRangeNames) + else : # we will want to label the ranges themselves + cbar.ax.set_yticklabels(dataRangeNames) # todo, this line is temporary + doLabelRanges = True + + numTroublePoints = _plot_tag_data_mapped(bMap, tagData, x, y) + + print ('number of trouble points: ' + str(numTroublePoints)) + + # if we still need to label the ranges, do it now that our fake axis won't mess the trouble points up + if doLabelRanges : + """ TODO get this working properly + fakeAx = plt.axes ([0.77, 0.05, 0.2, 0.9], frameon=False) + fakeAx.xaxis.set_visible(False) + fakeAx.yaxis.set_visible(False) + + testRect = Rectangle((0, 0), 1, 1, fc="r") + legendKey = fakeAx.legend([testRect], ["r\n\n\n"], mode="expand", ncol=1, borderaxespad=0.) + """ + + return figure + +# create a figure including a quiver plot of our vector data mapped onto a map at the lon/lat +# given, the colorMap parameter can be used to control the colors the figure is drawn. +# if any masks are passed in the tagData list they will be plotted as an overlays +# set on the existing image +# TODO, this method has not been throughly tested +def create_quiver_mapped_figure(data, latitude, longitude, baseMapInstance, boundingAxes, title, + invalidMask=None, tagData=None, uData=None, vData=None, **kwargs) : + + # make a clean version of our lon/lat/data + latitudeClean = latitude[~invalidMask] + longitudeClean = longitude[~invalidMask] + colorData = None + if (data is not None) : + colorData = data[~invalidMask] + uDataClean = None + vDataClean = None + if (uData is not None) and (vData is not None) : + uDataClean = uData[~invalidMask] + vDataClean = vData[~invalidMask] + tagDataClean = None + if tagData is not None : + tagDataClean = tagData[~invalidMask] + + # build the plot + figure = plt.figure() + axes = figure.add_subplot(111) + + # draw our data placed on a map + maps.draw_basic_features(baseMapInstance, boundingAxes) + bMap, x, y = maps.show_quiver_plot (longitudeClean, latitudeClean, baseMapInstance, (uData, vData), colordata=colorData) + + # show the title + axes.set_title(title) + + numTroublePoints = _plot_tag_data_mapped(bMap, tagDataClean, x, y) + + return figure + +def create_simple_figure(data, figureTitle, invalidMask=None, tagData=None, colorMap=None) : + """ + create a simple figure showing the data given masked by the invalid mask + any tagData passed in will be interpreted as trouble points on the image and plotted as a + filled contour overlay in green on the image + if a colorMap is given it will be used to plot the data, + if not the default colorMap for imshow will be used + """ + + cleanData = ma.array(data, mask=invalidMask) + + # build the plot + figure = plt.figure() + axes = figure.add_subplot(111) + + # build extra info to go to the map plotting function + kwargs = { } + + # if we've got a color map, pass it to the list of things we want to tell the plotting function + if not (colorMap is None) : + kwargs['cmap'] = colorMap + + if (data is not None) and (sum(invalidMask) < invalidMask.size) : + # draw our data + im = imshow(cleanData, **kwargs) + # make a color bar + cbar = colorbar(format='%.3f') + + # and some informational stuff + axes.set_title(figureTitle) + + numTroublePoints = _plot_tag_data_simple(tagData) + + return figure + +def create_line_plot_figure(dataList, figureTitle) : + """ + create a basic line plot of the data vs. it's index, ignoring any invalid data + if tagData is given, under-label those points with green circles + + Each entry in the dataList should be a tupple containing: + (data, invalidMask, colorString, labelName, tagData) + + The color string describes a color for plotting in matplotlib. + The label names will be used for the legend, which will be shown if there is + more than one set of data plotted or if there is tag data plotted. Invalid + masks, colors, and label names may be given as None, in which case no data + will be masked and a default label of "data#" (where # is an arbitrary + unique counter) will be used. + tagData may also be passed as None if tagging is not desired in the output. + """ + + # build the plot + figure = plt.figure() + axes = figure.add_subplot(111) + + # plot each of the data sets + dataSetLabelNumber = 1 + minTagPts = -1 + maxTagPts = -1 + plottedTagData = False + for dataSet, invalidMask, colorString, labelName, tagData in dataList : + + # if we don't have these, set them to defaults + if invalidMask is None : + invalidMask = zeros(dataSet.shape, dtype=bool) + if labelName is None : + labelName = 'data' + str(dataSetLabelNumber) + dataSetLabelNumber = dataSetLabelNumber + 1 + if colorString is None: + colorString = '' + + if (dataSet is not None) and (sum(invalidMask) < invalidMask.size) : + + # if we don't have a real min yet, set it based on the size + if minTagPts < 0 : + minTagPts = dataSet.size + 1 + + indexData = ma.array(range(dataSet.size), mask=invalidMask) + cleanData = ma.array(dataSet, mask=invalidMask) + + # plot the tag data and gather information about it + if tagData is not None : + + plottedTagData = True + numTroublePoints = sum(tagData) + LOG.debug('\t\tnumber of trouble points: ' + str(numTroublePoints)) + if numTroublePoints < minTagPts: + minTagPts = numTroublePoints + if numTroublePoints > maxTagPts : + maxTagPts = numTroublePoints + + # if we have trouble points, we need to show them + if numTroublePoints > 0: + + cleanTagData = ma.array(dataSet, mask=~tagData | invalidMask) + axes.plot(indexData, cleanTagData, 'yo', label='trouble point') + + axes.plot(indexData, cleanData, '-' + colorString, label=labelName) + + # display the number of trouble points on the report if we were passed + # a set of tag data and we were able to compare it to some actual data + if (plottedTagData and (minTagPts >= 0) and (maxTagPts >=0)) : + + troublePtString = '\nMarking ' + + if (minTagPts == maxTagPts) : + troublePtString = troublePtString + str(minTagPts) + ' Trouble Points with Yellow Circles' + else : + troublePtString = (troublePtString + 'between ' + str(minTagPts) + ' and ' + str(maxTagPts) + ' Trouble Points' + + '\non the various data sets (using Yellow Circles)') + + plt.xlabel(troublePtString) + + if (len(dataList) > 1) or plottedTagData : + # make a key to explain our plot + # as long as things have been plotted with proper labels they should show up here + #axes.legend(loc=0, markerscale=3.0) # Note: at the moment markerscale doesn't seem to work # TODO, turn back on? + pass + + # and some informational stuff + axes.set_title(figureTitle) + + return figure + +if __name__=='__main__': + import doctest + doctest.testmod() diff --git a/pyglance/glance/filters.py b/pyglance/glance/filters.py index ba81d09830a96b46083977b472415033b642660d..03b9f553c119b87e1c73f5b69743cd61e57e85bc 100644 --- a/pyglance/glance/filters.py +++ b/pyglance/glance/filters.py @@ -177,7 +177,7 @@ def collapse_to_index(data, index, collapsing_function=np.mean, new_data = new_data.transpose(new_index_order) # find any bad points that we don't want to collapse - bad_mask = np.zeros(new_data.shape, dtype=bool) + bad_mask = ~np.isfinite(new_data) if missing_value is not None : bad_mask[new_data == missing_value] = True if ignore_above_exclusive is not None : @@ -217,8 +217,13 @@ def organize_ipopp_data_into_image(original_ipopp_data, wave_number=None, missin |_|_|_| |_|_|_| - block of the 9 detectors (TODO order? - for the moment assuming increasing linear by line then field of regard) + block of the 9 detectors + + with the index to physical mapping: + + 0 1 2 + 3 4 5 + 6 7 8 for each detector point, if the wave_number was given in the parameters, that specific interferogram data pt will be used diff --git a/pyglance/glance/graphics.py b/pyglance/glance/graphics.py index a6bfa40af61418c148ee93e1b1b49bdfab80e869..c2041007d11d71976def95ed617257e0d73f5304 100644 --- a/pyglance/glance/graphics.py +++ b/pyglance/glance/graphics.py @@ -57,7 +57,7 @@ def create_basemap (lon, lat=None, axis=None, projection='lcc', resolution='i') m = None if projection is 'ortho' : # orthographic projections require this call - m = Basemap(resolution=resolution, projection=projection, + m = Basemap(resolution=resolution, area_thresh=10000., projection=projection, lat_0=lat_mid, lon_0=lon_mid) else : # most of the other projections use this call @@ -84,9 +84,9 @@ def draw_basic_features(baseMapInstance, axis) : lat_top = axis[3] # draw the parallels and meridians - parallels = arange(-80.,90,abs(lat_top - lat_bottom) / 4) + parallels = arange(-80.,90.,abs(lat_top - lat_bottom) / 4.0) baseMapInstance.drawparallels(parallels,labels=[1,0,0,1]) - meridians = arange(0.,360.,abs(lon_left - lon_right) / 4) + meridians = arange(0., 360.,abs(lon_left - lon_right) / 4.0) baseMapInstance.drawmeridians(meridians,labels=[1,0,0,1]) return @@ -108,18 +108,36 @@ def show_x_y_data(x, y, baseMapInstance, data=None, levelsToUse=None, **kwargs) artistsAdded = [ ] # only try to plot the data if there is some - if data!=None: + if data is not None: newX = make_2D_version_if_needed(x, badLonLat) newY = make_2D_version_if_needed(y, badLonLat) newData = make_2D_version_if_needed(data, nan) - if levelsToUse != None : + if levelsToUse is not None : p = baseMapInstance.contourf(newX, newY, newData, levelsToUse, **kwargs) else : p = baseMapInstance.contourf(newX, newY, newData, **kwargs) - # return the original x and y so the match any external data in shape + # return the original x and y so the caller can match any external data in shape + return baseMapInstance, x, y + +def show_quiver_plot (lon, lat, baseMapInstance, (uData, vData)=(None,None), colordata=None, **kwargs) : + """ + Show a quiver plot of the given vector data at the given longitude and latitude + """ + + x, y = baseMapInstance(lon, lat) # translate into the coordinate system of the basemap + + # show the quiver plot if there is data + + if (uData is not None) and (vData is not None) : + if colordata is None: + p = baseMapInstance.quiver(x, y, uData, vData, **kwargs) + else : + p = baseMapInstance.quiver(x, y, uData, vData, colordata, **kwargs) + + # return the original x and y so the caller can match any external data in shape return baseMapInstance, x, y def make_2D_version_if_needed(originalArray, fillValue) : diff --git a/pyglance/glance/io.py b/pyglance/glance/io.py index 0ffd7ba8f37153cfff530a6f13f6fb773324bad2..44deb6cf78fe66968f1f995b2659fc88831a2628 100644 --- a/pyglance/glance/io.py +++ b/pyglance/glance/io.py @@ -200,12 +200,12 @@ class h5(object): variableList = [ ] def testFn (name, obj) : #print ('checking name: ' + name) - print ('object: ' + str(obj)) + #print ('object: ' + str(obj)) if isinstance(obj, h5py.Dataset) : try : tempType = obj.dtype # this is required to provoke a type error for closed data sets - #TODO, why are there closed data sets?!? + LOG.debug ('type: ' + str(tempType)) variableList.append(name) except TypeError : diff --git a/pyglance/glance/plot.py b/pyglance/glance/plot.py index 4f57bf1cfab06cccbc9bbd994ebce6c364b9f557..1acbc3fda43c90cee39426273ad7146399c8d018 100644 --- a/pyglance/glance/plot.py +++ b/pyglance/glance/plot.py @@ -13,6 +13,7 @@ matplotlib.use('Agg') # use the Anti-Grain Geometry rendering engine from pylab import * +from mpl_toolkits.mplot3d import axes3d import matplotlib.pyplot as plt from matplotlib import cm import matplotlib.colors as colors @@ -25,588 +26,18 @@ import numpy as np from numpy import ma import glance.graphics as maps -import glance.delta as delta -import glance.report as report +import glance.delta as delta +import glance.report as report +import glance.figures as figures +import glance.plotcreatefns as plotfns LOG = logging.getLogger(__name__) -# TODO this value is being used to work around a problem with the contourf -# and how it handles range boundaries. Find a better solution if at all possible. -offsetToRange = 0.0000000000000000001 - -# the value that will denote "bad" longitudes and latitudes -badLonLat = maps.badLonLat - # a constant for the larger size dpi fullSizeDPI = 150 # 200 # a constant for the thumbnail size dpi thumbSizeDPI = 50 -# make a custom medium grayscale color map for putting our bad data on top of -mediumGrayColorMapData = { - 'red' : ((0.0, 1.00, 1.00), - (0.5, 0.60, 0.60), - (1.0, 0.20, 0.20)), - 'green' : ((0.0, 1.00, 1.00), - (0.5, 0.60, 0.60), - (1.0, 0.20, 0.20)), - 'blue' : ((0.0, 1.00, 1.00), - (0.5, 0.60, 0.60), - (1.0, 0.20, 0.20)) -} -mediumGrayColorMap = colors.LinearSegmentedColormap('mediumGrayColorMap', mediumGrayColorMapData, 256) - -# make an all green color map -greenColorMapData = { - 'red' : ((0.0, 0.00, 0.00), - (1.0, 0.00, 0.00)), - 'green' : ((0.0, 1.00, 1.00), - (1.0, 1.00, 1.00)), - 'blue' : ((0.0, 0.00, 0.00), - (1.0, 0.00, 0.00)) -} -greenColorMap = colors.LinearSegmentedColormap('greenColorMap', greenColorMapData, 256) - -# build a scatter plot of the x,y points -def _create_scatter_plot(dataX, dataY, title, xLabel, yLabel, badMask=None, epsilon=None) : - - # make the figure - figure = plt.figure() - axes = figure.add_subplot(111) - - # if we have "bad" data to plot, pull it out - badX = None - badY = None - if (badMask != None) : - badX = dataX[badMask] - badY = dataY[badMask] - dataX = dataX[~badMask] - dataY = dataY[~badMask] - - # the scatter plot of the good data - axes.plot(dataX, dataY, 'b,', label='within\nepsilon') - - # plot the bad data - numTroublePts = 0 - if (badMask != None) : - numTroublePts = badX.shape[0] - LOG.debug('\t\tnumber of trouble points in scatter plot: ' + str(badX.shape[0])) - if numTroublePts > 0 : - axes.plot(badX, badY, 'r,', label='outside\nepsilon') - - # draw the line for the "perfect fit" - xbounds = axes.get_xbound() - xrange = xbounds[1] - xbounds[0] - ybounds = axes.get_ybound() - yrange = ybounds[1] - ybounds[0] - perfect = [max(xbounds[0], ybounds[0]), min(xbounds[1], ybounds[1])] - axes.plot(perfect, perfect, 'k--', label='A = B') - - # now draw the epsilon bound lines if they are visible and the lines won't be the same as A = B - if (not (epsilon is None)) and (epsilon > 0.0) and (epsilon < xrange) and (epsilon < yrange): - # plot the top line - axes.plot([perfect[0], perfect[1] - epsilon], [perfect[0] + epsilon, perfect[1]], '--', color='#00FF00', label='+/-epsilon') - # plot the bottom line - axes.plot([perfect[0] + epsilon, perfect[1]], [perfect[0], perfect[1] - epsilon], '--', color='#00FF00') - - # make a key to explain our plot - # as long as things have been plotted with proper labels they should show up here - axes.legend(loc=0, markerscale=3.0) # Note: at the moment markerscale doesn't seem to work - - # and some informational stuff - axes.set_title(title) - plt.xlabel(xLabel) - plt.ylabel(yLabel) - - # format our axes so they display gracefully - yFormatter = FormatStrFormatter("%4.4g") - axes.yaxis.set_major_formatter(yFormatter) - xFormatter = FormatStrFormatter("%4.4g") - axes.xaxis.set_major_formatter(xFormatter) - - return figure - -# build a histogram figure of the given data with the given title and number of bins -def _create_histogram(data, bins, title, xLabel, yLabel, displayStats=False) : - - # make the figure - figure = plt.figure() - axes = figure.add_subplot(111) - - if (data is None) or (len(data) <= 0) : - return figure - - # the histogram of the data - n, bins, patches = plt.hist(data, bins) - - # format our axes so they display gracefully - yFormatter = FormatStrFormatter("%3.3g") - axes.yaxis.set_major_formatter(yFormatter) - xFormatter = FormatStrFormatter("%.4g") - axes.xaxis.set_major_formatter(xFormatter) - - # and some informational stuff - axes.set_title(title) - plt.xlabel(xLabel) - plt.ylabel(yLabel) - - # if stats were passed in, put some of the information on the graph - # the location is in the form x, y (I think) - if displayStats : - # info on the basic stats - tempMask = ones(data.shape,dtype=bool) - tempStats = delta.stats(data, tempMask, None) - medianVal = tempStats['median_diff'] - meanVal = tempStats['mean_diff'] - stdVal = tempStats['std_diff'] - numPts = data.size - - # info on the display of our statistics - xbounds = axes.get_xbound() - numBinsToUse = len(bins) - xrange = xbounds[1] - xbounds[0] - binSize = xrange / float(numBinsToUse) - - # build the display string - statText = ('%1.2e' % numPts) + ' data points' - statText = statText + '\n' + 'mean: ' + report.make_formatted_display_string(meanVal) - statText = statText + '\n' + 'median: ' + report.make_formatted_display_string(medianVal) - statText = statText + '\n' + 'std: ' + report.make_formatted_display_string(stdVal) - statText = statText + '\n\n' + 'bins: ' + report.make_formatted_display_string(numBinsToUse) - statText = statText + '\n' + 'bin size ' + report.make_formatted_display_string(binSize) - - # figure out where to place the text and put it on the figure - centerOfDisplay = xbounds[0] + (float(xrange) / 2.0) - xValToUse = 0.67 - # if most of the values will be on the right, move our text to the left... - if (medianVal > centerOfDisplay) : - xValToUse = 0.17 - figtext(xValToUse, 0.60, statText) - - return figure - -# make a "clean" copy of the longitude or latitude array passed in by replacing all -# values which would fall in the provided "invalid" mask with the default bad-lon-lat -# value, which will keep them from being plotted -def _clean_lon_or_lat_with_mask(lon_or_lat_data, invalid_data_mask): - clean_data = empty_like(lon_or_lat_data) - clean_data[~invalid_data_mask] = lon_or_lat_data[~invalid_data_mask] - clean_data[ invalid_data_mask] = badLonLat - - return clean_data - -# chose a projection based on the bounding axes that will be shown -def _select_projection(boundingAxes) : - - # TODO at the moment the default (lcc) is cutting off the field of view, - # I think the same problem will occur with all of the conic projections, because - # they all allow you to specify the field of view either as corners or as a width/height in - # meters, but they do not take the distortion that a conic projection causes into account. - # This means that either the corners or the bottom curve of the data area will be clipped off - # the edge of the screen. There is also some sort of persistent bug that causes the basemap - # to ignore the requested corners for some data sets and shift west and north, cutting off - # a pretty considerable amount of data. I've tried various tactics to control the field of - # view and can't find any way to get the basemap to show an acceptable area programatically - # that will match arbitrary data sets. - # For the moment, I am setting this to use a cylindrical projection rather than a conic. - # At some point in the future this should be revisited so that glance will be able to handle - # a wider range of projections. - projToUse = 'mill' - - # how big is the field of view? - longitudeRange = abs(boundingAxes[1] - boundingAxes[0]) - latitudeRange = abs(boundingAxes[3] - boundingAxes[2]) - # chose the projection based on the range we have to cover - if (longitudeRange > 180) : - projToUse = 'mill' # use a miller cylindrical projection to show the whole world - elif (longitudeRange > 100) or (latitudeRange > 70) : - projToUse = 'ortho' # use an orthographic projection to show about half the globe - - return projToUse - -# todo, the use off the offset here is covering a problem with -# contourf hiding data exactly at the end of the range and should -# be removed if a better solution can be found -def _make_range(data_a, invalid_a_mask, num_intervals, offset_to_range=0.0, data_b=None, invalid_b_mask=None) : - """ - get an array with numbers representing the bounds of a set of ranges - that covers all the data present in data_a - (these may be used for plotting the data) - if an offset is passed, the outtermost range will be expanded by that much - if the b data is passed, a total range that encompasses both sets of - data will be used - """ - minVal = delta.min_with_mask(data_a, invalid_a_mask) - maxVal = delta.max_with_mask(data_a, invalid_a_mask) - - # if we have a second set of data, include it in the min/max calculations - if (data_b is not None) : - minVal = min(delta.min_with_mask(data_b, invalid_b_mask), minVal) - maxVal = max(delta.max_with_mask(data_b, invalid_b_mask), maxVal) - - - minVal = minVal - offset_to_range - maxVal = maxVal + offset_to_range - - return np.linspace(minVal, maxVal, num_intervals) - - -# create a figure including our data mapped onto a map at the lon/lat given -# the colorMap parameter can be used to control the colors the figure is drawn in -# if any masks are passed in the tagData list they will be plotted as an overlays -# set on the existing image -def _create_mapped_figure(data, latitude, longitude, baseMapInstance, boundingAxes, title, - invalidMask=None, colorMap=None, tagData=None, - dataRanges=None, dataRangeNames=None, dataRangeColors=None) : - - # make a clean version of our lon/lat - latitudeClean = _clean_lon_or_lat_with_mask(latitude, invalidMask) - longitudeClean = _clean_lon_or_lat_with_mask(longitude, invalidMask) - - # build the plot - figure = plt.figure() - axes = figure.add_subplot(111) - - # build extra info to go to the map plotting function - kwargs = {} - - # figure the range for the color bars - # this is controllable with the "dataRanges" parameter for discrete data display - if not (data is None) : - if dataRanges is None : - dataRanges = _make_range(data, invalidMask, 50, offset_to_range=offsetToRange) - else: # make sure the user range will not discard data TODO, find a better way to handle this - dataRanges[0] = dataRanges[0] - offsetToRange - dataRanges[len(dataRanges) - 1] = dataRanges[len(dataRanges) - 1] + offsetToRange - kwargs['levelsToUse'] = dataRanges - if dataRangeColors is not None : - kwargs['colors'] = dataRangeColors # add in the list of colors (may be None) - - # if we've got a color map, pass it to the list of things we want to tell the plotting function - if not (colorMap is None) : - kwargs['cmap'] = colorMap - - # draw our data placed on a map - #bMap, x, y = maps.mapshow(longitudeClean, latitudeClean, data, boundingAxes, **kwargs) - maps.draw_basic_features(baseMapInstance, boundingAxes) - bMap, x, y = maps.show_lon_lat_data(longitudeClean, latitudeClean, baseMapInstance, data, **kwargs) - - # and some informational stuff - axes.set_title(title) - # show a generic color bar - doLabelRanges = False - if not (data is None) : - cbar = colorbar(format='%.3f') - # if there are specific requested labels, add them - if not (dataRangeNames is None) : - - # if we don't have exactly the right number of range names to label the ranges - # then label the tick marks - if not (len(dataRangeNames) is (len(dataRanges) - 1)) : - cbar.ax.set_yticklabels(dataRangeNames) - else : # we will want to label the ranges themselves - cbar.ax.set_yticklabels(dataRangeNames) # todo, this line is temporary - doLabelRanges = True - - # if there are "tag" masks, plot them over the existing map - if not (tagData is None) : - - # pick out the cooridinates of the points we want to plot - newX = x[tagData] - newY = y[tagData] - - - # look at how many trouble points we have - numTroublePoints = newX.size - hasTrouble = False - neededHighlighting = False - - if numTroublePoints > 0 : - hasTrouble = True - # figure out how many bad points there are - totalNumPoints = x.size # the number of points - percentBad = (float(numTroublePoints) / float(totalNumPoints)) * 100.0 - LOG.debug('\t\tnumber of trouble points: ' + str(numTroublePoints)) - LOG.debug('\t\tpercent of trouble points: ' + str(percentBad)) - - # if there are very few points, make them easier to notice - # by plotting some colored circles underneath them - if (percentBad < 0.25) : - neededHighlighting = True - p = bMap.plot(newX, newY, 'o', color='#993399', markersize=5) - elif (percentBad < 1.0) : - neededHighlighting = True - p = bMap.plot(newX, newY, 'o', color='#993399', markersize=3) - - # if there are way too many trouble points, we can't use plot for this - if (numTroublePoints > 1000000) : - new_kwargs = {} - new_kwargs['cmap'] = greenColorMap - p = maps.show_x_y_data(x, y, bMap, data=tagData, **new_kwargs) - else : - # plot our point on top of the existing figure - p = bMap.plot(newX, newY, '.', color='#00FF00', markersize=1) - - # display the number of trouble points on the report if we were passed a set of tag data - # I'm not thrilled with this solution for getting it below the labels drawn by the basemap - # but I don't think there's a better one at the moment given matplotlib's workings - troublePtString = '\n\nShowing ' + str(numTroublePoints) + ' Trouble Points' - # if our plot is more complex, add clarification - if hasTrouble : - troublePtString = troublePtString + ' in Green' - if neededHighlighting : - troublePtString = troublePtString + '\nwith Purple Circles for Visual Clarity' - plt.xlabel(troublePtString) - - # if we still need to label the ranges, do it now that our fake axis won't mess the trouble points up - if doLabelRanges : - """ TODO get this working properly - fakeAx = plt.axes ([0.77, 0.05, 0.2, 0.9], frameon=False) - fakeAx.xaxis.set_visible(False) - fakeAx.yaxis.set_visible(False) - - testRect = Rectangle((0, 0), 1, 1, fc="r") - legendKey = fakeAx.legend([testRect], ["r\n\n\n"], mode="expand", ncol=1, borderaxespad=0.) - """ - - return figure - -def _create_simple_figure(data, figureTitle, invalidMask=None, tagData=None, colorMap=None) : - """ - create a simple figure showing the data given masked by the invalid mask - any tagData passed in will be interpreted as trouble points on the image and plotted as a - filled contour overlay in green on the image - if a colorMap is given it will be used to plot the data, - if not the default colorMap for imshow will be used - """ - - cleanData = ma.array(data, mask=invalidMask) - - # build the plot - figure = plt.figure() - axes = figure.add_subplot(111) - - # build extra info to go to the map plotting function - kwargs = { } - - # if we've got a color map, pass it to the list of things we want to tell the plotting function - if not (colorMap is None) : - kwargs['cmap'] = colorMap - - if (data is not None) and (sum(invalidMask) < invalidMask.size) : - # draw our data - im = imshow(cleanData, **kwargs) - # make a color bar - cbar = colorbar(format='%.3f') - - # and some informational stuff - axes.set_title(figureTitle) - - # if there are "tag" masks, plot them over the existing map - if not (tagData is None) : - - numTroublePoints = sum(tagData) - - # if we have trouble points, we need to show them - if numTroublePoints > 0: - - # figure out how many bad points there are - totalNumPoints = tagData.size # the number of points - percentBad = (float(numTroublePoints) / float(totalNumPoints)) * 100.0 - LOG.debug('\t\tnumber of trouble points: ' + str(numTroublePoints)) - LOG.debug('\t\tpercent of trouble points: ' + str(percentBad)) - - - new_kwargs = {} - new_kwargs['cmap'] = greenColorMap - cleanTagData = ma.array(tagData, mask=~tagData) - p = contourf(cleanTagData, **new_kwargs) - - # display the number of trouble points on the report if we were passed a set of tag data - troublePtString = '\n\nShowing ' + str(numTroublePoints) + ' Trouble Points' - # if our plot is more complex, add clarification - if numTroublePoints > 0 : - troublePtString = troublePtString + ' in Green' - plt.xlabel(troublePtString) - - return figure - -def _create_line_plot_figure(dataList, figureTitle, tagData=None) : - """ - create a basic line plot of the data vs. it's index, ignoring any invalid data - if tagData is given, under-label those points with green circles - - Each entry in the dataList should be a tupple containing: - (data, invalidMask, colorString, labelName) - - The color string describes a color for plotting in matplotlib. - The label names will be used for the legend, which will be shown if there is - more than one set of data plotted or if there is tag data plotted. Invalid - masks, colors, and label names may be given as None, in which case no data - will be masked and a default label of "data#" (where # is an arbitrary - unique counter) will be used. - """ - - # build the plot - figure = plt.figure() - axes = figure.add_subplot(111) - - # plot each of the data sets - dataSetLabelNumber = 1 - minTagPts = -1 - maxTagPts = -1 - for dataSet, invalidMask, colorString, labelName in dataList : - - # if we don't have these, set them to defaults - if invalidMask is None : - invalidMask = zeros(dataSet.shape, dtype=bool) - if labelName is None : - labelName = 'data' + str(dataSetLabelNumber) - dataSetLabelNumber = dataSetLabelNumber + 1 - if colorString is None: - colorString = '' - - if (dataSet is not None) and (sum(invalidMask) < invalidMask.size) : - - # if we don't have a real min yet, set it based on the size - if minTagPts < 0 : - minTagPts = dataSet.size + 1 - - indexData = ma.array(range(dataSet.size), mask=invalidMask) - cleanData = ma.array(dataSet, mask=invalidMask) - - # plot the tag data and gather information about it - if tagData is not None : - - numTroublePoints = sum(tagData) - LOG.debug('\t\tnumber of trouble points: ' + str(numTroublePoints)) - if numTroublePoints < minTagPts: - minTagPts = numTroublePoints - if numTroublePoints > maxTagPts : - maxTagPts = numTroublePoints - - # if we have trouble points, we need to show them - if numTroublePoints > 0: - - cleanTagData = ma.array(dataSet, mask=~tagData | invalidMask) - axes.plot(indexData, cleanTagData, 'yo', label='trouble point') - - axes.plot(indexData, cleanData, '-' + colorString, label=labelName) - - # display the number of trouble points on the report if we were passed - # a set of tag data and we were able to compare it to some actual data - if ((tagData is not None) and (minTagPts >= 0) and (maxTagPts >=0)) : - - troublePtString = '\nMarking ' - - if (minTagPts == maxTagPts) : - troublePtString = troublePtString + str(minTagPts) + ' Trouble Points with Yellow Circles' - else : - troublePtString = (troublePtString + 'between ' + str(minTagPts) + ' and ' + str(maxTagPts) + ' Trouble Points' - + '\non the various data sets (using Yellow Circles)') - - plt.xlabel(troublePtString) - - if (len(dataList) > 1) or (tagData is not None) : - # make a key to explain our plot - # as long as things have been plotted with proper labels they should show up here - axes.legend(loc=0, markerscale=3.0) # Note: at the moment markerscale doesn't seem to work - - # and some informational stuff - axes.set_title(figureTitle) - - return figure - -# figure out the bounding axes for the display given a set of -# longitude and latitude and possible a mask of invalid values -# that we should ignore in them -def _get_visible_axes(longitudeData, latitudeData, toIgnoreMask) : - - # calculate the bounding range for the display - # this is in the form [longitude min, longitude max, latitude min, latitude max] - visibleAxes = [delta.min_with_mask(longitudeData, toIgnoreMask), - delta.max_with_mask(longitudeData, toIgnoreMask), - delta.min_with_mask(latitudeData, toIgnoreMask), - delta.max_with_mask(latitudeData, toIgnoreMask)] - - return visibleAxes - -def spectral_diff_plot( mean_diff, std_diff, max_diff, min_diff, acceptable_diff=None, x=None ): - """plot spectral difference in current axes, wrapped in std - >>> x = arange(0,9,0.1) - >>> y1 = sin(x) - >>> y2 = sin(x+0.05) - >>> d = y2-y1 - >>> s = std(d) - >>> spectral_diff_plot( d, s, array([0.1] * len(d)), x=x ) - """ - cla() - if x is None: x = range(len(mean_diff)) - if acceptable_diff is not None: - plot(x, acceptable_diff, 'g.', hold=True, alpha=0.2) - plot(x, -acceptable_diff, 'g.', hold=True, alpha=0.2) - plot(x, mean_diff+std_diff, 'r', hold=True, alpha=0.5) - plot(x,min_diff,'b') - plot(x,max_diff,'b') - plot(x, mean_diff-std_diff, 'r', hold=True, alpha=0.5) - plot(x, mean_diff, 'k', hold=True) - -def compare_spectra(actual, desired=None, acceptable=None, x=None): - """ given an actual[spectrum][wnum], desired[spectrum][wnum], plot comparisons of differences - """ - delta = actual-desired if (desired is not None) else actual - d_mean = mean(delta,axis=0) - d_max = delta.max(axis=0) - d_min = delta.min(axis=0) - d_std = std(delta,axis=0) - des_mean = mean(desired,axis=0) - subplot(211) - cla() - if x is None: x = range(len(des_mean)) - # plot(x,des_mean+d_max,'b') - # plot(x,des_mean+d_min,'b') - plot(x,des_mean,'k') - grid() - title("mean spectrum") - subplot(212) - spectral_diff_plot(d_mean, d_std, d_max, d_min, acceptable, x) - grid() - title("difference min-max (blue), mean (black), mean +/- std (red)") - -def plot_and_save_spacial_trouble(longitude, latitude, - spacialTroubleMask, spaciallyInvalidMask, - fileNameDiscriminator, title, fileBaseName, outputPath, makeSmall=False) : - """ - given information on spatially placed trouble points in A and B, plot only those points in a very obvious way - on top of a background plot of a's data shown in grayscale, save this plot to the output path given - if makeSmall is passed as true a smaller version of the image will also be saved - """ - - # get the bounding axis and make a basemap - boundingAxes = _get_visible_axes(longitude, latitude, spaciallyInvalidMask) - LOG.debug("Visible axes for lon/lat trouble figure are: " + str(boundingAxes)) - baseMapInstance, boundingAxes = maps.create_basemap(longitude, latitude, boundingAxes, _select_projection(boundingAxes)) - - # make the figure - LOG.info("Creating spatial trouble image") - spatialTroubleFig = _create_mapped_figure(None, latitude, longitude, baseMapInstance, boundingAxes, - title, spaciallyInvalidMask, None, spacialTroubleMask) - # save the figure - LOG.info("Saving spatial trouble image") - spatialTroubleFig.savefig(outputPath + "/" + fileBaseName + "." + fileNameDiscriminator + ".png", dpi=fullSizeDPI) - - # we may also save a smaller versions of the figure - if (makeSmall) : - - spatialTroubleFig.savefig(outputPath + "/" + fileBaseName + "." + fileNameDiscriminator + ".small.png", dpi=thumbSizeDPI) - - # clear the figure - spatialTroubleFig.clf() - plt.close(spatialTroubleFig) - del(spatialTroubleFig) - - return - def _handle_fig_creation_task(child_figure_function, log_message, outputPath, fullFigName, shouldMakeSmall, doFork) : @@ -650,7 +81,7 @@ def _handle_fig_creation_task(child_figure_function, log_message, # if we didn't fork, return the 0 pid to indicate that return pid -def _log_spawn_and_wait_if_needed (imageDescription, childPids, imageList, +def _log_spawn_and_wait_if_needed (imageDescription, childPids, taskFunction, taskOutputPath, taskFigName, doMakeThumb=True, doFork=False, shouldClearMemoryWithThreads=False) : """ @@ -674,212 +105,45 @@ def _log_spawn_and_wait_if_needed (imageDescription, childPids, imageList, LOG.debug ("Started child process (pid: " + str(pid) + ") to create image of " + imageDescription) else : os.waitpid(pid, 0) - imageList.append(taskFigName) return -def make_geolocated_plotting_functions(aData, bData, - absDiffData, rawDiffData, - variableDisplayName, - epsilon, - goodInAMask, goodInBMask, goodMask, - troubleMask, outsideEpsilonMask, - - # parameters that are only needed for geolocated data - lonLatDataDict, - shouldUseSharedRangeForOriginal, - dataRanges, dataRangeNames, dataColors) : - - functionsToReturn = { } - - # TODO, possibly remove this section of definitions in the future? - latitudeAData = lonLatDataDict['a']['lat'] - longitudeAData = lonLatDataDict['a']['lon'] - latitudeBData = lonLatDataDict['b']['lat'] - longitudeBData = lonLatDataDict['b']['lon'] - latitudeCommonData = lonLatDataDict['common']['lat'] - longitudeCommonData = lonLatDataDict['common']['lon'] - spaciallyInvalidMaskA = lonLatDataDict['a']['inv_mask'] - spaciallyInvalidMaskB = lonLatDataDict['b']['inv_mask'] - - # figure out the bounding axis - aAxis = _get_visible_axes(longitudeAData, latitudeAData, ~goodInAMask) - bAxis = _get_visible_axes(longitudeBData, latitudeBData, ~goodInBMask) - fullAxis = [min(aAxis[0], bAxis[0]), max(aAxis[1], bAxis[1]), - min(aAxis[2], bAxis[2]), max(aAxis[3], bAxis[3])] - LOG.debug("Visible axes for file A variable data (" + variableDisplayName + ") are: " + str(aAxis)) - LOG.debug("Visible axes for file B variable data (" + variableDisplayName + ") are: " + str(bAxis)) - LOG.debug("Visible axes shared for both file's variable data (" + variableDisplayName + ") are: " + str(fullAxis)) - - if (fullAxis[0] is None) or (fullAxis[1] is None) or (fullAxis[2] is None) or (fullAxis[3] is None) : - LOG.warn("Unable to display figures for variable (" + variableDisplayName + ") because of inability to identify" + - " usable bounding longitude and latitude range on the earth. Bounding range that was identified:" + str(fullAxis)) - return # TODO, the figures need to be disabled from the report and possibly a warning on the report? - - # create our basemap - LOG.info('\t\tloading base map data') - baseMapInstance, fullAxis = maps.create_basemap(longitudeCommonData, latitudeCommonData, fullAxis, _select_projection(fullAxis)) - - # figure out the shared range for A and B's data, by default don't share a range - shared_range = None - if (shouldUseSharedRangeForOriginal) : - shared_range = _make_range(aData, ~goodInAMask, 50, offset_to_range=offsetToRange, - data_b=bData, invalid_b_mask=~goodInBMask) - - # make the plotting functions - - functionsToReturn['originalA'] = (lambda : _create_mapped_figure(aData, latitudeAData, longitudeAData, - baseMapInstance, fullAxis, - (variableDisplayName + "\nin File A"), - invalidMask=(~goodInAMask), - dataRanges=dataRanges or shared_range, - dataRangeNames=dataRangeNames, - dataRangeColors=dataColors)) - functionsToReturn['originalB'] = (lambda : _create_mapped_figure(bData, latitudeBData, longitudeBData, - baseMapInstance, fullAxis, - (variableDisplayName + "\nin File B"), - invalidMask=(~ goodInBMask), - dataRanges=dataRanges or shared_range, - dataRangeNames=dataRangeNames, - dataRangeColors=dataColors)) - - functionsToReturn['absDiff'] = (lambda : _create_mapped_figure(absDiffData, - latitudeCommonData, longitudeCommonData, - baseMapInstance, fullAxis, - ("Absolute value of difference in\n" + variableDisplayName), - invalidMask=(~ goodMask))) - functionsToReturn['subDiff'] = (lambda : _create_mapped_figure(rawDiffData, latitudeCommonData, longitudeCommonData, - baseMapInstance, fullAxis, - ("Value of (Data File B - Data File A) for\n" + variableDisplayName), - invalidMask=(~ goodMask))) - # this is not an optimal solution, but we need to have at least somewhat valid data at any mismatched points so - # that our plot won't be totally destroyed by missing or non-finite data from B - bDataCopy = bData[:] - tempMask = goodInAMask & (~goodInBMask) - bDataCopy[tempMask] = aData[tempMask] - functionsToReturn['trouble'] = (lambda : _create_mapped_figure(bDataCopy, latitudeCommonData, longitudeCommonData, - baseMapInstance, fullAxis, - ("Areas of trouble data in\n" + variableDisplayName), - (~(goodInAMask | goodInBMask)), - mediumGrayColorMap, troubleMask, - dataRanges=dataRanges, - dataRangeNames=dataRangeNames)) - - # setup the data bins for the histogram - numBinsToUse = 50 - valuesForHist = rawDiffData[goodMask] - functionsToReturn['histogram'] = (lambda : _create_histogram(valuesForHist, numBinsToUse, - ("Difference in\n" + variableDisplayName), - ('Value of (Data File B - Data File A) at a Data Point'), - ('Number of Data Points with a Given Difference'), - True)) - functionsToReturn['scatter'] = (lambda : _create_scatter_plot(aData[goodMask], bData[goodMask], - "Value in File A vs Value in File B", - "File A Value", "File B Value", - outsideEpsilonMask[goodMask], - epsilon)) - - return functionsToReturn - -def make_pure_data_plotting_functions (aData, bData, - absDiffData, rawDiffData, - variableDisplayName, - epsilon, - goodInAMask, goodInBMask, goodMask, - troubleMask, outsideEpsilonMask, - - # additional parameters this function isn't using - lonLatDataDict=None, - shouldUseSharedRangeForOriginal=False, - dataRanges=None, dataRangeNames=None, dataColors=None) : - - functionsToReturn = { } - - functionsToReturn['originalA'] = (lambda: _create_simple_figure(aData, variableDisplayName + "\nin File A", - invalidMask=~goodInAMask)) - functionsToReturn['originalB'] = (lambda: _create_simple_figure(bData, variableDisplayName + "\nin File B", - invalidMask=~goodInBMask)) - - functionsToReturn['absDiff'] = (lambda: _create_simple_figure(absDiffData, - "Absolute value of difference in\n" + variableDisplayName, - invalidMask=~goodMask)) - functionsToReturn['subDiff'] = (lambda: _create_simple_figure(rawDiffData, - "Value of (Data File B - Data File A) for\n" + variableDisplayName, - invalidMask=~goodMask)) - # this is not an optimal solution, but we need to have at least somewhat valid data at any mismatched points so - # that our plot won't be totally destroyed by missing or non-finite data from B - bDataCopy = bData[:] - tempMask = goodInAMask & (~goodInBMask) - bDataCopy[tempMask] = aData[tempMask] - functionsToReturn['trouble'] = (lambda: _create_simple_figure(bDataCopy, "Areas of trouble data in\n" + variableDisplayName, - invalidMask=~(goodInAMask | goodInBMask), tagData=troubleMask, - colorMap=mediumGrayColorMap)) - - # set up the bins and data for a histogram of the values of fileA - file B - numBinsToUse = 50 - valuesForHist = rawDiffData[goodMask] - functionsToReturn['histogram'] = (lambda : _create_histogram(valuesForHist, numBinsToUse, - ("Difference in\n" + variableDisplayName), - ('Value of (Data File B - Data File A) at a Data Point'), - ('Number of Data Points with a Given Difference'), - True)) - functionsToReturn['scatter'] = (lambda : _create_scatter_plot(aData[goodMask], bData[goodMask], - "Value in File A vs Value in File B", - "File A Value", "File B Value", - outsideEpsilonMask[goodMask], - epsilon)) - - return functionsToReturn - -def make_line_plot_plotting_functions (aData, bData, - absDiffData, rawDiffData, - variableDisplayName, - epsilon, - goodInAMask, goodInBMask, goodMask, - troubleMask, outsideEpsilonMask, - - # additional parameters this function isn't using - lonLatDataDict=None, - shouldUseSharedRangeForOriginal=False, - dataRanges=None, dataRangeNames=None, dataColors=None) : +def plot_and_save_spacial_trouble(longitude, latitude, + spacialTroubleMask, spaciallyInvalidMask, + fileNameDiscriminator, title, fileBaseName, outputPath, makeSmall=False) : + """ + given information on spatially placed trouble points in A and B, plot only those points in a very obvious way + on top of a background plot of a's data shown in grayscale, save this plot to the output path given + if makeSmall is passed as true a smaller version of the image will also be saved + """ - functionsToReturn = { } + # get the bounding axis and make a basemap + boundingAxes = plotfns.get_visible_axes(longitude, latitude, spaciallyInvalidMask) + LOG.debug("Visible axes for lon/lat trouble figure are: " + str(boundingAxes)) + baseMapInstance, boundingAxes = maps.create_basemap(longitude, latitude, boundingAxes, plotfns.select_projection(boundingAxes)) - functionsToReturn['originalA'] = (lambda: _create_line_plot_figure([(aData, ~goodInAMask, 'r', 'A data'), - (bData, ~goodInBMask, 'b', 'B data')], - variableDisplayName + "\nin File A")) - #functionsToReturn['originalA'] = (lambda: _create_line_plot_figure(aData, variableDisplayName + "\nin File A", - # invalidMask=~goodInAMask)) - functionsToReturn['originalB'] = (lambda: _create_line_plot_figure([(bData, ~goodInBMask, 'b', 'B data')], - variableDisplayName + "\nin File B")) + # make the figure + LOG.info("Creating spatial trouble image") + spatialTroubleFig = figures.create_mapped_figure(None, latitude, longitude, baseMapInstance, boundingAxes, + title, invalidMask=spaciallyInvalidMask, tagData=spacialTroubleMask) + # save the figure + LOG.info("Saving spatial trouble image") + spatialTroubleFig.savefig(outputPath + "/" + fileBaseName + "." + fileNameDiscriminator + ".png", dpi=fullSizeDPI) - functionsToReturn['absDiff'] = (lambda: _create_line_plot_figure([(absDiffData, ~goodMask, 'k', 'abs. diff.')], - "Absolute value of difference in\n" + variableDisplayName)) - functionsToReturn['subDiff'] = (lambda: _create_line_plot_figure([(rawDiffData, ~goodMask, 'k', 'raw diff.')], - "Value of (Data File B - Data File A) for\n" + variableDisplayName)) - functionsToReturn['trouble'] = (lambda: _create_line_plot_figure([(aData, ~goodInAMask, 'r', 'A data'), - (bData, ~goodInBMask, 'b', 'B data')], - "Areas of trouble data in\n" + variableDisplayName, - tagData=troubleMask)) + # we may also save a smaller versions of the figure + if (makeSmall) : + + spatialTroubleFig.savefig(outputPath + "/" + fileBaseName + "." + fileNameDiscriminator + ".small.png", dpi=thumbSizeDPI) - # set up the bins and data for a histogram of the values of fileA - file B - numBinsToUse = 50 - valuesForHist = rawDiffData[goodMask] - functionsToReturn['histogram'] = (lambda : _create_histogram(valuesForHist, numBinsToUse, - ("Difference in\n" + variableDisplayName), - ('Value of (Data File B - Data File A) at a Data Point'), - ('Number of Data Points with a Given Difference'), - True)) - functionsToReturn['scatter'] = (lambda : _create_scatter_plot(aData[goodMask], bData[goodMask], - "Value in File A vs Value in File B", - "File A Value", "File B Value", - outsideEpsilonMask[goodMask], - epsilon)) + # get rid of the figure + spatialTroubleFig.clf() + plt.close(spatialTroubleFig) + del(spatialTroubleFig) - return functionsToReturn + return def plot_and_save_comparison_figures (aData, bData, - plottingFunctionGenerationFunction, + plottingFunctionFactoryObjects, outputPath, variableDisplayName, epsilon, @@ -894,12 +158,10 @@ def plot_and_save_comparison_figures (aData, bData, doFork=False, shouldClearMemoryWithThreads=False, shouldUseSharedRangeForOriginal=False, - doPlotOriginal=True, - doPlotAbsDiff=True, - doPlotSubDiff=True, - doPlotTrouble=True, - doPlotHistogram=True, - doPlotScatter=True) : + doPlotSettingsDict={ }, + aUData=None, aVData=None, + bUData=None, bVData=None, + binIndex=None, tupleIndex=None) : """ Plot images for a set of figures based on the data sets and settings passed in. The images will be saved to disk according to the settings. @@ -936,13 +198,14 @@ def plot_and_save_comparison_figures (aData, bData, images to label the variable being analyzed epsilon - the epsilon that should be used for the data comparison missingValue - the missing value that should be used for the data comparison + plottingFunctionFactoryObjects - a list of objects that will generate the plotting + functions; each object in the list must be an + instance of a child of PlottingFunctionFactory optional parameters: missingValueAltInB - an alternative missing value in the B file; if None is given then the missingValue will be used for the B file - plottingFunction - the plotting function that will be used to make the plots, - defaults to using basemap to place your data on the globe makeSmall - should small "thumbnail" images be made for each large image? shortCircuitComparisons - should the comparison plots be disabled? doFork - should the process fork to create new processes to create each @@ -951,18 +214,14 @@ def plot_and_save_comparison_figures (aData, bData, memory bloating? ** shouldUseSharedRangeForOriginal - should the original images share an all-inclusive data range? - doPlotOriginal - should the plots of the two original data sets be made? - doPlotAbsDiff - should the plot of the absolute difference comparison image be made? - doPlotSubDiff - should the plot of the subtractive difference comparison image be made? - doPlotTrouble - should the plot of the trouble point image be made? - doPlotHistogram - should the plot of the historgram be made? - doPlotScatter - should the plot of the scatter plot be made? + doPlotSettingsDict - a dictionary containting settings to turn off individual plots + if an entry for a plot is not pressent or set to True, + then the plot will be created ** May fail due to a known bug on MacOSX systems. """ # lists to hold information on the images we make - # TODO, this will interact poorly with doFork original_images = [ ] compared_images = [ ] @@ -973,7 +232,7 @@ def plot_and_save_comparison_figures (aData, bData, # figure out if we have spatially invalid masks to consider spaciallyInvalidMaskA = None spaciallyInvalidMaskB = None - if lonLatDataDict is not None: + if (lonLatDataDict is not None) and (len(lonLatDataDict.keys()) > 0): spaciallyInvalidMaskA = lonLatDataDict['a']['inv_mask'] spaciallyInvalidMaskB = lonLatDataDict['b']['inv_mask'] @@ -984,87 +243,65 @@ def plot_and_save_comparison_figures (aData, bData, (spaciallyInvalidMaskA, spaciallyInvalidMaskB)) absDiffData = np.abs(rawDiffData) # we also want to show the distance between our two, not just which one's bigger/smaller - LOG.debug("Visible axes will not be calculated for variable data (" + variableDisplayName - + ") due to lack of lon/lat comparison.") - # from this point on, we will be forking to create child processes so we can parallelize our image and # report generation isParent = True childPids = [ ] - # generate our plotting functions - plottingFunctions = plottingFunctionGenerationFunction (aData, bData, - absDiffData, rawDiffData, - variableDisplayName, - epsilon, - goodInAMask, goodInBMask, goodMask, - troubleMask, outsideEpsilonMask, - # below this comment are parameters only needed - # for geolocated images - lonLatDataDict, - shouldUseSharedRangeForOriginal, - dataRanges, dataRangeNames, dataColors) + plottingFunctions = { } - # only plot the two original data plots if they haven't been turned off - if doPlotOriginal : - - _log_spawn_and_wait_if_needed(variableDisplayName + " in file a", childPids, original_images, - plottingFunctions['originalA'], - outputPath, "A.png", - makeSmall, doFork, shouldClearMemoryWithThreads) + for factoryObject in plottingFunctionFactoryObjects : - _log_spawn_and_wait_if_needed(variableDisplayName + " in file b", childPids, original_images, - plottingFunctions['originalB'], - outputPath, "B.png", - makeSmall, doFork, shouldClearMemoryWithThreads) - - # this is the end of the if to plot the original data + # generate our plotting functions + moreFunctions = factoryObject.create_plotting_functions ( + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_images, compared_images, + + # parameters that are only needed for geolocated data + lonLatDataDict=lonLatDataDict, + + # only used if we are plotting a contour + dataRanges=dataRanges, dataRangeNames=dataRangeNames, + dataColors=dataColors, + shouldUseSharedRangeForOriginal=shouldUseSharedRangeForOriginal, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=absDiffData, rawDiffData=rawDiffData, + goodInBothMask=goodMask, + troubleMask=troubleMask, outsideEpsilonMask=outsideEpsilonMask, + + # only used for plotting quiver data + aUData=aUData, aVData=aVData, + bUData=bUData, bVData=bVData, + + # only used for line plots TODO + binIndex=0, tupleIndex=1 # TODO, temporary binIndex=None, tupleIndex=None + ) + plottingFunctions.update(moreFunctions) - # make the data comparison figures - if not shortCircuitComparisons : - - # only plot the absolute difference if if it hasn't been turned off - if doPlotAbsDiff : - - _log_spawn_and_wait_if_needed("absolute value of difference in " + variableDisplayName, childPids, compared_images, - plottingFunctions['absDiff'], - outputPath, "AbsDiff.png", - makeSmall, doFork, shouldClearMemoryWithThreads) - - # only plot the difference plot if it hasn't been turned off - if doPlotSubDiff : - - _log_spawn_and_wait_if_needed("the difference in " + variableDisplayName, childPids, compared_images, - plottingFunctions['subDiff'], - outputPath, "Diff.png", - makeSmall, doFork, shouldClearMemoryWithThreads) - - # only plot the trouble plot if it hasn't been turned off - if doPlotTrouble : - - _log_spawn_and_wait_if_needed("trouble data in " + variableDisplayName, childPids, compared_images, - plottingFunctions['trouble'], - outputPath, "Trouble.png", - makeSmall, doFork, shouldClearMemoryWithThreads) + LOG.debug ('plotting function information: ' + str(plottingFunctions)) + + # for each function in the list, run it to create the figure + for figDesc in sorted(list(plottingFunctions.keys())) : - # only plot the histogram if it hasn't been turned off - if doPlotHistogram : - - _log_spawn_and_wait_if_needed("histogram of the amount of difference in " + variableDisplayName, childPids, compared_images, - plottingFunctions['histogram'], - outputPath, "Hist.png", - makeSmall, doFork, shouldClearMemoryWithThreads) + LOG.info("plotting " + figDesc + " figure for " + variableDisplayName) + # returnDictionary['descriptive name'] = (function, title, file_name, list_this_figure_should_go_into) + figFunction, figLongDesc, figFileName, outputInfoList = plottingFunctions[figDesc] - # only plot the scatter plot if it hasn't been turned off - if doPlotScatter : - - # scatter plot of file a vs file b values - - _log_spawn_and_wait_if_needed("scatter plot of file a values vs file b values for " + variableDisplayName, - childPids, compared_images, - plottingFunctions['scatter'], - outputPath, "Scatter.png", + # only plot the compared images if we aren't short circuiting them + if (outputInfoList is not compared_images) or (not shortCircuitComparisons) : + _log_spawn_and_wait_if_needed(figLongDesc, childPids, figFunction, outputPath, figFileName, makeSmall, doFork, shouldClearMemoryWithThreads) + # if we made an attempt to make the file, hang onto the name + outputInfoList.append(figFileName) # now we need to wait for all of our child processes to terminate before returning if (isParent) : # just in case diff --git a/pyglance/glance/plotcreatefns.py b/pyglance/glance/plotcreatefns.py new file mode 100644 index 0000000000000000000000000000000000000000..fa6996efb2b46278a95904fa3ad2ce6ead0ddb72 --- /dev/null +++ b/pyglance/glance/plotcreatefns.py @@ -0,0 +1,945 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +Plotting figure generation functions. + +Created by evas Dec 2009. +Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved. +""" + +# these first two lines must stay before the pylab import +import matplotlib +matplotlib.use('Agg') # use the Anti-Grain Geometry rendering engine + +from pylab import * + +from mpl_toolkits.mplot3d import axes3d +import matplotlib.pyplot as plt +from matplotlib import cm +import matplotlib.colors as colors +from matplotlib.ticker import FormatStrFormatter + +from PIL import Image + +import os, sys, logging +import numpy as np +from numpy import ma + +import glance.graphics as maps +import glance.delta as delta +import glance.report as report +import glance.filters as filters # TODO, should this be moved? +import glance.figures as figures + +LOG = logging.getLogger(__name__) + +# a constant for the larger size dpi +fullSizeDPI = 150 # 200 +# a constant for the thumbnail size dpi +thumbSizeDPI = 50 + +# make a custom medium grayscale color map for putting our bad data on top of +mediumGrayColorMapData = { + 'red' : ((0.0, 1.00, 1.00), + (0.5, 0.60, 0.60), + (1.0, 0.20, 0.20)), + 'green' : ((0.0, 1.00, 1.00), + (0.5, 0.60, 0.60), + (1.0, 0.20, 0.20)), + 'blue' : ((0.0, 1.00, 1.00), + (0.5, 0.60, 0.60), + (1.0, 0.20, 0.20)) +} +mediumGrayColorMap = colors.LinearSegmentedColormap('mediumGrayColorMap', mediumGrayColorMapData, 256) + +# ********************* Section of utility functions *********************** + +# a method to stop people from calling my fake abstract methods +def _abstract( ) : + raise NotImplementedError('Method must be implemented in subclass.') + +# figure out the bounding axes for the display given a set of +# longitude and latitude and possible a mask of invalid values +# that we should ignore in them +def get_visible_axes(longitudeData, latitudeData, toIgnoreMask) : + + # calculate the bounding range for the display + # this is in the form [longitude min, longitude max, latitude min, latitude max] + visibleAxes = [delta.min_with_mask(longitudeData, toIgnoreMask), + delta.max_with_mask(longitudeData, toIgnoreMask), + delta.min_with_mask(latitudeData, toIgnoreMask), + delta.max_with_mask(latitudeData, toIgnoreMask)] + + return visibleAxes + +def select_projection(boundingAxes) : + """ + chose a map projection based on the bounding axes that will be shown + """ + + # TODO at the moment the default (lcc) is cutting off the field of view, + # I think the same problem will occur with all of the conic projections, because + # they all allow you to specify the field of view either as corners or as a width/height in + # meters, but they do not take the distortion that a conic projection causes into account. + # This means that either the corners or the bottom curve of the data area will be clipped off + # the edge of the screen. There is also some sort of persistent bug that causes the basemap + # to ignore the requested corners for some data sets and shift west and north, cutting off + # a pretty considerable amount of data. I've tried various tactics to control the field of + # view and can't find any way to get the basemap to show an acceptable area programatically + # that will match arbitrary data sets. + # For the moment, I am setting this to use a cylindrical projection rather than a conic. + # At some point in the future this should be revisited so that glance will be able to handle + # a wider range of projections. + projToUse = 'cyl' + + # how big is the field of view? + longitudeRange = abs(boundingAxes[1] - boundingAxes[0]) + latitudeRange = abs(boundingAxes[3] - boundingAxes[2]) + # chose the projection based on the range we have to cover + if (longitudeRange > 180) : + projToUse = 'cyl' # use a equidistant cylindrical projection to show the whole world + elif (longitudeRange > 100) or (latitudeRange > 70) : + projToUse = 'ortho' # use an orthographic projection to show about half the globe + + return projToUse + +def _make_axis_and_basemap(lonLatDataDict, goodInAMask, goodInBMask, shouldUseSharedRangeForOriginal=False, variableDisplayName=None) : + """ + Determine the appropriate axes for the given data (in longitude and latitude) and create the appropriate basemap and shared + range information. + + fullAxis, baseMapInstance, sharedRange = _make_axis_and_basemap(lonLatDataDict, goodInAMask, goodInBMask, + shouldUseSharedRangeForOriginal=False) + """ + + nameMessage = '' + if variableDisplayName is not None: + nameMessage = " (" + variableDisplayName + ")" + + # figure out the bounding axis + aAxis = get_visible_axes(lonLatDataDict['a']['lon'], lonLatDataDict['a']['lat'], ~goodInAMask) + bAxis = get_visible_axes(lonLatDataDict['b']['lon'], lonLatDataDict['b']['lat'], ~goodInBMask) + fullAxis = [min(aAxis[0], bAxis[0]), max(aAxis[1], bAxis[1]), + min(aAxis[2], bAxis[2]), max(aAxis[3], bAxis[3])] + + LOG.debug("Visible axes for file A variable data" + nameMessage + " are: " + str(aAxis)) + LOG.debug("Visible axes for file B variable data" + nameMessage + " are: " + str(bAxis)) + LOG.debug("Visible axes shared for both file's variable data" + nameMessage + " are: " + str(fullAxis)) + + if (fullAxis[0] is None) or (fullAxis[1] is None) or (fullAxis[2] is None) or (fullAxis[3] is None) : + LOG.warn("Unable to display figures for variable" + nameMessage + " because of inability to identify" + + " usable bounding longitude and latitude range on the earth. Bounding range that was identified:" + str(fullAxis)) + return # TODO, the figures need to be disabled from the report and possibly a warning on the report? throw exception instead? + + # create our basemap + LOG.info('\t\tloading base map data') + baseMapInstance, fullAxis = maps.create_basemap(lonLatDataDict['common']['lon'], lonLatDataDict['common']['lat'], + fullAxis, select_projection(fullAxis)) + + # figure out the shared range for A and B's data, by default don't share a range + sharedRange = None + if (shouldUseSharedRangeForOriginal) : + sharedRange = figures._make_range(aData, ~goodInAMask, 50, offset_to_range=figures.offsetToRange, + data_b=bData, invalid_b_mask=~goodInBMask) + + return fullAxis, baseMapInstance, sharedRange + +# a method to make a list of index numbers for reordering a multi-dimensional array +def _make_new_index_list(numberOfIndexes, firstIndexNumber=0, lastIndexNumber=None) : + """ + the first and last index numbers represent the dimensions you want to be first and last (respectively) + when the list is reordered; any other indexes will retain their relative ordering + + newIndexList = _make_new_index_list(numIndexes, binIndex, tupleIndex) + + TODO, move this to delta? + """ + + if lastIndexNumber is None: + lastIndexNumber = numberOfIndexes - 1 + + newIndexList = range(numberOfIndexes) + maxSpecial = max(firstIndexNumber, lastIndexNumber) + minSpecial = min(firstIndexNumber, lastIndexNumber) + del(newIndexList[maxSpecial]) + del(newIndexList[minSpecial]) + newIndexList = [firstIndexNumber] + newIndexList + [lastIndexNumber] + + return newIndexList + +# ********************* Section of public classes *********************** + +""" +this class is intended to be a parent class for our plotting function +creation classes +it contains a "fake" "abstract" method +""" +class PlottingFunctionFactory : + + def create_plotting_functions ( + + self, + + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_fig_list, compared_fig_list, + + # parameters that are only needed for geolocated data + lonLatDataDict=None, + + # only used if we are plotting a contour + dataRanges=None, dataRangeNames=None, dataColors=None, + shouldUseSharedRangeForOriginal=True, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=None, rawDiffData=None, + goodInBothMask=None, + troubleMask=None, outsideEpsilonMask=None, + + # only used for plotting quiver data + aUData=None, aVData=None, + bUData=None, bVData=None, + + # only used for line plots + binIndex=None, tupleIndex=None + + ) : _abstract + +""" +This class creates the most basic of comparison plots based on two similarly +sized data sets. (Plots created include histogram and scatter plots.) +""" +class BasicComparisonPlotsFunctionFactory (PlottingFunctionFactory) : + def create_plotting_functions ( + self, + + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_fig_list, compared_fig_list, + + # parameters that are only needed for geolocated data + lonLatDataDict=None, + + # only used if we are plotting a contour + dataRanges=None, dataRangeNames=None, dataColors=None, + shouldUseSharedRangeForOriginal=True, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=None, rawDiffData=None, + goodInBothMask=None, + troubleMask=None, outsideEpsilonMask=None, + + # only used for plotting quiver data + aUData=None, aVData=None, + bUData=None, bVData=None, + + # only used for line plots + binIndex=None, tupleIndex=None + ) : + + functionsToReturn = { } + + # make the histogram plot + if ('do_plot_histogram' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_histogram']) : + + assert(goodInBothMask.shape == rawDiffData.shape) + + # setup the data bins for the histogram + numBinsToUse = 50 + valuesForHist = rawDiffData[goodInBothMask] + functionsToReturn['histogram'] = ((lambda : figures.create_histogram(valuesForHist, numBinsToUse, + ("Difference in\n" + variableDisplayName), + ('Value of (Data File B - Data File A) at a Data Point'), + ('Number of Data Points with a Given Difference'), + True)), + "histogram of the amount of difference in " + variableDisplayName, + "Hist.png", compared_fig_list) + # make the scatter plot + if ('do_plot_scatter' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_scatter']) : + + assert(aData.shape == bData.shape) + assert(bData.shape == goodInBothMask.shape) + assert(goodInBothMask.shape == outsideEpsilonMask.shape) + + functionsToReturn['scatter'] = ((lambda : figures.create_scatter_plot(aData[goodInBothMask], bData[goodInBothMask], + "Value in File A vs Value in File B", + "File A Value", "File B Value", + outsideEpsilonMask[goodInBothMask], + epsilon)), + "scatter plot of file a values vs file b values for " + variableDisplayName, + "Scatter.png", compared_fig_list) + + return functionsToReturn + +""" +This class creates contour plots mapped onto a region of the earth. +""" +class MappedContourPlotFunctionFactory (PlottingFunctionFactory) : + def create_plotting_functions ( + self, + + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_fig_list, compared_fig_list, + + # parameters that are only needed for geolocated data + lonLatDataDict=None, + + # only used if we are plotting a contour + dataRanges=None, dataRangeNames=None, dataColors=None, + shouldUseSharedRangeForOriginal=True, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=None, rawDiffData=None, + goodInBothMask=None, + troubleMask=None, outsideEpsilonMask=None, + + # only used for plotting quiver data + aUData=None, aVData=None, + bUData=None, bVData=None, + + # only used for line plots + binIndex=None, tupleIndex=None + ) : + + # the default for plotting geolocated data + mappedPlottingFunction = figures.create_mapped_figure + + functionsToReturn = { } + + assert(lonLatDataDict is not None) + assert(goodInAMask is not None) + assert(goodInBMask is not None) + + fullAxis, baseMapInstance, sharedRange = _make_axis_and_basemap(lonLatDataDict, + goodInAMask, goodInBMask, + shouldUseSharedRangeForOriginal, + variableDisplayName) + + # make the plotting functions + + # make the original data plots + if ('do_plot_originals' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_originals']) : + + assert('a' in lonLatDataDict) + assert('lat' in lonLatDataDict['a']) + assert('lon' in lonLatDataDict['a']) + assert(lonLatDataDict['a']['lat'].shape == lonLatDataDict['a']['lon'].shape) + + functionsToReturn['originalA'] = ((lambda : mappedPlottingFunction(aData, + lonLatDataDict['a']['lat'], + lonLatDataDict['a']['lon'], + baseMapInstance, fullAxis, + (variableDisplayName + "\nin File A"), + invalidMask=(~goodInAMask), + dataRanges=dataRanges or sharedRange, + dataRangeNames=dataRangeNames, + dataRangeColors=dataColors)), + variableDisplayName + " in file a", + "A.png", original_fig_list) + + assert('b' in lonLatDataDict) + assert('lat' in lonLatDataDict['b']) + assert('lon' in lonLatDataDict['b']) + assert(lonLatDataDict['b']['lat'].shape == lonLatDataDict['b']['lon'].shape) + + functionsToReturn['originalB'] = ((lambda : mappedPlottingFunction(bData, + lonLatDataDict['b']['lat'], + lonLatDataDict['b']['lon'], + baseMapInstance, fullAxis, + (variableDisplayName + "\nin File B"), + invalidMask=(~goodInBMask), + dataRanges=dataRanges or sharedRange, + dataRangeNames=dataRangeNames, + dataRangeColors=dataColors)), + variableDisplayName + " in file b", + "B.png", original_fig_list) + + # make the absolute value difference plot + if ('do_plot_abs_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_abs_diff']) : + + assert(absDiffData.shape == goodInBothMask.shape) + assert('common' in lonLatDataDict) + assert('lat' in lonLatDataDict['common']) + assert('lon' in lonLatDataDict['common']) + assert(lonLatDataDict['common']['lat'].shape == lonLatDataDict['common']['lon'].shape) + assert(lonLatDataDict['common']['lon'].shape == absDiffData.shape) + + functionsToReturn['diffAbs'] = ((lambda : mappedPlottingFunction(absDiffData, + lonLatDataDict['common']['lat'], + lonLatDataDict['common']['lon'], + baseMapInstance, fullAxis, + ("Absolute value of difference in\n" + + variableDisplayName), + invalidMask=(~goodInBothMask))), + "absolute value of difference in " + variableDisplayName, + "AbsDiff.png", compared_fig_list) + # make the subtractive difference plot + if ('do_plot_sub_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_sub_diff']) : + + assert(rawDiffData.shape == goodInBothMask.shape) + assert('common' in lonLatDataDict) + assert('lat' in lonLatDataDict['common']) + assert('lon' in lonLatDataDict['common']) + assert(lonLatDataDict['common']['lat'].shape == lonLatDataDict['common']['lon'].shape) + assert(lonLatDataDict['common']['lon'].shape == rawDiffData.shape) + + functionsToReturn['diffSub'] = ((lambda : mappedPlottingFunction(rawDiffData, + lonLatDataDict['common']['lat'], + lonLatDataDict['common']['lon'], + baseMapInstance, fullAxis, + ("Value of (Data File B - Data File A) for\n" + + variableDisplayName), + invalidMask=(~goodInBothMask))), + "the difference in " + variableDisplayName, + "Diff.png", compared_fig_list) + # make the trouble data plot + if ('do_plot_trouble' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_trouble']) : + + assert(aData.shape == bData.shape) + assert(goodInAMask.shape == goodInBMask.shape) + assert('common' in lonLatDataDict) + assert('lat' in lonLatDataDict['common']) + assert('lon' in lonLatDataDict['common']) + assert(lonLatDataDict['common']['lat'].shape == lonLatDataDict['common']['lon'].shape) + assert(lonLatDataDict['common']['lon'].shape == aData.shape) + + # this is not an optimal solution, but we need to have at least somewhat valid data at any mismatched points so + # that our plot won't be totally destroyed by missing or non-finite data from B + bDataCopy = bData[:] + tempMask = goodInAMask & (~goodInBMask) + bDataCopy[tempMask] = aData[tempMask] + functionsToReturn['trouble'] = ((lambda : mappedPlottingFunction(bDataCopy, + lonLatDataDict['common']['lat'], + lonLatDataDict['common']['lon'], + baseMapInstance, fullAxis, + ("Areas of trouble data in\n" + variableDisplayName), + invalidMask=(~(goodInAMask | goodInBMask)), + colorMap=mediumGrayColorMap, tagData=troubleMask, + dataRanges=dataRanges, + dataRangeNames=dataRangeNames)), # TODO, does this need modification? + "trouble data in " + variableDisplayName, + "Trouble.png", compared_fig_list) + + return functionsToReturn + +""" +This class creates quiver plots mapped onto a region of the earth. +Note: the plotting function requires u and v data for both data sets, but +the size of the two data sets is not required to be the same. If the size +of the two data sets is the same, additional comparison plots will be +created. +TODO, this class has not been fully tested +""" +class MappedQuiverPlotFunctionFactory (PlottingFunctionFactory) : + def create_plotting_functions ( + self, + + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_fig_list, compared_fig_list, + + # parameters that are only needed for geolocated data + lonLatDataDict=None, + + # only used if we are plotting a contour + dataRanges=None, dataRangeNames=None, dataColors=None, + shouldUseSharedRangeForOriginal=True, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=None, rawDiffData=None, + goodInBothMask=None, + troubleMask=None, outsideEpsilonMask=None, + + # only used for plotting quiver data + aUData=None, aVData=None, + bUData=None, bVData=None, + + # only used for line plots + binIndex=None, tupleIndex=None + ) : + + # the default for plotting geolocated data + mappedPlottingFunction = figures.create_quiver_mapped_figure + + functionsToReturn = { } + + assert(aUData is not None) + assert(aVData is not None) + assert(bUData is not None) + assert(bVData is not None) + + assert(lonLatDataDict is not None) + assert(goodInAMask is not None) + assert(goodInBMask is not None) + + fullAxis, baseMapInstance, _ = _make_axis_and_basemap(lonLatDataDict, goodInAMask, goodInBMask, variableDisplayName=variableDisplayName) + + # make the plotting functions + + # make the original data plots + if ('do_plot_originals' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_originals']) : + + assert('a' in lonLatDataDict) + assert('lat' in lonLatDataDict['a']) + assert('lon' in lonLatDataDict['a']) + assert(lonLatDataDict['a']['lat'].shape == lonLatDataDict['a']['lon'].shape) + + functionsToReturn['originalA'] = ((lambda : mappedPlottingFunction(aData, + lonLatDataDict['a']['lat'], + lonLatDataDict['a']['lon'], + baseMapInstance, fullAxis, + (variableDisplayName + "\nin File A"), + invalidMask=(~goodInAMask), + uData=aUData, vData=aVData)), + variableDisplayName + " in file a", + "A.png", original_fig_list) + + assert('b' in lonLatDataDict) + assert('lat' in lonLatDataDict['b']) + assert('lon' in lonLatDataDict['b']) + assert(lonLatDataDict['b']['lat'].shape == lonLatDataDict['b']['lon'].shape) + + functionsToReturn['originalB'] = ((lambda : mappedPlottingFunction(bData, + lonLatDataDict['b']['lat'], + lonLatDataDict['b']['lon'], + baseMapInstance, fullAxis, + (variableDisplayName + "\nin File B"), + invalidMask=(~ goodInBMask), + uData=bUData, vData=bVData)), + variableDisplayName + " in file b", + "B.png", original_fig_list) + + # TODO, any additional figures of the original data? + + if (aUData.shape == bUData.shape) : + LOG.info("creating comparison quiver plots for " + variableDisplayName) + + # TODO, is there any complication in taking the diff of vectors this way? + diffUData = aUData - bUData + diffVData = aVData - bVData + + # make the absolute value difference plot + if ('do_plot_abs_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_abs_diff']) : + + assert(absDiffData.shape == goodInBothMask.shape) + assert('common' in lonLatDataDict) + assert('lat' in lonLatDataDict['common']) + assert('lon' in lonLatDataDict['common']) + assert(lonLatDataDict['common']['lat'].shape == lonLatDataDict['common']['lon'].shape) + assert(lonLatDataDict['common']['lon'].shape == absDiffData.shape) + + functionsToReturn['diffAbs'] = ((lambda : mappedPlottingFunction(absDiffData, + lonLatDataDict['common']['lat'], + lonLatDataDict['common']['lon'], + baseMapInstance, fullAxis, + ("Absolute value of difference in\n" + + variableDisplayName), + invalidMask=(~ goodInBothMask), + uData=diffUData, vData=diffVData)), + "absolute value of difference in " + variableDisplayName, + "AbsDiff.png", compared_fig_list) + # make the subtractive difference plot + if ('do_plot_sub_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_sub_diff']) : + + assert(rawDiffData.shape == goodInBothMask.shape) + assert('common' in lonLatDataDict) + assert('lat' in lonLatDataDict['common']) + assert('lon' in lonLatDataDict['common']) + assert(lonLatDataDict['common']['lat'].shape == lonLatDataDict['common']['lon'].shape) + assert(lonLatDataDict['common']['lon'].shape == rawDiffData.shape) + + functionsToReturn['diffSub'] = ((lambda : mappedPlottingFunction(rawDiffData, + lonLatDataDict['common']['lat'], + lonLatDataDict['common']['lon'], + baseMapInstance, fullAxis, + ("Value of (Data File B - Data File A) for\n" + + variableDisplayName), + invalidMask=(~ goodInBothMask), + uData=diffUData, vData=diffVData)), + "the difference in " + variableDisplayName, + "Diff.png", compared_fig_list) + # make the trouble data plot + if ('do_plot_trouble' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_trouble']) : + + assert(aData.shape == bData.shape) + assert(goodInAMask.shape == goodInBMask.shape) + assert('common' in lonLatDataDict) + assert('lat' in lonLatDataDict['common']) + assert('lon' in lonLatDataDict['common']) + assert(lonLatDataDict['common']['lat'].shape == lonLatDataDict['common']['lon'].shape) + assert(lonLatDataDict['common']['lon'].shape == aData.shape) + + # this is not an optimal solution, but we need to have at least somewhat valid data at any mismatched points so + # that our plot won't be totally destroyed by missing or non-finite data from B + bDataCopy = bData[:] + tempMask = goodInAMask & (~goodInBMask) + bDataCopy[tempMask] = aData[tempMask] + functionsToReturn['trouble'] = ((lambda : mappedPlottingFunction(bDataCopy, + lonLatDataDict['common']['lat'], + lonLatDataDict['common']['lon'], + baseMapInstance, fullAxis, + ("Areas of trouble data in\n" + variableDisplayName), + invalidMask=(~(goodInAMask | goodInBMask)), + colorMap=mediumGrayColorMap, tagData=troubleMask, + dataRanges=dataRanges, + dataRangeNames=dataRangeNames, + # TODO, does this need modification? + uData=bUData, vData=bVData)), + "trouble data in " + variableDisplayName, + "Trouble.png", compared_fig_list) + + return functionsToReturn + + +""" +This class creates simple line plots based on simple one dimentional data. +""" +class LinePlotsFunctionFactory (PlottingFunctionFactory) : + def create_plotting_functions ( + self, + + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_fig_list, compared_fig_list, + + # parameters that are only needed for geolocated data + lonLatDataDict=None, + + # only used if we are plotting a contour + dataRanges=None, dataRangeNames=None, dataColors=None, + shouldUseSharedRangeForOriginal=True, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=None, rawDiffData=None, + goodInBothMask=None, + troubleMask=None, outsideEpsilonMask=None, + + # only used for plotting quiver data + aUData=None, aVData=None, + bUData=None, bVData=None, + + # only used for line plots + binIndex=None, tupleIndex=None + ) : + """ + This method generates line plotting functions for one dimensional data + and returns them in a dictionary of tupples, where each tupple is in the form: + + returnDictionary['descriptive name'] = (function, title, file_name, list_this_figure_should_go_into) + + The file name is only the name of the file, not the full path. + """ + assert(aData is not None) + assert(bData is not None) + # TODO, assert about more of the data + + if len(aData.shape) > 1 : + assert(binIndex is not None) + assert(tupleIndex is not None) + assert(binIndex is not tupleIndex) + + numIndexes = len(aData.shape) + assert(binIndex < numIndexes) + assert(tupleIndex < numIndexes) + + # TODO, the rest of this function is not totally refactored but should be functional for now + + # TODO, temporary + aList = [ ] + bList = [ ] + singleAList = [ ] + singleBList = [ ] + absDiffList = [ ] + subDiffList = [ ] + troubleSingleList = [ ] + troubleFullList = [ ] + if len(aData.shape) > 1 : + + newIndexList = _make_new_index_list(numIndexes, binIndex, tupleIndex) + aData = aData.transpose(newIndexList) + bData = bData.transpose(newIndexList) + goodInAMask = goodInAMask.transpose(newIndexList) + goodInBMask = goodInBMask.transpose(newIndexList) + absDiffData = absDiffData.transpose(newIndexList) + rawDiffData = rawDiffData.transpose(newIndexList) + goodInBothMask = goodInBothMask.transpose(newIndexList) + troubleMask = troubleMask.transpose(newIndexList) + outsideEpsilonMask = outsideEpsilonMask.transpose(newIndexList) + + for firstIndexValue in range(aData.shape[0]) : + # get the a and b data with it's masks + aSlice = filters.collapse_to_index(aData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) + bSlice = filters.collapse_to_index(bData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) + goodInAMaskSlice = filters.collapse_to_index(goodInAMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) + goodInBMaskSlice = filters.collapse_to_index(goodInBMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) + + # add to the plain a and b lists + aList.append((aSlice, ~goodInAMaskSlice, 'r', 'A data, ' + str(firstIndexValue), None)) + bList.append((bSlice, ~goodInBMaskSlice, 'b', 'B data, ' + str(firstIndexValue), None)) + + # deal with the trouble data and it's list + troubleMaskSlice = filters.collapse_to_index(troubleMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) + troubleFullList.append((aSlice, ~goodInAMaskSlice, 'r', 'A data, ' + str(firstIndexValue), troubleMaskSlice)) + troubleFullList.append((bSlice, ~goodInBMaskSlice, 'b', 'B data, ' + str(firstIndexValue), troubleMaskSlice)) + + # get the diff data with it's masks + absDiffDataSlice = filters.collapse_to_index(absDiffData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) + rawDiffDataSlice = filters.collapse_to_index(rawDiffData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) + goodMaskSlice = filters.collapse_to_index(goodInBothMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) + + # add to the diff data lists + absDiffList.append((absDiffDataSlice, ~goodMaskSlice, '', 'abs. diff. data, ' + str(firstIndexValue), None)) + subDiffList.append((rawDiffDataSlice, ~goodMaskSlice, '', 'sub. diff. data, ' + str(firstIndexValue), None)) + + # also collapse the masks + troubleMask = filters.collapse_to_index(troubleMask, numIndexes, collapsing_function=(lambda data: any(data))) + #outsideEpsilonMask = filters.collapse_to_index(outsideEpsilonMask, numIndexes, collapsing_function=(lambda data: any(data))) + #goodInBothMask = filters.collapse_to_index(goodInBothMask, numIndexes, collapsing_function=(lambda data: any(data))) + + # make some averages + aDataAvg = filters.collapse_to_index(aData, numIndexes, missing_value=-999.0, ignore_below_exclusive=-990.0) + bDataAvg = filters.collapse_to_index(bData, numIndexes, missing_value=-999.0, ignore_below_exclusive=-990.0) + goodInAMaskAvg = filters.collapse_to_index(goodInAMask, numIndexes, collapsing_function=(lambda data: any(data))) + goodInBMaskAvg = filters.collapse_to_index(goodInBMask, numIndexes, collapsing_function=(lambda data: any(data))) + + # make our averaged sets + singleAList = [(aDataAvg, ~goodInAMaskAvg, 'r', 'A data', None)] + singleBList = [(bDataAvg, ~goodInBMaskAvg, 'b', 'B data', None)] + troubleSingleList = [(aDataAvg, ~goodInAMaskAvg, 'r', 'A data', troubleMask), + (bDataAvg, ~goodInBMaskAvg, 'b', 'B data', troubleMask)] + else : + aList = [(aData, ~goodInAMask, 'r', 'A data', None)] + bList = [(bData, ~goodInBMask, 'b', 'B data', None)] + absDiffList = [(absDiffData, ~goodInBothMask, '', 'abs. diff. data', None)] + subDiffList = [(rawDiffData, ~goodInBothMask, '', 'sub. diff. data', None)] + + singleAList = aList + singleBList = bList + + troubleFullList = [(aData, ~goodInAMask, 'r', 'A data', troubleMask), + (bData, ~goodInBMask, 'b', 'B data', troubleMask)] + troubleSingleList = troubleFullList + + # make a list of both the A and B values together + allList = aList + bList + allSingleList = singleAList + singleBList + + functionsToReturn = { } + + # make the original data plots + if ('do_plot_originals' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_originals']) : + + + functionsToReturn['original'] = ((lambda: figures.create_line_plot_figure(allList, + variableDisplayName + "\nin Both Files")), + variableDisplayName + " in both files", + "AB.png", original_fig_list) + functionsToReturn['originalA'] = ((lambda: figures.create_line_plot_figure(aList, + variableDisplayName + "\nin File A")), + variableDisplayName + " in file a", + "A.png", original_fig_list) + functionsToReturn['originalB'] = ((lambda: figures.create_line_plot_figure(bList, + variableDisplayName + "\nin File B")), + variableDisplayName + " in file b", + "B.png", original_fig_list) + + if len(allSingleList) < len(allList) : + functionsToReturn['originalS'] = ((lambda: figures.create_line_plot_figure(allSingleList, + variableDisplayName + "\nAvg. in Both Files")), + variableDisplayName + " avg. in both files", + "ABsingle.png", original_fig_list) + functionsToReturn['originalAS'] = ((lambda: figures.create_line_plot_figure(singleAList, + variableDisplayName + "\nAvg. in File A")), + variableDisplayName + " avg. in file a", + "Asingle.png", original_fig_list) + functionsToReturn['originalBS'] = ((lambda: figures.create_line_plot_figure(singleBList, + variableDisplayName + "\nAvg. in File B")), + variableDisplayName + " avg. in file b", + "Bsingle.png", original_fig_list) + + # make the absolute value difference plot + if ('do_plot_abs_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_abs_diff']) : + functionsToReturn['diffAbs'] = ((lambda: figures.create_line_plot_figure(absDiffList, + "Absolute value of difference in\n" + variableDisplayName)), + "absolute value of difference in " + variableDisplayName, + "AbsDiff.png", compared_fig_list) + # make the subtractive difference plot + if ('do_plot_sub_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_sub_diff']) : + functionsToReturn['diffSub'] = ((lambda: figures.create_line_plot_figure(subDiffList, + "Value of (Data File B - Data File A) for\n" + variableDisplayName)), + "the difference in " + variableDisplayName, + "Diff.png", compared_fig_list) + + # make the trouble data plot + if ('do_plot_trouble' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_trouble']) : + functionsToReturn['trouble'] = ((lambda: figures.create_line_plot_figure(troubleFullList, + "Areas of trouble data in\n" + variableDisplayName)), + "trouble data in " + variableDisplayName, + "Trouble.png", compared_fig_list) + if len(troubleSingleList) < len(troubleFullList) : + functionsToReturn['troubleAvg'] = ((lambda: figures.create_line_plot_figure(troubleSingleList, + "Avg. areas of trouble data in\n" + variableDisplayName)), + "avg. trouble data in " + variableDisplayName, + "TroubleAvg.png", compared_fig_list) + + return functionsToReturn + +""" +This class creates simple imshow plots of 2D data +""" +class IMShowPlotFunctionFactory (PlottingFunctionFactory) : + def create_plotting_functions ( + self, + + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_fig_list, compared_fig_list, + + # parameters that are only needed for geolocated data + lonLatDataDict=None, + + # only used if we are plotting a contour + dataRanges=None, dataRangeNames=None, dataColors=None, + shouldUseSharedRangeForOriginal=True, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=None, rawDiffData=None, + goodInBothMask=None, + troubleMask=None, outsideEpsilonMask=None, + + # only used for plotting quiver data + aUData=None, aVData=None, + bUData=None, bVData=None, + + # only used for line plots + binIndex=None, tupleIndex=None + ) : + """ + This method generates imshow plotting functions for two dimensional data + and returns them in a dictionary of tupples, where each tupple is in the form: + + returnDictionary['descriptive name'] = (function, title, file_name, list_this_figure_should_go_into) + + The file name is only the name of the file, not the full path. + """ + + assert(aData is not None) + assert(bData is not None) + + functionsToReturn = { } + + # make the original data plots + if ('do_plot_originals' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_originals']) : + + assert(goodInAMask is not None) + assert(aData.shape == goodInAMask.shape) + + functionsToReturn['originalA'] = ((lambda: figures.create_simple_figure(aData, variableDisplayName + "\nin File A", + invalidMask=~goodInAMask)), + variableDisplayName + " in file a", + "A.png", original_fig_list) + + assert(goodInBMask is not None) + assert(bData.shape == goodInBMask.shape) + + functionsToReturn['originalB'] = ((lambda: figures.create_simple_figure(bData, variableDisplayName + "\nin File B", + invalidMask=~goodInBMask)), + variableDisplayName + " in file b", + "B.png", original_fig_list) + + # make the absolute value difference plot + if ('do_plot_abs_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_abs_diff']) : + + assert(absDiffData is not None) + assert(goodInBothMask is not None) + assert(goodInBothMask.shape == absDiffData.shape) + + functionsToReturn['diffAbs'] = ((lambda: figures.create_simple_figure(absDiffData, + "Absolute value of difference in\n" + variableDisplayName, + invalidMask=~goodInBothMask)), + "absolute value of difference in " + variableDisplayName, + "AbsDiff.png", compared_fig_list) + # make the subtractive difference plot + if ('do_plot_sub_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_sub_diff']) : + + assert(rawDiffData is not None) + assert(goodInBothMask is not None) + assert(goodInBothMask.shape == rawDiffData.shape) + + functionsToReturn['diffSub'] = ((lambda: figures.create_simple_figure(rawDiffData, + "Value of (Data File B - Data File A) for\n" + variableDisplayName, + invalidMask=~goodInBothMask)), + "the difference in " + variableDisplayName, + "Diff.png", compared_fig_list) + # make the trouble data plot + if ('do_plot_trouble' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_trouble']) : + + assert(goodInAMask is not None) + assert(goodInBMask is not None) + assert(goodInAMask.shape == goodInBMask.shape) + assert(aData.shape == bData.shape) + assert(aData.shape == goodInAMask.shape) + assert(troubleMask is not None) + assert(troubleMask.shape == aData.shape) + + + # this is not an optimal solution, but we need to have at least somewhat valid data at any mismatched points so + # that our plot won't be totally destroyed by missing or non-finite data from B + bDataCopy = bData[:] + tempMask = goodInAMask & (~goodInBMask) + bDataCopy[tempMask] = aData[tempMask] + functionsToReturn['trouble'] = ((lambda: figures.create_simple_figure(bDataCopy, "Areas of trouble data in\n" + variableDisplayName, + invalidMask=~(goodInAMask | goodInBMask), tagData=troubleMask, + colorMap=mediumGrayColorMap)), + "trouble data in " + variableDisplayName, + "Trouble.png", compared_fig_list) + + return functionsToReturn + + +if __name__=='__main__': + import doctest + doctest.testmod()