diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index 66864289e331220697b9e27fb3484d693a29448f..7ff94c944dc584ab36b08bd9084c5a7c0f374438 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -1161,28 +1161,35 @@ def reportGen_library_call (a_path, b_path, var_list=[ ], plotFunctionGenerationObjects = [ ] - # if the data is the same size, we can always make our basic statistical comparison plots - if (aData.shape == bData.shape) : - plotFunctionGenerationObjects.append(plotcreate.BasicComparisonPlotsFunctionFactory()) + # if the bin and tuple are defined, try to analyze the data as complex + # multidimentional information requiring careful sampling + if ('binIndex' in varRunInfo) and ('tupleIndex' in varRunInfo) : + plotFunctionGenerationObjects.append(plotcreate.BinTupleAnalysisFunctionFactory()) - # if it's vector data with longitude and latitude, quiver plot it on the Earth - if isVectorData and (not do_not_test_with_lon_lat) : - plotFunctionGenerationObjects.append(plotcreate.MappedQuiverPlotFunctionFactory()) - - # if the data is one dimensional we can plot it as lines - elif (len(aData.shape) is 1) : - plotFunctionGenerationObjects.append(plotcreate.LinePlotsFunctionFactory()) + else : - # if the data is 2D we have some options based on the type of data - elif (len(aData.shape) is 2) : + # if the data is the same size, we can always make our basic statistical comparison plots + if (aData.shape == bData.shape) : + plotFunctionGenerationObjects.append(plotcreate.BasicComparisonPlotsFunctionFactory()) + + # if it's vector data with longitude and latitude, quiver plot it on the Earth + if isVectorData and (not do_not_test_with_lon_lat) : + plotFunctionGenerationObjects.append(plotcreate.MappedQuiverPlotFunctionFactory()) - # if the data is not mapped to a longitude and latitude, just show it as an image - if (do_not_test_with_lon_lat) : - plotFunctionGenerationObjects.append(plotcreate.IMShowPlotFunctionFactory()) + # if the data is one dimensional we can plot it as lines + elif (len(aData.shape) is 1) : + plotFunctionGenerationObjects.append(plotcreate.LinePlotsFunctionFactory()) - # if it's 2D and mapped to the Earth, contour plot it on the earth - else : - plotFunctionGenerationObjects.append(plotcreate.MappedContourPlotFunctionFactory()) + # if the data is 2D we have some options based on the type of data + elif (len(aData.shape) is 2) : + + # if the data is not mapped to a longitude and latitude, just show it as an image + if (do_not_test_with_lon_lat) : + plotFunctionGenerationObjects.append(plotcreate.IMShowPlotFunctionFactory()) + + # if it's 2D and mapped to the Earth, contour plot it on the earth + else : + plotFunctionGenerationObjects.append(plotcreate.MappedContourPlotFunctionFactory()) # if there's magnitude and direction data, figure out the u and v, otherwise these will be None aUData, aVData = _get_UV_info_from_magnitude_direction_info (aFile, @@ -1216,7 +1223,11 @@ def reportGen_library_call (a_path, b_path, var_list=[ ], shouldUseSharedRangeForOriginal=runInfo['useSharedRangeForOriginal'], doPlotSettingsDict = varRunInfo, aUData=aUData, aVData=aVData, - bUData=bUData, bVData=bVData,) + bUData=bUData, bVData=bVData, + binIndex= varRunInfo['binIndex'] if 'binIndex' in varRunInfo else None, + tupleIndex=varRunInfo['tupleIndex'] if 'tupleIndex' in varRunInfo else None, + binName= varRunInfo['binName'] if 'binName' in varRunInfo else 'bin', + tupleName= varRunInfo['tupleName'] if 'tupleName' in varRunInfo else 'tuple') 
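The branch above keys entirely off two optional entries in varRunInfo: when both 'binIndex' and 'tupleIndex' are present, the variable is routed to the new BinTupleAnalysisFunctionFactory, otherwise the original shape-based selection of plot factories still applies. A minimal sketch of a per-variable settings entry that would trigger the new path follows; only the four keys shown are read by this hunk, and the example values and display names are hypothetical.

    # hypothetical varRunInfo entry for one variable; values are illustrative only
    exampleVarRunInfo = {
        'binIndex':   0,                 # dimension kept as the first ("bin") axis
        'tupleIndex': 2,                 # dimension kept as the last ("tuple") axis
        'binName':    'pressure level',  # optional display names; the call site
        'tupleName':  'channel',         # defaults to 'bin' / 'tuple' when absent
    }

    # the same guarded look-ups used when calling plot_and_save_comparison_figures
    binIndex   = exampleVarRunInfo['binIndex']   if 'binIndex'   in exampleVarRunInfo else None
    tupleIndex = exampleVarRunInfo['tupleIndex'] if 'tupleIndex' in exampleVarRunInfo else None
    binName    = exampleVarRunInfo['binName']    if 'binName'    in exampleVarRunInfo else 'bin'
    tupleName  = exampleVarRunInfo['tupleName']  if 'tupleName'  in exampleVarRunInfo else 'tuple'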
print("\tfinished creating figures for: " + explanationName) diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py index 38e5e2c26ff9f9b115a1686304bd3563e4abc260..4bf2cea2d46286b7f925b4a0e7442e4b81fbe068 100644 --- a/pyglance/glance/delta.py +++ b/pyglance/glance/delta.py @@ -188,131 +188,88 @@ def convert_mag_dir_to_U_V_vector(magnitude_data, direction_data, invalidMask=No if invalidMask is None : invalidMask = zeros(magnitude_data.shape, dtype=bool) + new_direction_data = direction_data[:] + 180 + + print ("direction data: " + str(new_direction_data[~invalidMask])) + uData = zeros(magnitude_data.shape, dtype=float) uData[invalidMask] = nan - uData[~invalidMask] = magnitude_data[~invalidMask] * cos (direction_data[~invalidMask]) + uData[~invalidMask] = magnitude_data[~invalidMask] * np.sin (deg2rad(new_direction_data[~invalidMask])) vData = zeros(magnitude_data.shape, dtype=float) vData[invalidMask] = nan - vData[~invalidMask] = magnitude_data[~invalidMask] * sin (direction_data[~invalidMask]) + vData[~invalidMask] = magnitude_data[~invalidMask] * np.cos (deg2rad(new_direction_data[~invalidMask])) return uData, vData -def calculate_root_mean_square (data, goodMask=None) : +# a method to make a list of index numbers for reordering a multi-dimensional array +def _make_new_index_list(numberOfIndexes, firstIndexNumber=0, lastIndexNumber=None) : """ - calculate the root mean square of the data, - possibly selecting only the points in the given - goodMask, if no mask is given, all points will - be used + the first and last index numbers represent the dimensions you want to be first and last (respectively) + when the list is reordered; any other indexes will retain their relative ordering + + newIndexList = _make_new_index_list(numIndexes, binIndex, tupleIndex) """ - if goodMask is None: - goodMask = np.ones(data.shape, dtype=bool) - rootMeanSquare = sqrt( sum( data[goodMask] ** 2 ) / sum( goodMask ) ) + if lastIndexNumber is None: + lastIndexNumber = numberOfIndexes - 1 - return rootMeanSquare + newIndexList = range(numberOfIndexes) + maxSpecial = max(firstIndexNumber, lastIndexNumber) + minSpecial = min(firstIndexNumber, lastIndexNumber) + del(newIndexList[maxSpecial]) + del(newIndexList[minSpecial]) + newIndexList = [firstIndexNumber] + newIndexList + [lastIndexNumber] + + return newIndexList -def organize_dimensions_and_produce_rms_info(dataA, dataB, binDimensionIndexNum, tupleDimensionIndexNum, - epsilon=0., (a_missing_value, b_missing_value)=(None, None), - (ignore_mask_a, ignore_mask_b)=(None, None)) : +def reorder_for_bin_tuple (data, binIndexNumber, tupleIndexNumber) : """ - reorganize the dimensions in the given data sets in order to put the bin dimension first - and the tuple dimension last - collapse all other dimensions into a single "case" dimension, but preserve information on their - previous shape so that the coresponding index of a given case can be recovered - - once the data is reshaped to the desired ordering with the appopriate set of cases, diff the - data and calculate the rms on a per case basis + reorder the data given so that the bin index is first, the tuple index is last, + and any additional dimensions are flattened into a middle "case" index - return reshaped original and diffrence data sets, masks as corresponding to the output of diff and - matching the shape of the reshaped data, and information on the shape of the collapsed case dimensions - - TODO, this method has not yet been tested or integrated with the rest of glance + the reordered data and the 
shape of flattened case indexes will be returned + (note if the original data was only 2 dimensional, None will be returned for the + shape of the flattened case indexes, since there were no other dimensions to flatten) """ - # make sure we meet the minimal requirement for compatable shapes/index sizes - assert(dataA.shape is dataB.shape) - assert(binDimensionIndexNum < len(dataA.shape)) - assert(tupleDimensionIndexNum < len(dataA.shape)) - assert(tupleDimensionIndexNum is not binDimensionIndexNum) - - # figure out the order to put the index we want first - new_index_order = range(len(dataA.shape)) - smaller = tupleDimensionIndexNum - larger = binDimensionIndexNum - if (tupleDimensionIndexNum > binDimensionIndexNum) : - larger = tupleDimensionIndexNum - smaller = binDimensionIndexNum - del(new_index_order[larger]) - del(new_index_order[smaller]) - new_index_order = [binDimensionIndexNum] + new_index_order + [tupleDimensionIndexNum] - - # copy our data, then put the indexes into the new order - new_a_data = dataA.copy() - new_a_data = new_a_data.transpose(new_index_order) - new_b_data = dataB.copy() - new_b_data = new_b_data.transpose(new_index_order) - # if our masks need to be reordered, do that - new_a_missing_mask = a_missing_mask - if new_a_missing_mask is not None : - new_a_missing_mask = new_a_missing_mask.copy() - new_a_missing_mask = new_a_missing_mask.transpose(new_index_order) - new_b_missing_mask = b_missing_mask - if new_b_missing_mask is not None : - new_b_missing_mask = new_b_missing_mask.copy() - new_b_missing_mask = new_b_missing_mask.transpose(new_index_order) - new_a_ignore_mask = ignore_mask_a - if new_a_ignore_mask is not None : - new_a_ignore_mask = new_a_ignore_mask.copy() - new_a_ignore_mask = new_a_ignore_mask.transpose(new_index_order) - new_b_ignore_mask = ignore_mask_b - if new_b_ignore_mask is not None : - new_b_ignore_mask = new_b_ignore_mask.copy() - new_b_ignore_mask = new_b_ignore_mask.transpose(new_index_order) + # put the bin and tuple dimensions in the correct places + newIndexList = _make_new_index_list(len(data.shape), binIndexNumber, tupleIndexNumber) + newData = data.transpose(newIndexList) # get the shape information on the internal dimensions we're going to combine - case_dimension_original_shape = new_b_data.shape[1:-1] + caseOriginalShape = newData.shape[1:-1] # combine the internal dimensions, to figure out what shape things # will be with the flattened cases - numDimensionsToFlatten = len(case_dimension_original_shape) - sizeAfterFlattened = np.multiply.accumulate(case_dimension_original_shape)[-1] - newShape = (new_a_data.shape[0], sizeAfterFlattened, new_a_data.shape[-1]) + numDimensionsToFlatten = len(caseOriginalShape) + sizeAfterFlattened = np.multiply.accumulate(caseOriginalShape)[-1] + newShape = (newData.shape[0], sizeAfterFlattened, newData.shape[-1]) # flatten the case dimensions - new_a_data = new_a_data.reshape(newShape) - new_b_data = new_b_data.reshape(newShape) - if new_a_missing_mask is not None : - new_a_missing_mask = new_a_missing_mask.reshape(newShape) - if new_b_missing_mask is not None : - new_b_missing_mask = new_b_missing_mask.reshape(newShape) - if new_a_ignore_mask is not None : - new_a_ignore_mask = new_a_ignore_mask.reshape(newShape) - if new_b_ignore_mask is not None : - new_b_ignore_mask = new_b_ignore_mask.reshape(newShape) - - # diff our data - rawDiffData, goodInBothMask, (goodInAMask, goodInBMask), troubleMask, outsideEpsilonMask, \ - (aNotFiniteMask, bNotFiniteMask), (aMissingMask, bMissingMask), 
(finalAIgnoreMask, finalBIgnoreMask) = diffOutput = \ - diff(new_a_data, new_b_data, epsilon, (new_a_missing_mask, new_b_missing_mask), (new_a_ignore_mask, new_b_ignore_mask)) - - # calculate the rms diffs for all the cases - caseRMSInfo = np.zeros(rawDiffData.shape[:-1], dtype=np.float64) - # TODO, how to do this without a loop? - for caseNum in range(sizeAfterFlattened) : - caseRMSInfo[caseNum] = calculate_root_mean_square(rawDiffData[caseNum], goodMask=goodInBothMask[caseNum]) - - # return the reshaped original data, - # information on the original shape of the case dimension that was flattened, - # the rms diff info, - # and then the full set of info that came out of the diff between the reshaped a and b data - return (new_a_data, new_b_data), case_dimension_original_shape, caseRMSInfo, diffOutput - """ - diffOutput is in the form: + newData = newData.reshape(newShape) - rawDiffData, goodInBothMask, (goodInAMask, goodInBMask), troubleMask, outsideEpsilonMask, \ - (aNotFiniteMask, bNotFiniteMask), (aMissingMask, bMissingMask), (finalAIgnoreMask, finalBIgnoreMask) + # TODO, remove once this is tested + #print ('original data shape: ' + str(data.shape)) + #print ('original case shape: ' + str(caseOriginalShape)) + #print ('new data shape: ' + str(newData.shape)) + + return newData, caseOriginalShape + + +def calculate_root_mean_square (data, goodMask=None) : """ + calculate the root mean square of the data, + possibly selecting only the points in the given + goodMask, if no mask is given, all points will + be used + """ + if goodMask is None: + goodMask = np.ones(data.shape, dtype=bool) + + rootMeanSquare = sqrt( sum( data[goodMask] ** 2 ) / sum( goodMask ) ) + + return rootMeanSquare def _get_num_perfect(a, b, ignoreMask=None): numPerfect = 0 diff --git a/pyglance/glance/figures.py b/pyglance/glance/figures.py index 896cb1fd9c8144fc3684290fb283e1d074382907..f9d129c4da2eb7bf1c793ec3879572961038d30b 100644 --- a/pyglance/glance/figures.py +++ b/pyglance/glance/figures.py @@ -528,7 +528,7 @@ def create_line_plot_figure(dataList, figureTitle) : if (len(dataList) > 1) or plottedTagData : # make a key to explain our plot # as long as things have been plotted with proper labels they should show up here - #axes.legend(loc=0, markerscale=3.0) # Note: at the moment markerscale doesn't seem to work # TODO, turn back on? 
+ axes.legend(loc=0, markerscale=3.0) # Note: at the moment markerscale doesn't seem to work pass # and some informational stuff diff --git a/pyglance/glance/io.py b/pyglance/glance/io.py index cdc1c6ece2d3734708dc6a9da7a63f3da16aa8dd..a8bb79526e4f1f7b56872a0e55bb0794b9f87979 100644 --- a/pyglance/glance/io.py +++ b/pyglance/glance/io.py @@ -116,7 +116,28 @@ class hdf(SD): SDS.endaccess(variable_object) return to_return + + def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None): + """ + create a new variable with the given name + optionally set the missing value (fill value) and data to those given + + the created variable will be returned, or None if a variable could not + be created + """ + # TODO + + return None + + def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) : + """ + if the attribute exists for the given variable, set it to the new value + if the attribute does not exist for the given variable, create it and set it to the new value + """ + # TODO + + return class nc(CDF): """wrapper for NetCDF3/4/opendap dataset for comparison @@ -190,7 +211,6 @@ class nc(CDF): return to_return - # TODO, this method only exists for nc files at the moment, make the others at some point def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None): """ create a new variable with the given name @@ -371,7 +391,28 @@ class h5(object): toReturn = temp return toReturn + + def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None): + """ + create a new variable with the given name + optionally set the missing value (fill value) and data to those given + + the created variable will be returned, or None if a variable could not + be created + """ + # TODO + + return None + + def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) : + """ + if the attribute exists for the given variable, set it to the new value + if the attribute does not exist for the given variable, create it and set it to the new value + """ + # TODO + + return def open(pathname, allowWrite=False): cls = globals()[os.path.splitext(pathname)[1][1:]] diff --git a/pyglance/glance/plot.py b/pyglance/glance/plot.py index 1acbc3fda43c90cee39426273ad7146399c8d018..08eac02ab5b2d1d423a0d0af79f766406e165423 100644 --- a/pyglance/glance/plot.py +++ b/pyglance/glance/plot.py @@ -161,7 +161,8 @@ def plot_and_save_comparison_figures (aData, bData, doPlotSettingsDict={ }, aUData=None, aVData=None, bUData=None, bVData=None, - binIndex=None, tupleIndex=None) : + binIndex=None, tupleIndex=None, + binName='bin', tupleName='tuple') : """ Plot images for a set of figures based on the data sets and settings passed in. The images will be saved to disk according to the settings. 
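The io.py hunks above add placeholder create_new_variable and add_attribute_data_to_variable methods to the hdf and h5 wrappers, mirroring the interface the nc class already implements; both stubs currently return without writing anything. As a rough illustration of one direction the h5 stubs could take, here is a sketch based on h5py; it assumes the wrapper keeps an open, writable h5py.File in self._h5file (a made-up attribute name) and is not part of this patch.

    import h5py
    import numpy as np

    # sketch of method bodies for the h5 wrapper class; self._h5file is hypothetical
    def create_new_variable(self, variablename, missingvalue=None, data=None,
                            variabletocopyattributesfrom=None):
        fileObject = self._h5file
        if (variablename in fileObject) or (data is None):
            return None                                     # nothing sensible to create
        newVariable = fileObject.create_dataset(variablename, data=np.asarray(data),
                                                fillvalue=missingvalue)
        if missingvalue is not None:
            newVariable.attrs['_FillValue'] = missingvalue  # attribute name is a convention, not required
        if variabletocopyattributesfrom is not None:
            for attrName, attrValue in fileObject[variabletocopyattributesfrom].attrs.items():
                newVariable.attrs[attrName] = attrValue
        return newVariable

    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue):
        # h5py attribute assignment both creates and overwrites, so one line covers both cases
        self._h5file[variableName].attrs[newAttributeName] = newAttributeValue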
@@ -282,8 +283,9 @@ def plot_and_save_comparison_figures (aData, bData, aUData=aUData, aVData=aVData, bUData=bUData, bVData=bVData, - # only used for line plots TODO - binIndex=0, tupleIndex=1 # TODO, temporary binIndex=None, tupleIndex=None + # only used for line plots + binIndex=binIndex, tupleIndex=tupleIndex, + binName=binName, tupleName=tupleName ) plottingFunctions.update(moreFunctions) diff --git a/pyglance/glance/plotcreatefns.py b/pyglance/glance/plotcreatefns.py index fa6996efb2b46278a95904fa3ad2ce6ead0ddb72..6d3f6ffda23cc97e1855a77ace3e32f8849624c8 100644 --- a/pyglance/glance/plotcreatefns.py +++ b/pyglance/glance/plotcreatefns.py @@ -22,6 +22,7 @@ from matplotlib.ticker import FormatStrFormatter from PIL import Image import os, sys, logging +import random as random import numpy as np from numpy import ma @@ -144,29 +145,6 @@ def _make_axis_and_basemap(lonLatDataDict, goodInAMask, goodInBMask, shouldUseSh return fullAxis, baseMapInstance, sharedRange -# a method to make a list of index numbers for reordering a multi-dimensional array -def _make_new_index_list(numberOfIndexes, firstIndexNumber=0, lastIndexNumber=None) : - """ - the first and last index numbers represent the dimensions you want to be first and last (respectively) - when the list is reordered; any other indexes will retain their relative ordering - - newIndexList = _make_new_index_list(numIndexes, binIndex, tupleIndex) - - TODO, move this to delta? - """ - - if lastIndexNumber is None: - lastIndexNumber = numberOfIndexes - 1 - - newIndexList = range(numberOfIndexes) - maxSpecial = max(firstIndexNumber, lastIndexNumber) - minSpecial = min(firstIndexNumber, lastIndexNumber) - del(newIndexList[maxSpecial]) - del(newIndexList[minSpecial]) - newIndexList = [firstIndexNumber] + newIndexList + [lastIndexNumber] - - return newIndexList - # ********************* Section of public classes *********************** """ @@ -208,7 +186,8 @@ class PlottingFunctionFactory : bUData=None, bVData=None, # only used for line plots - binIndex=None, tupleIndex=None + binIndex=None, tupleIndex=None, + binName=None, tupleName=None ) : _abstract @@ -248,7 +227,8 @@ class BasicComparisonPlotsFunctionFactory (PlottingFunctionFactory) : bUData=None, bVData=None, # only used for line plots - binIndex=None, tupleIndex=None + binIndex=None, tupleIndex=None, + binName=None, tupleName=None ) : functionsToReturn = { } @@ -320,7 +300,8 @@ class MappedContourPlotFunctionFactory (PlottingFunctionFactory) : bUData=None, bVData=None, # only used for line plots - binIndex=None, tupleIndex=None + binIndex=None, tupleIndex=None, + binName=None, tupleName=None ) : # the default for plotting geolocated data @@ -484,7 +465,8 @@ class MappedQuiverPlotFunctionFactory (PlottingFunctionFactory) : bUData=None, bVData=None, # only used for line plots - binIndex=None, tupleIndex=None + binIndex=None, tupleIndex=None, + binName=None, tupleName=None ) : # the default for plotting geolocated data @@ -655,7 +637,8 @@ class LinePlotsFunctionFactory (PlottingFunctionFactory) : bUData=None, bVData=None, # only used for line plots - binIndex=None, tupleIndex=None + binIndex=None, tupleIndex=None, + binName=None, tupleName=None ) : """ This method generates line plotting functions for one dimensional data @@ -667,98 +650,17 @@ class LinePlotsFunctionFactory (PlottingFunctionFactory) : """ assert(aData is not None) assert(bData is not None) - # TODO, assert about more of the data - - if len(aData.shape) > 1 : - assert(binIndex is not None) - assert(tupleIndex is not None) - 
assert(binIndex is not tupleIndex) - - numIndexes = len(aData.shape) - assert(binIndex < numIndexes) - assert(tupleIndex < numIndexes) - - # TODO, the rest of this function is not totally refactored but should be functional for now - - # TODO, temporary - aList = [ ] - bList = [ ] - singleAList = [ ] - singleBList = [ ] - absDiffList = [ ] - subDiffList = [ ] - troubleSingleList = [ ] - troubleFullList = [ ] - if len(aData.shape) > 1 : - - newIndexList = _make_new_index_list(numIndexes, binIndex, tupleIndex) - aData = aData.transpose(newIndexList) - bData = bData.transpose(newIndexList) - goodInAMask = goodInAMask.transpose(newIndexList) - goodInBMask = goodInBMask.transpose(newIndexList) - absDiffData = absDiffData.transpose(newIndexList) - rawDiffData = rawDiffData.transpose(newIndexList) - goodInBothMask = goodInBothMask.transpose(newIndexList) - troubleMask = troubleMask.transpose(newIndexList) - outsideEpsilonMask = outsideEpsilonMask.transpose(newIndexList) - - for firstIndexValue in range(aData.shape[0]) : - # get the a and b data with it's masks - aSlice = filters.collapse_to_index(aData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) - bSlice = filters.collapse_to_index(bData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) - goodInAMaskSlice = filters.collapse_to_index(goodInAMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) - goodInBMaskSlice = filters.collapse_to_index(goodInBMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) - - # add to the plain a and b lists - aList.append((aSlice, ~goodInAMaskSlice, 'r', 'A data, ' + str(firstIndexValue), None)) - bList.append((bSlice, ~goodInBMaskSlice, 'b', 'B data, ' + str(firstIndexValue), None)) - - # deal with the trouble data and it's list - troubleMaskSlice = filters.collapse_to_index(troubleMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) - troubleFullList.append((aSlice, ~goodInAMaskSlice, 'r', 'A data, ' + str(firstIndexValue), troubleMaskSlice)) - troubleFullList.append((bSlice, ~goodInBMaskSlice, 'b', 'B data, ' + str(firstIndexValue), troubleMaskSlice)) - - # get the diff data with it's masks - absDiffDataSlice = filters.collapse_to_index(absDiffData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) - rawDiffDataSlice = filters.collapse_to_index(rawDiffData[firstIndexValue], (numIndexes - 1), missing_value=-999.0, ignore_below_exclusive=-990.0) - goodMaskSlice = filters.collapse_to_index(goodInBothMask[firstIndexValue], (numIndexes - 1), collapsing_function=(lambda data: any(data))) - - # add to the diff data lists - absDiffList.append((absDiffDataSlice, ~goodMaskSlice, '', 'abs. diff. data, ' + str(firstIndexValue), None)) - subDiffList.append((rawDiffDataSlice, ~goodMaskSlice, '', 'sub. diff. 
data, ' + str(firstIndexValue), None)) - - # also collapse the masks - troubleMask = filters.collapse_to_index(troubleMask, numIndexes, collapsing_function=(lambda data: any(data))) - #outsideEpsilonMask = filters.collapse_to_index(outsideEpsilonMask, numIndexes, collapsing_function=(lambda data: any(data))) - #goodInBothMask = filters.collapse_to_index(goodInBothMask, numIndexes, collapsing_function=(lambda data: any(data))) - - # make some averages - aDataAvg = filters.collapse_to_index(aData, numIndexes, missing_value=-999.0, ignore_below_exclusive=-990.0) - bDataAvg = filters.collapse_to_index(bData, numIndexes, missing_value=-999.0, ignore_below_exclusive=-990.0) - goodInAMaskAvg = filters.collapse_to_index(goodInAMask, numIndexes, collapsing_function=(lambda data: any(data))) - goodInBMaskAvg = filters.collapse_to_index(goodInBMask, numIndexes, collapsing_function=(lambda data: any(data))) - - # make our averaged sets - singleAList = [(aDataAvg, ~goodInAMaskAvg, 'r', 'A data', None)] - singleBList = [(bDataAvg, ~goodInBMaskAvg, 'b', 'B data', None)] - troubleSingleList = [(aDataAvg, ~goodInAMaskAvg, 'r', 'A data', troubleMask), - (bDataAvg, ~goodInBMaskAvg, 'b', 'B data', troubleMask)] - else : - aList = [(aData, ~goodInAMask, 'r', 'A data', None)] - bList = [(bData, ~goodInBMask, 'b', 'B data', None)] - absDiffList = [(absDiffData, ~goodInBothMask, '', 'abs. diff. data', None)] - subDiffList = [(rawDiffData, ~goodInBothMask, '', 'sub. diff. data', None)] - - singleAList = aList - singleBList = bList - - troubleFullList = [(aData, ~goodInAMask, 'r', 'A data', troubleMask), - (bData, ~goodInBMask, 'b', 'B data', troubleMask)] - troubleSingleList = troubleFullList - - # make a list of both the A and B values together - allList = aList + bList - allSingleList = singleAList + singleBList + assert(len(aData.shape) is 1) + assert(aData.shape == bData.shape) + + # make all our data sets for plotting ahead of time for simplicity + aList = [(aData, ~goodInAMask, 'r', 'A data', None)] + bList = [(bData, ~goodInBMask, 'b', 'B data', None)] + absDiffList = [(absDiffData, ~goodInBothMask, '', 'abs. diff. data', None)] + subDiffList = [(rawDiffData, ~goodInBothMask, '', 'sub. diff. data', None)] + + troubleList = [(aData, ~goodInAMask, 'r', 'A data', troubleMask), + (bData, ~goodInBMask, 'b', 'B data', troubleMask)] functionsToReturn = { } @@ -766,7 +668,7 @@ class LinePlotsFunctionFactory (PlottingFunctionFactory) : if ('do_plot_originals' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_originals']) : - functionsToReturn['original'] = ((lambda: figures.create_line_plot_figure(allList, + functionsToReturn['original'] = ((lambda: figures.create_line_plot_figure((aList + bList), variableDisplayName + "\nin Both Files")), variableDisplayName + " in both files", "AB.png", original_fig_list) @@ -778,20 +680,6 @@ class LinePlotsFunctionFactory (PlottingFunctionFactory) : variableDisplayName + "\nin File B")), variableDisplayName + " in file b", "B.png", original_fig_list) - - if len(allSingleList) < len(allList) : - functionsToReturn['originalS'] = ((lambda: figures.create_line_plot_figure(allSingleList, - variableDisplayName + "\nAvg. in Both Files")), - variableDisplayName + " avg. in both files", - "ABsingle.png", original_fig_list) - functionsToReturn['originalAS'] = ((lambda: figures.create_line_plot_figure(singleAList, - variableDisplayName + "\nAvg. in File A")), - variableDisplayName + " avg. 
in file a", - "Asingle.png", original_fig_list) - functionsToReturn['originalBS'] = ((lambda: figures.create_line_plot_figure(singleBList, - variableDisplayName + "\nAvg. in File B")), - variableDisplayName + " avg. in file b", - "Bsingle.png", original_fig_list) # make the absolute value difference plot if ('do_plot_abs_diff' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_abs_diff']) : @@ -808,15 +696,164 @@ class LinePlotsFunctionFactory (PlottingFunctionFactory) : # make the trouble data plot if ('do_plot_trouble' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_trouble']) : - functionsToReturn['trouble'] = ((lambda: figures.create_line_plot_figure(troubleFullList, + functionsToReturn['trouble'] = ((lambda: figures.create_line_plot_figure(troubleList, "Areas of trouble data in\n" + variableDisplayName)), "trouble data in " + variableDisplayName, "Trouble.png", compared_fig_list) - if len(troubleSingleList) < len(troubleFullList) : - functionsToReturn['troubleAvg'] = ((lambda: figures.create_line_plot_figure(troubleSingleList, - "Avg. areas of trouble data in\n" + variableDisplayName)), - "avg. trouble data in " + variableDisplayName, - "TroubleAvg.png", compared_fig_list) + + return functionsToReturn + +class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) : + def create_plotting_functions ( + self, + + # the most basic data set needed + aData, bData, + variableDisplayName, + epsilon, + goodInAMask, goodInBMask, + doPlotSettingsDict, + + # where the names of the created figures will be stored + original_fig_list, compared_fig_list, + + # parameters that are only needed for geolocated data + lonLatDataDict=None, + + # only used if we are plotting a contour + dataRanges=None, dataRangeNames=None, dataColors=None, + shouldUseSharedRangeForOriginal=True, + + # parameters that are only used if the data can be compared + # point by point + absDiffData=None, rawDiffData=None, + goodInBothMask=None, + troubleMask=None, outsideEpsilonMask=None, + + # only used for plotting quiver data + aUData=None, aVData=None, + bUData=None, bVData=None, + + # only used for line plots + binIndex=None, tupleIndex=None, + binName=None, tupleName=None + ) : + """ + This method generates histogram and sample line plot functions for complex three dimensional data + and returns them in a dictionary of tupples, where each tupple is in the form: + + returnDictionary['descriptive name'] = (function, title, file_name, list_this_figure_should_go_into) + + The file name is only the name of the file, not the full path. 
+ """ + + # confirm that our a and b data are minimally ok + assert(aData is not None) + assert(bData is not None) + assert(len(aData.shape) >= 2) + assert(aData.shape == bData.shape) + + # confirm that our bin and tuple indexes are valid + assert(binIndex is not None) + assert(tupleIndex is not None) + assert(binIndex is not tupleIndex) + assert(binIndex < len(aData.shape)) + assert(tupleIndex < len(aData.shape)) + + # reorder and reshape our data into the [bin][case][tuple] form + aData, caseInfo1 = delta.reorder_for_bin_tuple(aData, binIndex, tupleIndex) + bData, caseInfo2 = delta.reorder_for_bin_tuple(bData, binIndex, tupleIndex) + goodInAMask, caseInfo3 = delta.reorder_for_bin_tuple(goodInAMask, binIndex, tupleIndex) + goodInBMask, caseInfo4 = delta.reorder_for_bin_tuple(goodInBMask, binIndex, tupleIndex) + absDiffData, caseInfo5 = delta.reorder_for_bin_tuple(absDiffData, binIndex, tupleIndex) + rawDiffData, caseInfo6 = delta.reorder_for_bin_tuple(rawDiffData, binIndex, tupleIndex) + goodInBothMask, caseInfo7 = delta.reorder_for_bin_tuple(goodInBothMask, binIndex, tupleIndex) + troubleMask, caseInfo8 = delta.reorder_for_bin_tuple(troubleMask, binIndex, tupleIndex) + outsideEpsilonMask, caseInfo9 = delta.reorder_for_bin_tuple(outsideEpsilonMask, binIndex, tupleIndex) + + # assert that all our case dimensions were originally the same + # TODO, should I have just compared the shape of all the data before modifying it? + assert(caseInfo1 == caseInfo2) + assert(caseInfo2 == caseInfo3) + assert(caseInfo3 == caseInfo4) + assert(caseInfo4 == caseInfo5) + assert(caseInfo5 == caseInfo6) + assert(caseInfo6 == caseInfo7) + assert(caseInfo7 == caseInfo8) + assert(caseInfo8 == caseInfo9) + + # our list of functions that will later create the plots + functionsToReturn = { } + + # for each of the bins, make the rms histogram data + numHistogramSections = 7 # TODO at some point make this controlable via the doPlotSettingsDict + for binNumber in range(rawDiffData.shape[0]) : + + # figure out all the rms diff values for the various cases + rmsDiffValues = np.zeros(rawDiffData.shape[1]) + for caseNumber in range(rawDiffData.shape[1]) : + rmsDiffValues[caseNumber] = delta.calculate_root_mean_square(rawDiffData[binNumber][caseNumber], + goodInBothMask[binNumber][caseNumber]) + + # make the basic histogram for this binNumber + dataForHistogram = rmsDiffValues[isfinite(rmsDiffValues)] # remove any invalid data "nan" values + if ('do_plot_histogram' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_histogram']) : + def make_histogram(binNumber=binNumber, dataForHistogram=dataForHistogram): + return figures.create_histogram(dataForHistogram, numHistogramSections, + ("RMS Diff. 
in " + variableDisplayName + + "\nfor " + binName + " # " + str(binNumber + 1)), + ('RMS Difference across ' + tupleName + ' dimension'), + ('Number of Cases with a Given RMS Diff.'), + True) + functionsToReturn[str(binNumber + 1) + 'histogram'] = (make_histogram, + "histogram of rms differences in " + variableDisplayName, + str(binNumber + 1) + "Hist.png", compared_fig_list) + + # we will need to be able to mask out the non-finite data + tempFiniteMap = np.isfinite(rmsDiffValues) + + # figure out the min/max rms diff values + minRMSDiff = min(rmsDiffValues[tempFiniteMap]) + maxRMSDiff = max(rmsDiffValues[tempFiniteMap]) + + # sort the cases by their rms diff values + counts = np.zeros(numHistogramSections) + histogramSections = { } + histogramSectionLimits = np.linspace(minRMSDiff, maxRMSDiff, numHistogramSections + 1) + histogramSectionLimits[0] = histogramSectionLimits[0] - 0.00000001 + print ('*** section limits *** : ' + str(histogramSectionLimits)) + for caseNumber in range(rmsDiffValues.size) : + + # check each of the sections to see which one it falls in + for limitIndex in range(histogramSectionLimits.size - 1) : + + # if it falls in this section, add it's case number index to the list for this section + if ( (rmsDiffValues[caseNumber] > histogramSectionLimits[limitIndex]) and + (rmsDiffValues[caseNumber] <= histogramSectionLimits[limitIndex + 1]) ) : + + if limitIndex not in histogramSections : + histogramSections[limitIndex] = [ ] + + histogramSections[limitIndex].append(caseNumber) + + # select example cases for the histogram + random.seed('test') # TODO, seed with something else? + for section in sorted(histogramSections.keys()) : + listOfCases = histogramSections[section] + caseNumber = listOfCases[random.randint(0, len(listOfCases) - 1)] + + # TODO, make lineplot functions for the example cases + dataList = [(aData[binNumber][caseNumber], ~goodInAMask[binNumber][caseNumber], 'r', 'A case', None), + (bData[binNumber][caseNumber], ~goodInBMask[binNumber][caseNumber], 'b', 'B case', None)] + def make_lineplot(data=dataList, binNumber=binNumber, caseNumber=caseNumber): + return figures.create_line_plot_figure(data, + variableDisplayName + " in both files" + "\n" + "for " + + binName + " # " + str(binNumber + 1) + " and case # " + + str(caseNumber)) + functionsToReturn[str(binNumber + 1) + 'sample' + str(section + 1) + '.AB.png'] = (make_lineplot, + variableDisplayName + " sample in both files", + str(binNumber + 1) + 'sample' + str(section + 1) + '.AB.png', + compared_fig_list) return functionsToReturn @@ -855,7 +892,8 @@ class IMShowPlotFunctionFactory (PlottingFunctionFactory) : bUData=None, bVData=None, # only used for line plots - binIndex=None, tupleIndex=None + binIndex=None, tupleIndex=None, + binName=None, tupleName=None ) : """ This method generates imshow plotting functions for two dimensional data