diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index 79ac97b5a40d2cd14d35bf2c87272721bf6a4361..7dd0703b12e391431eea06449a92427d7352ca70 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -1449,26 +1449,42 @@ python -m glance.compare inspectStats A.hdf def help(command=None): """print help for a specific command or list of commands e.g. help stats - """ + """ # TODO, need to double check that this still works with the lowercase names? + + print_all_summary = False if command is None: + print_all_summary = True + else: + if command in commands : + print commands[command].__doc__ + else : + print_all_summary = True + + # print out a list of summaries for each command + if print_all_summary : # print first line of docstring for cmd in commands: ds = commands[cmd].__doc__.split('\n')[0] print "%-16s %s" % (cmd,ds) - else: - print commands[command].__doc__ - + # all the local public functions are considered part of glance, collect them up - commands.update(dict(x for x in locals().items() if x[0] not in prior)) - + commands.update(dict(x for x in locals().items() if x[0] not in prior)) + + # lowercase locals + # Future: this is an awkward use and could be made more elegant + lower_locals = { } + for command_key in commands.keys() : + lower_locals[command_key.lower()] = locals()[command_key] + # if what the user asked for is not one of our existing functions, print the help - if (not args) or (args[0] not in commands): - parser.print_help() - help() + if ((not args) or (args[0].lower() not in lower_locals)): + if not options.version : + parser.print_help() + help() return 9 else: - # call the function the user named, given the arguments from the command line - rc = locals()[args[0]](*args[1:]) + # call the function the user named, given the arguments from the command line, lowercase the request to ignore case + rc = lower_locals[args[0].lower()](*args[1:]) return 0 if rc is None else rc return 0 # it shouldn't be possible to get here any longer diff --git a/pyglance/glance/data.py b/pyglance/glance/data.py index 93a5ba8060d370cd67f26dc4aaff463bc3109400..4e213de9d916dbe8af0737c5e69afcda3ebed079 100644 --- a/pyglance/glance/data.py +++ b/pyglance/glance/data.py @@ -199,9 +199,12 @@ class DataObject (object) : # nonfinite, or ignored #valid_mask = ~ (missing_mask | non_finite_mask | self.masks.ignore_mask) valid_mask = np.zeros(shape, dtype=np.bool) - np.logical_or(missing_mask, non_finite_mask, valid_mask) - np.logical_or(self.masks.ignore_mask, valid_mask, valid_mask) - np.logical_not(valid_mask, valid_mask) + if len(shape) > 0 : + np.logical_or(missing_mask, non_finite_mask, valid_mask) + np.logical_or(self.masks.ignore_mask, valid_mask, valid_mask) + np.logical_not(valid_mask, valid_mask) + else : + np.array([ ], dtype=np.bool) # set our masks self.masks = BasicMaskSetObject(self.masks.ignore_mask, valid_mask, @@ -223,7 +226,7 @@ class DataObject (object) : """ self.self_analysis() - return delta.min_with_mask(self.data, self.masks.valid_mask) + return delta.min_with_mask(self.data, self.masks.valid_mask) if len(self.data.shape) > 0 else np.nan def get_max (self) : """ @@ -231,7 +234,7 @@ class DataObject (object) : """ self.self_analysis() - return delta.max_with_mask(self.data, self.masks.valid_mask) + return delta.max_with_mask(self.data, self.masks.valid_mask) if len(self.data.shape) > 0 else np.nan class DiffInfoObject (object) : """ @@ -266,6 +269,7 @@ class DiffInfoObject (object) : } # FUTURE: right now the actual range of the data isn't being considered when upcasting # (Note: numpy.finfo and numpy.iinfo can be used to get more data on types) + # TODO, replace this with syntax like: np.iinfo(np.uint16).max TYPE_MAXIMUM = { np.int16: 32767, np.int32: 2147483647, @@ -399,8 +403,8 @@ class DiffInfoObject (object) : # mismatch points = mismatched nans, mismatched missing-values, differences that are too large mismatch_pt_mask = ( (aDataObject.masks.non_finite_mask ^ bDataObject.masks.non_finite_mask) | - (aDataObject.masks.missing_mask ^ bDataObject.masks.missing_mask) | - outside_epsilon_mask ) + (aDataObject.masks.missing_mask ^ bDataObject.masks.missing_mask) | + outside_epsilon_mask ) # make our diff data object diff_data_object = DataObject(raw_diff, fillValue=fill_data_value) diff --git a/pyglance/glance/figures.py b/pyglance/glance/figures.py index 865738c85bae9d296284af669fb69704e90fe830..37daf7da26e790f73cd547b043a96b0bfcd57c18 100644 --- a/pyglance/glance/figures.py +++ b/pyglance/glance/figures.py @@ -297,9 +297,9 @@ def create_complex_scatter_plot(dataList, title, xLabel, yLabel, epsilon=None, u # add the units to the x and y labels tempXLabel = xLabel tempYLabel = yLabel - if str.lower(str(units_x)) != "none" : + if (str.lower(str(units_x)) != "none") and (str.lower(str(units_x)) != "1") : tempXLabel = tempXLabel + " in " + units_x - if str.lower(str(units_y)) != "none" : + if (str.lower(str(units_y)) != "none") and (str.lower(str(units_y)) != "1") : tempYLabel = tempYLabel + " in " + units_y # and some informational stuff @@ -367,9 +367,9 @@ def create_density_scatter_plot(dataX, dataY, # add the units to the x and y labels tempXLabel = xLabel tempYLabel = yLabel - if str.lower(str(units_x)) != "none" : + if (str.lower(str(units_x)) != "none") and (str.lower(str(units_x)) != "1") : tempXLabel = tempXLabel + " in " + units_x - if str.lower(str(units_y)) != "none" : + if (str.lower(str(units_y)) != "none") and (str.lower(str(units_y)) != "1") : tempYLabel = tempYLabel + " in " + units_y # and some informational stuff @@ -419,9 +419,9 @@ def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel, epsilon=None, units_ # add the units to the x and y labels tempXLabel = xLabel tempYLabel = yLabel - if str.lower(str(units_x)) != "none" : + if (str.lower(str(units_x)) != "none") and (str.lower(str(units_x)) != "1") : tempXLabel = tempXLabel + " in " + units_x - if str.lower(str(units_y)) != "none" : + if (str.lower(str(units_y)) != "none") and (str.lower(str(units_y)) != "1") : tempYLabel = tempYLabel + " in " + units_y # and some informational stuff @@ -492,7 +492,7 @@ def create_histogram(data, bins, title, xLabel, yLabel, displayStats=False, unit # add the units to the x and y labels tempXLabel = xLabel - if str.lower(str(units)) != "none" : + if (str.lower(str(units)) != "none") and (str.lower(str(units)) != "1") : tempXLabel = tempXLabel + " in " + units # and some informational stuff @@ -606,7 +606,7 @@ def create_mapped_figure(data, latitude, longitude, baseMapInstance, boundingAxe doLabelRanges = True else : # add the units to the colorbar - if str.lower(str(units)) != "none" : + if (str.lower(str(units)) != "none") and (str.lower(str(units)) != "1") : cbar.set_label(units) numMismatchPoints = _plot_tag_data_mapped(bMap, tagData, x, y) @@ -666,7 +666,7 @@ def create_quiver_mapped_figure(data, latitude, longitude, baseMapInstance, boun if colorData is not None : cbar = plt.colorbar(format='%.3g') # add the units to the colorbar - if str.lower(str(units)) != "none" : + if (str.lower(str(units)) != "none") and (str.lower(str(units)) != "1") : cbar.set_label(units) """ @@ -734,7 +734,7 @@ def create_simple_figure(data, figureTitle, invalidMask=None, tagData=None, # make a color bar cbar = colorbar(format='%.3g') # add the units to the colorbar - if str.lower(str(units)) != "none" : + if (str.lower(str(units)) != "none") and (str.lower(str(units)) != "1") : cbar.set_label(units) # and some informational stuff @@ -809,7 +809,7 @@ def create_line_plot_figure(dataList, figureTitle) : cleanTagData = ma.array(dataSet.ravel(), mask=~tagData.ravel() | invalidMask.ravel()) axes.plot(indexData, cleanTagData, 'yo', label='mismatch point') - if str.lower(str(units)) !="none" : + if (str.lower(str(units)) !="none") and (str.lower(str(units)) != "1") : labelName = labelName + " in " + units axes.plot(indexData, cleanData, '-' + colorString, label=labelName) diff --git a/pyglance/glance/gui_model.py b/pyglance/glance/gui_model.py index 4174a22efb415afdd21f317dfc6be3a8f4a0a607..422c18864132a82a3ffeba6d3d9e6fbd52f86432 100644 --- a/pyglance/glance/gui_model.py +++ b/pyglance/glance/gui_model.py @@ -302,7 +302,6 @@ class GlanceGUIModel (object) : dataListener.updateDataForms(self.dataForm, list=DATA_FORMS) dataListener.updateUseSharedRange(self.useSharedRange) dataListener.updatePlotGeoTiffAsRGB(self.plotGeoTiffAsRGB) - dataListener.updateHideMismatchNav(self.hideMismatchNav) self.sendFileSettings(A_CONST) self.sendFileSettings(B_CONST) @@ -457,8 +456,7 @@ class GlanceGUIModel (object) : listener.updateDataForms(self.dataForm) listener.updateUseSharedRange(self.useSharedRange) listener.updatePlotGeoTiffAsRGB(self.plotGeoTiffAsRGB) - listener.updateHideMismatchNav(self.hideMismatchNav) - + def updateFileSettings (self, file_prefix, doRestrictRange=None, newRangeMin=np.nan, newRangeMax=np.nan, doCorrectForAWIPS=None) : diff --git a/pyglance/glance/gui_view.py b/pyglance/glance/gui_view.py index 255fc4763d41fdfba5fdd8cf5a1e6e3c0f3c15a6..95d73724d99d697a9c38a993d5149284164e925b 100644 --- a/pyglance/glance/gui_view.py +++ b/pyglance/glance/gui_view.py @@ -178,7 +178,7 @@ class GlanceGUIView (QtGui.QWidget) : self.epsilonWidget.editingFinished.connect(self.reportEpsilonChanged) layoutToUse.addWidget(self.epsilonWidget, currentRow, 1, 1, 2) - currentRow = currentRow + 1 + currentRow += 1 # set up the epsilon percent input box layoutToUse.addWidget(QtGui.QLabel("epsilon percent:"), currentRow, 0) @@ -191,7 +191,7 @@ class GlanceGUIView (QtGui.QWidget) : self.epsilonPerWidget.editingFinished.connect(self.reportEpsilonPercentChanged) layoutToUse.addWidget(self.epsilonPerWidget, currentRow, 1, 1, 2) - currentRow = currentRow + 1 + currentRow += 1 # set up the drop down to allow image type selection layoutToUse.addWidget(QtGui.QLabel("Image Type:"), currentRow, 0) @@ -199,7 +199,7 @@ class GlanceGUIView (QtGui.QWidget) : self.imageSelectionDropDown.activated.connect(self.reportImageTypeSelected) layoutToUse.addWidget(self.imageSelectionDropDown, currentRow, 1, 1, 2) - currentRow = currentRow + 1 + currentRow += 1 # set up a button that shows the numerical data self.rawDataButton = QtGui.QPushButton("Display Data") @@ -244,7 +244,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]['load'] = loadButton grid_layout.addWidget(loadButton, currentRow, 4) - currentRow = currentRow + 1 + currentRow += 1 # set up the drop down for the variable select grid_layout.addWidget(QtGui.QLabel("variable name:"), currentRow, 1) @@ -254,7 +254,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]['variable'] = variableSelection grid_layout.addWidget(variableSelection, currentRow, 2, 1, 3) - currentRow = currentRow + 1 + currentRow += 1 # set up a label to display the variable dimension information tempShapeLabel = QtGui.QLabel("data shape:") @@ -264,7 +264,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]['dims'] = dimensionsLabel grid_layout.addWidget(dimensionsLabel, currentRow, 2, 1, 3) - currentRow = currentRow + 1 + currentRow += 1 # set up a table to display variable attribute information tempAttributesTable = QtGui.QTableWidget() @@ -278,7 +278,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]['attrs'] = tempAttributesTable grid_layout.addWidget(tempAttributesTable, currentRow, 1, 1, 4) - currentRow = currentRow + 1 + currentRow += 1 # set up a check box to override the fill value loaded from the file overrideFillButton = QtGui.QCheckBox("override fill value") @@ -300,7 +300,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]['fillValue'] = fillValue grid_layout.addWidget(fillValue, currentRow+1, 2, 1, 3) - currentRow = currentRow + 2 + currentRow += 2 return currentRow @@ -321,7 +321,7 @@ class GlanceGUIView (QtGui.QWidget) : currentRow += 1 - # add a check box so the user can plot geotiffs as rgb images + # add a check box so the user can plot geoTIFFs as rgb images doPlotRGB = QtGui.QCheckBox("plot multi-channel GeoTIFFs as RGB images") doPlotRGB.setToolTip("When plotting original images for multi-channel GeoTIFFs, plot them as RGB images regardless of the selected variable.\n" + "This setting won't change how comparison images and simpler plots like histograms appear.") @@ -367,21 +367,6 @@ class GlanceGUIView (QtGui.QWidget) : currentRow += 1 - """ TODO, why did I create this control in the first place? remove this... - # add a checkbox to let the user hide data that's spatially invalid based on epsilon - hideDataAssociatedWithInvalidNavigation = QtGui.QCheckBox("hide data associated with mismatched navigation") - hideDataAssociatedWithInvalidNavigation.setToolTip("Check to treat all data matching navigation that differ by more than the " + - "defined lon/lat epsilon as fill data.\n" + - "Whether or not this is checked, if you plot mapped images data matching invalid " + - "(fill or outside of valid range) navigation will be treated as fill data.") - hideDataAssociatedWithInvalidNavigation.setDisabled(False) - hideDataAssociatedWithInvalidNavigation.stateChanged.connect(self.reportHideInvalidNavToggled) - self.hideInvalidNavWidget = hideDataAssociatedWithInvalidNavigation - layoutToUse.addWidget(hideDataAssociatedWithInvalidNavigation, currentRow, 1, 1, 2) - - currentRow += 1 - """ - # add the lon/lat controls that are separated by file currentRow = self._add_lon_lat_controls(A_CONST, layoutToUse, currentRow) currentRow = self._add_lon_lat_controls(B_CONST, layoutToUse, currentRow) @@ -417,7 +402,7 @@ class GlanceGUIView (QtGui.QWidget) : tempLabel.setToolTip("Simple filters that will be applied to the data before display or analysis.") grid_layout.addWidget(tempLabel, current_row, 0) - current_row = current_row + 1 + current_row += 1 # add something to give range restrictions restrictToRangeCheckbox = QtGui.QCheckBox("restrict data to range:") @@ -427,7 +412,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]["doRestrictRangeCheckbox"] = restrictToRangeCheckbox grid_layout.addWidget(restrictToRangeCheckbox, current_row, 1, 1, 2) - current_row = current_row + 1 + current_row += 1 # add the areas to enter the range boundaries @@ -458,7 +443,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]["maxRangeRestriction"] = maxRangeValue grid_layout.addWidget(maxRangeValue, current_row, 3) #1, 1, 2) - current_row = current_row + 1 + current_row += 1 # add a check box to filter AWIPS data isAWIPSdata = QtGui.QCheckBox("correct for AWIPS data types") @@ -469,7 +454,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]["isAWIPScheckbox"] = isAWIPSdata grid_layout.addWidget(isAWIPSdata, current_row, 1, 1, 2) - current_row = current_row + 1 + current_row += 1 return current_row @@ -486,7 +471,7 @@ class GlanceGUIView (QtGui.QWidget) : tempLabel.setToolTip("Navigation variables will only be used when drawing mapped plots.") grid_layout.addWidget(tempLabel, current_row, 0) - current_row = current_row + 1 + current_row += 1 # add drop down to select latitude grid_layout.addWidget(QtGui.QLabel("Latitude:"), current_row, 1) @@ -495,7 +480,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]["latName"] = latNameDropDown grid_layout.addWidget(latNameDropDown, current_row, 2, 1, 2) - current_row = current_row + 1 + current_row += 1 # add drop down to select longitude grid_layout.addWidget(QtGui.QLabel("Longitude:"), current_row, 1) @@ -504,7 +489,7 @@ class GlanceGUIView (QtGui.QWidget) : self.widgetInfo[file_prefix]["lonName"] = lonNameDropDown grid_layout.addWidget(lonNameDropDown, current_row, 2, 1, 2) - current_row = current_row + 1 + current_row += 1 return current_row @@ -540,7 +525,7 @@ class GlanceGUIView (QtGui.QWidget) : def reportOverrideChange (self, file_prefix=None) : """ - when the user checks or unchecks one of the override checkboxes, report it to user update listeners + when the user checked or unchecked one of the override checkboxes, report it to user update listeners """ # this must be recorded before we tamper with the focus, because that will @@ -853,15 +838,21 @@ class GlanceGUIView (QtGui.QWidget) : """ given variable data, pop a window to show it to the user """ - - tempID = self.dataShowCounter - self.dataShowCounter += 1 - - # not the best solution ever, but it works for now - self.dataShowWindows[tempID] = RawDataDisplayWindow(tempID, - variableDataObject, variableName, - file_descriptor=fileDescriptor, - stored_in=self.dataShowWindows) + + if len(variableDataObject.data.shape) > 0 and len(variableDataObject.data.shape) <= 2 : + + tempID = self.dataShowCounter + self.dataShowCounter += 1 + + # not the best solution ever, but it works for now + self.dataShowWindows[tempID] = RawDataDisplayWindow(tempID, + variableDataObject, variableName, + file_descriptor=fileDescriptor, + stored_in=self.dataShowWindows) + else: + + LOG.debug("Unable to display data for variable " + variableName + " because it's shape of " + + str(variableDataObject.data.shape) + " will not work in display window.") def fileDataUpdate (self, file_prefix, file_path, selected_variable, use_fill_override, new_fill_value, variable_dimensions, variable_list=None, attribute_list=None) : @@ -900,7 +891,7 @@ class GlanceGUIView (QtGui.QWidget) : for attributeKey in sorted(attribute_list.keys()) : temp_table.setCellWidget(rowCounter, 0, QtGui.QLabel(str(attributeKey))) temp_table.setCellWidget(rowCounter, 1, QtGui.QLabel(str(attribute_list[attributeKey]))) - rowCounter = rowCounter + 1 + rowCounter += 1 # if there is a file selected, enable some of the other controls if file_path != "" : @@ -937,7 +928,7 @@ class GlanceGUIView (QtGui.QWidget) : update the comparison epsilon displayed to the user """ - stringToUse = str(epsilon) if epsilon is not None else "" + #stringToUse = str(epsilon) if epsilon is not None else "" self.epsilonWidget.setText(str(epsilon)) @@ -1014,14 +1005,6 @@ class GlanceGUIView (QtGui.QWidget) : self.plotGeoTiffsAsRGB.setChecked(doPlotGeoTiffAsRGB) - def updateHideMismatchNav(self, shouldHideBasedOnNavMismatch) : - """ - update whether or not the data corresponding to mismatched navigation - should be hidden when plotting - """ - - #self.hideInvalidNavWidget.setChecked(shouldHideBasedOnNavMismatch) - def updateDoRestrictRange (self, filePrefix, doRestrictRange) : """ update our control to reflect whether or not the range is going to be restricted diff --git a/pyglance/glance/io.py b/pyglance/glance/io.py index 0652a86124813491b18767adfa1c5d39289c003d..40d7c139d616bc7ba3800bb2f5d2e001f22c623b 100644 --- a/pyglance/glance/io.py +++ b/pyglance/glance/io.py @@ -77,6 +77,15 @@ ADD_OFFSET_STR = 'add_offset' SCALE_FACTOR_STR = 'scale_factor' SCALE_METHOD_STR = 'scaling_method' +UNSIGNED_ATTR_STR = "_unsigned" + +SIGNED_TO_UNSIGNED_DTYPES = { + np.dtype(np.int8): np.dtype(np.uint8), + np.dtype(np.int16): np.dtype(np.uint16), + np.dtype(np.int32): np.dtype(np.uint32), + np.dtype(np.int64): np.dtype(np.uint64), + } + class IOUnimplimentedError(Exception): """ The exception raised when a requested io operation is not yet available. @@ -443,8 +452,34 @@ class nc (object): scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues? """ + # get our data, save the dtype, and make sure it's a more flexible dtype for now scaled_data_copy = np.array(variable_object[:], dtype=data_type) + temp = self.attributeCache.get_variable_attributes(name) + if UNSIGNED_ATTR_STR in temp.keys() and str(temp[UNSIGNED_ATTR_STR]).lower() == ( "true" ) : + + LOG.debug("fixing unsigned values in variable " + name) + + # load the scale factor and add offset + scale_factor = 1.0 + add_offset = 0.0 + temp = self.attributeCache.get_variable_attributes(name) + if SCALE_FACTOR_STR in temp.keys() : + scale_factor = temp[SCALE_FACTOR_STR] + if ADD_OFFSET_STR in temp.keys() : + add_offset = temp[ADD_OFFSET_STR] + + # get the missing value and figure out the dtype of the original data + missing_val = self.missing_value(name) + orig_dtype = np.array([missing_val,]).dtype + needed_dtype = SIGNED_TO_UNSIGNED_DTYPES[orig_dtype] if orig_dtype in SIGNED_TO_UNSIGNED_DTYPES.keys() else None + + if needed_dtype is not None : + # now figure out where all the corrupted values are, and shift them up to be positive + needs_fix_mask = (scaled_data_copy < 0.0) & (scaled_data_copy != missing_val) + # we are adding the 2's complement, but first we're scaling it appropriately + scaled_data_copy[needs_fix_mask] += ((np.iinfo(np.uint16).max + 1.0) * scale_factor) + add_offset + return scaled_data_copy # TODO, this hasn't been supported in other file types diff --git a/pyglance/glance/stats.py b/pyglance/glance/stats.py index 6ce9ca46b4281d206aacfb9e68089bff74bdb6ae..d1b5a845e7db2235c7f1988f800495b57369b442 100644 --- a/pyglance/glance/stats.py +++ b/pyglance/glance/stats.py @@ -13,8 +13,8 @@ import glance.delta as delta import numpy as np -# TODO, I don't like this design, but it's what I could come up -# with for now. Reconsider this again later. +# I don't like this design, but it's what I could come up +# with for now. FUTURE: Reconsider this design again later. class StatisticalData (object) : """ This class represents a set of statistical data generated from @@ -135,21 +135,24 @@ class MissingValueStatistics (StatisticalData) : # we have one data set and should save the prefix information self.is_one_data_set = True self.desc_text = dataSetDescription + noData = len(dataObject.data.shape) <= 0 # figure out some basic statistics self.missing_count = np.sum(dataObject.masks.missing_mask) - self.missing_fraction = float(self.missing_count) / float(dataObject.data.size) + self.missing_fraction = float(self.missing_count) / float(dataObject.data.size) if not noData else np.nan # if we have a comparison object analyze the data associated with that comparison elif diffInfoObject is not None : - + + noData = len(diffInfoObject.a_data_object.data.shape) <= 0 + # analyze each of the original data sets that are being compared self.a_missing_stats = MissingValueStatistics(dataObject=diffInfoObject.a_data_object, dataSetDescription="a") self.b_missing_stats = MissingValueStatistics(dataObject=diffInfoObject.b_data_object, dataSetDescription="b") # common statistics self.common_missing_count = np.sum(diffInfoObject.a_data_object.masks.missing_mask & diffInfoObject.b_data_object.masks.missing_mask) - self.common_missing_fraction = float(self.common_missing_count) / float(diffInfoObject.a_data_object.data.size) + self.common_missing_fraction = float(self.common_missing_count) / float(diffInfoObject.a_data_object.data.size) if not noData else np.nan else : raise ValueError ("No data set was given when requesting statistical analysis of missing values.") @@ -268,23 +271,29 @@ class FiniteDataStatistics (StatisticalData) : self.desc_text = dataSetDescription # figure out some basic statistics - self.finite_count = np.sum(dataObject.masks.valid_mask) - self.finite_fraction = float(self.finite_count) / float(dataObject.data.size) + self.finite_count = np.sum(dataObject.masks.valid_mask) if len(dataObject.data.shape) > 0 else 0 + self.finite_fraction = float(self.finite_count) / float(dataObject.data.size) if len(dataObject.data.shape) > 0 else np.nan # if we have a comparison object analyze the data associated with that comparison elif diffInfoObject is not None : - + + no_data = len(diffInfoObject.a_data_object.data.shape) <= 0 + # analyze each of the original data sets that are being compared self.a_finite_stats = FiniteDataStatistics(dataObject=diffInfoObject.a_data_object, dataSetDescription="a") self.b_finite_stats = FiniteDataStatistics(dataObject=diffInfoObject.b_data_object, dataSetDescription="b") # calculate some common statistics - self.common_finite_count = np.sum(diffInfoObject.a_data_object.masks.valid_mask & diffInfoObject.b_data_object.masks.valid_mask) + self.common_finite_count = np.sum(diffInfoObject.a_data_object.masks.valid_mask & diffInfoObject.b_data_object.masks.valid_mask) \ + if not no_data else 0 # use an exclusive or to check which points are finite in only one of the two data sets self.finite_in_only_one_count = np.sum((diffInfoObject.a_data_object.masks.valid_mask ^ diffInfoObject.b_data_object.masks.valid_mask) \ - & ~diffInfoObject.diff_data_object.masks.ignore_mask) - self.common_finite_fraction = float(self.common_finite_count) / float(diffInfoObject.a_data_object.data.size) - self.finite_in_only_one_fraction = float(self.finite_in_only_one_count) / float(diffInfoObject.a_data_object.data.size) + & ~diffInfoObject.diff_data_object.masks.ignore_mask) \ + if not no_data else 0 + self.common_finite_fraction = float(self.common_finite_count) / float(diffInfoObject.a_data_object.data.size) \ + if not no_data else np.nan + self.finite_in_only_one_fraction = float(self.finite_in_only_one_count) / float(diffInfoObject.a_data_object.data.size) \ + if not no_data else np.nan else: raise ValueError ("No data set was given when requesting statistical analysis of finite values.") @@ -396,21 +405,24 @@ class NotANumberStatistics (StatisticalData) : # we have one data set and should save the prefix information self.is_one_data_set = True self.desc_text = dataSetDescription + noData = len(dataObject.data.shape) <= 0 # get some basic statistics self.nan_count = np.sum(dataObject.masks.non_finite_mask) - self.nan_fraction = float(self.nan_count) / float(dataObject.data.size) + self.nan_fraction = float(self.nan_count) / float(dataObject.data.size) if not noData else np.nan # if we have a comparison object analyze the data associated with that comparison elif diffInfoObject is not None : - + + noData = len(diffInfoObject.a_data_object.data.shape) <= 0 + # analyze each of the original data sets that are being compared self.a_nan_stats = NotANumberStatistics(dataObject=diffInfoObject.a_data_object, dataSetDescription="a") self.b_nan_stats = NotANumberStatistics(dataObject=diffInfoObject.b_data_object, dataSetDescription="b") # calculate some common statistics self.common_nan_count = np.sum(diffInfoObject.a_data_object.masks.non_finite_mask & diffInfoObject.b_data_object.masks.non_finite_mask) - self.common_nan_fraction = float(self.common_nan_count) / float(diffInfoObject.a_data_object.data.size) + self.common_nan_fraction = float(self.common_nan_count) / float(diffInfoObject.a_data_object.data.size) if not noData else np.nan else: raise ValueError ("No data set was given when requesting statistical analysis of NaN values.") @@ -567,8 +579,8 @@ class GeneralStatistics (StatisticalData) : # grab the valid data for some calculations tempGoodData = dataObject.data[dataObject.masks.valid_mask] - noData = (tempGoodData.size <= 0) - + noData = (tempGoodData.size <= 0) or (len(dataObject.data.shape) <= 0) + # fill in our statistics self.missing_value = dataObject.select_fill_value() self.max = np.max(tempGoodData) if not noData else np.nan @@ -581,12 +593,14 @@ class GeneralStatistics (StatisticalData) : # if we should also do extra stats, do so if (doExtras) : - self.num_data_points = dataObject.masks.missing_mask.size + self.num_data_points = dataObject.masks.missing_mask.size if not noData else 0 self.shape = dataObject.masks.missing_mask.shape # if we have a comparison object analyze the data associated with that comparison elif diffInfoObject is not None : - + + noData = len(diffInfoObject.a_data_object.data.shape) <= 0 + # analyze each of the original data sets that are being compared self.a_gen_stats = GeneralStatistics(dataObject=diffInfoObject.a_data_object, dataSetDescription="a") self.b_gen_stats = GeneralStatistics(dataObject=diffInfoObject.b_data_object, dataSetDescription="b") @@ -594,7 +608,7 @@ class GeneralStatistics (StatisticalData) : # fill in our statistics self.epsilon = diffInfoObject.epsilon_value self.epsilon_percent = diffInfoObject.epsilon_percent - self.num_data_points = diffInfoObject.a_data_object.masks.missing_mask.size + self.num_data_points = diffInfoObject.a_data_object.masks.missing_mask.size if not noData else 0 self.shape = diffInfoObject.a_data_object.masks.missing_mask.shape # also calculate the invalid points self.spatially_invalid_pts_ignored_in_a = np.sum(diffInfoObject.a_data_object.masks.ignore_mask) @@ -725,31 +739,32 @@ class NumericalComparisonStatistics (StatisticalData) : aData = diffInfoObject.a_data_object.data bData = diffInfoObject.b_data_object.data total_num_finite_values = np.sum(valid_in_both) # just the finite values, not all data - + noData = len(diffInfoObject.a_data_object.data.shape) <= 0 + # fill in some simple statistics self.diff_outside_epsilon_count = np.sum(diffInfoObject.diff_data_object.masks.outside_epsilon_mask) self.perfect_match_count = NumericalComparisonStatistics._get_num_perfect(aData, bData, goodMask=valid_in_both) - self.correlation = delta.compute_correlation(aData, bData, valid_in_both) - self.r_squared_correlation = self.correlation * self.correlation + self.correlation = delta.compute_correlation(aData, bData, valid_in_both) if not noData else np.nan + self.r_squared_correlation = self.correlation * self.correlation if not noData else np.nan self.mismatch_points_count = np.sum(diffInfoObject.diff_data_object.masks.mismatch_mask) # calculate some more complex statistics, be careful not to divide by zero - self.mismatch_points_fraction = float(self.mismatch_points_count) / float(aData.size) if (aData.size > 0) else 0.0 - self.diff_outside_epsilon_fraction = float(self.diff_outside_epsilon_count) / float(total_num_finite_values) if (total_num_finite_values > 0) else 0.0 - self.perfect_match_fraction = float(self.perfect_match_count) / float(total_num_finite_values) if (total_num_finite_values > 0) else 0.0 + self.mismatch_points_fraction = float(self.mismatch_points_count) / float(aData.size) if not noData else np.nan + self.diff_outside_epsilon_fraction = float(self.diff_outside_epsilon_count) / float(total_num_finite_values) if (total_num_finite_values > 0) else np.nan + self.perfect_match_fraction = float(self.perfect_match_count) / float(total_num_finite_values) if (total_num_finite_values > 0) else np.nan # if desired, do the basic analysis self.temp_analysis = NumericalComparisonStatistics.basic_analysis(diffInfoObject.diff_data_object.data, valid_in_both) if include_basic_analysis else { } - self.rms_val = self.temp_analysis['rms_val'] if (len(self.temp_analysis) > 0) else np.nan - self.std_val = self.temp_analysis['std_val'] if (len(self.temp_analysis) > 0) else np.nan - self.mean_diff = self.temp_analysis['mean_diff'] if (len(self.temp_analysis) > 0) else np.nan - self.median_diff = self.temp_analysis['median_diff'] if (len(self.temp_analysis) > 0) else np.nan - self.max_diff = self.temp_analysis['max_diff'] if (len(self.temp_analysis) > 0) else np.nan - self.mean_delta = self.temp_analysis['mean_delta'] if (len(self.temp_analysis) > 0) else np.nan - self.median_delta = self.temp_analysis['median_delta'] if (len(self.temp_analysis) > 0) else np.nan - self.max_delta = self.temp_analysis['max_delta'] if (len(self.temp_analysis) > 0) else np.nan - self.min_delta = self.temp_analysis['min_delta'] if (len(self.temp_analysis) > 0) else np.nan + self.rms_val = self.temp_analysis['rms_val'] if not noData else np.nan + self.std_val = self.temp_analysis['std_val'] if not noData else np.nan + self.mean_diff = self.temp_analysis['mean_diff'] if not noData else np.nan + self.median_diff = self.temp_analysis['median_diff'] if not noData else np.nan + self.max_diff = self.temp_analysis['max_diff'] if not noData else np.nan + self.mean_delta = self.temp_analysis['mean_delta'] if not noData else np.nan + self.median_delta = self.temp_analysis['median_delta'] if not noData else np.nan + self.max_delta = self.temp_analysis['max_delta'] if not noData else np.nan + self.min_delta = self.temp_analysis['min_delta'] if not noData else np.nan def dictionary_form(self) : """ @@ -772,38 +787,34 @@ class NumericalComparisonStatistics (StatisticalData) : @staticmethod def doc_strings( ) : - """ - get documentation strings that match the - dictionary form of the statistics + """get documentation strings that match the dictionary form of the statistics """ return NumericalComparisonStatistics._doc_strings @staticmethod def basic_analysis(diffData, valid_mask): - """ - do some very minimal analysis of the differences + """do some very minimal analysis of the differences """ # if everything's invalid, stop now - if np.sum(valid_mask) <= 0 : - return { } + noData = np.sum(valid_mask) <= 0 # calculate and return statistics - root_mean_square_value = delta.calculate_root_mean_square(diffData, valid_mask) - tempDiffData = diffData[valid_mask] - absDiffData = np.abs(tempDiffData) + root_mean_square_value = delta.calculate_root_mean_square(diffData, valid_mask) if not noData else np.nan + tempDiffData = diffData[valid_mask] if not noData else None + absDiffData = np.abs(tempDiffData) if not noData else None return { 'rms_val': root_mean_square_value, - 'std_val': np.std(tempDiffData), + 'std_val': np.std(tempDiffData) if not noData else np.nan, - 'mean_diff': np.mean(absDiffData), - 'median_diff': np.median(absDiffData), - 'max_diff': np.max(absDiffData), + 'mean_diff': np.mean(absDiffData) if not noData else np.nan, + 'median_diff': np.median(absDiffData) if not noData else np.nan, + 'max_diff': np.max(absDiffData) if not noData else np.nan, - 'mean_delta': np.mean(tempDiffData), - 'median_delta': np.median(tempDiffData), - 'max_delta': np.max(tempDiffData), - 'min_delta': np.min(tempDiffData) + 'mean_delta': np.mean(tempDiffData) if not noData else np.nan, + 'median_delta': np.median(tempDiffData) if not noData else np.nan, + 'max_delta': np.max(tempDiffData) if not noData else np.nan, + 'min_delta': np.min(tempDiffData) if not noData else np.nan, } @staticmethod @@ -984,9 +995,7 @@ class StatisticalAnalysis (StatisticalData) : # TODO, use this method instead of the dictionary at the bottom of this module @staticmethod def doc_strings( ) : - """ - get documentation strings that match the - dictionary form of the statistics + """get documentation strings that match the dictionary form of the statistics """ toReturn = { }