#!/usr/bin/env python
# encoding: utf-8
"""
Top-level routines to compare two files.
Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""
import os, sys, logging, re, subprocess, datetime
import imp
from pprint import pprint, pformat
from numpy import *
import pkg_resources
from pycdf import CDFError
from subprocess import check_call as sh
import matplotlib
# this is a hack to keep glance from needing pyqt unless you run the gui
if "gui" in sys.argv[1:] :
try :
matplotlib.use('Qt4Agg')
import glance.gui_controller as gui_control
except ImportError :
print ("*** Unable to import PyQt4. Please install PyQt4 and add it to your PYTHONPATH in order to use the Glance GUI. ***")
raise
else :
matplotlib.use('Agg')
import glance.io as io
import glance.delta as delta
import glance.plot as plot
import glance.report as report
import glance.stats as statistics
import glance.plotcreatefns as plotcreate
import glance.collocation as collocation
# the data objects (DataObject, DiffInfoObject, FileInfo) referenced as dataobj below
import glance.data as dataobj

LOG = logging.getLogger(__name__)

# these are the built in defaults for the settings
glance_setting_defaults = {'shouldIncludeReport':       True,
                           'shouldIncludeImages':       False,
                           'doFork':                    False,
                           'useThreadsToControlMemory': False,
                           'useSharedRangeForOriginal': False,
                           'noLonLatVars':              False,
                           'detail_DPI':                150,
                           'thumb_DPI':                 50}
# these are the built in longitude/latitude defaults
glance_lon_lat_defaults = {'longitude':                     'pixel_longitude',
                           'latitude':                      'pixel_latitude',
                           'lon_lat_epsilon':               0.0,
                           'data_filter_function_lon_in_a': None,
                           'data_filter_function_lat_in_a': None,
                           'data_filter_function_lon_in_b': None,
                           'data_filter_function_lat_in_b': None
                           }
# these are the built in default settings for the variable analysis
glance_analysis_defaults = {'epsilon':                                            0.0,
                            'epsilon_percent':                                    None,
                            'missing_value':                                      None,
                            'epsilon_failure_tolerance':                          0.0,
                            'nonfinite_data_tolerance':                           0.0,
                            'total_data_failure_tolerance':                       None,
                            'minimum_acceptable_squared_correlation_coefficient': None,
                            'only_plot_on_fail':                                  False
                            }
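# Illustrative note (hypothetical variable name and values): a per-variable entry
# coming from a config file only needs to override the analysis defaults it cares
# about; _resolve_names starts from glance_analysis_defaults and updates it with
# the entry, e.g.
#
#     setOfVariables['Brightness Temperature'] = {
#         'variable_name': 'bt_ch20',   # hypothetical variable present in both files
#         'epsilon':       0.01,        # overrides the default epsilon of 0.0
#     }                                 # every other analysis default still applies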
def _clean_path(string_path) :
"""
Return a clean form of the path without any '.', '..', or '~'
"""
clean_path = None
if string_path is not None :
clean_path = os.path.abspath(os.path.expanduser(string_path))
return clean_path
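# Example of _clean_path (shown as a sketch only, since the result depends on the
# current working directory and the user's home directory):
#
#     >>> _clean_path("~/data/../data/test.nc")      # doctest: +SKIP
#     '/home/someuser/data/test.nc'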
def _parse_varnames(names, terms, epsilon=0.0, missing=None):
"""filter variable names and substitute default epsilon and missing settings if none provided
returns (variable name, epsilon, missing) triples
>>> _parse_varnames( ['foo','bar', 'baz', 'zoom', 'cat'], ['f..:0.5:-999', 'ba.*:0.001', 'c.t::-9999'], 1e-7 )
set([('foo', 0.5, -999.0), ('cat', 9.9999999999999995e-08, -9999.0), ('bar', 0.001, None), ('baz', 0.001, None)])
names - all the variable names in the file (ie. names that should be considered valid)
terms - variable selection terms given from the command line
epsilon - a default epsilon to be used for all variables that do not have a specific epsilon given
missing - a default fill value to be used for all variables that do not have a specific fill value given
"""
terms = [x.split(':') for x in terms]
terms = [(re.compile(x[0]).match,x[1:]) for x in terms]
def _cvt_em(eps=None, mis=None):
eps = float(eps) if eps else epsilon
mis = float(mis) if mis else missing
return eps, mis
sel = [ ((x,)+_cvt_em(*em)) for x in names for (t,em) in terms if t(x) ]
return set(sel)
def _check_file_names(fileAObject, fileBObject) :
"""
(no author)
committed
get information about the names in the two files and how they compare to each other
"""
# get information about the variables stored in the files
aNames = set(fileAObject())
bNames = set(fileBObject())
# get the variable names they have in common
commonNames = aNames.intersection(bNames)
# which names are unique to only one of the two files?
uniqueToANames = aNames - commonNames
uniqueToBNames = bNames - commonNames
(no author)
committed
return _check_shared_names(set(fileAObject()), set(fileBObject()))
def _check_shared_names (nameSetA, nameSetB) :
"""
compare the names in the two sets
"""
# what names do they have in common?
commonNames = nameSetA.intersection(nameSetB)
# what names are unique to each set?
uniqueToANames = nameSetA - commonNames
uniqueToBNames = nameSetB - commonNames
return {'sharedVars': commonNames, 'uniqueToAVars': uniqueToANames, 'uniqueToBVars': uniqueToBNames}
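# Example of _check_shared_names with two small, made-up name sets:
#
#     >>> info = _check_shared_names(set(['lat', 'lon', 'bt_ch20']), set(['lat', 'lon', 'rad_ch20']))
#     >>> sorted(info['sharedVars']), sorted(info['uniqueToAVars']), sorted(info['uniqueToBVars'])
#     (['lat', 'lon'], ['bt_ch20'], ['rad_ch20'])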
def _resolve_names(fileAObject, fileBObject, defaultValues,
requestedNames, usingConfigFileFormat=False) :
"""
figure out which names the two files share and which are unique to each file, as well as which names
were requested and are in both sets
    usingConfigFileFormat signals whether the requestedNames parameter will be in the form of the input
names from the command line or a more complex dictionary holding information about the names read in
from a configuration file
Note: if we ever need a variable with different names in file A and B to be comparable, this logic
will need to be changed.
"""
# look at the names present in the two files and compare them
nameComparison = _check_file_names(fileAObject, fileBObject)
# figure out which set should be selected based on the user requested names
fileCommonNames = nameComparison['sharedVars']
finalNames = {}
if (usingConfigFileFormat) :
# if the user didn't ask for any, try everything
        if (len(requestedNames) == 0) :
finalFromCommandLine = _parse_varnames(fileCommonNames, ['.*'],
defaultValues['epsilon'], defaultValues['missing_value'])
for name, epsilon, missing in finalFromCommandLine :
# we'll use the variable's name as the display name for the time being
finalNames[name] = {}
# make sure we pick up any other controlling defaults
finalNames[name].update(defaultValues)
# but override the values that would have been determined by _parse_varnames
finalNames[name]['variable_name'] = name
finalNames[name]['epsilon'] = epsilon
# load the missing value if it was not provided
missing, missing_b = _get_missing_values_if_needed((fileAObject, fileBObject), name,
missing_value_A=missing, missing_value_B=missing)
finalNames[name]['missing_value'] = missing
finalNames[name]['missing_value_alt_in_b'] = missing_b
# get any information about the units listed in the files
finalNames[name]['units_a'] = fileAObject.get_attribute(name, io.UNITS_CONSTANT)
finalNames[name]['units_b'] = fileBObject.get_attribute(name, io.UNITS_CONSTANT)
# otherwise just do the ones the user asked for
else :
# check each of the names the user asked for to see if it is either in the list of common names
# or, if the user asked for an alternate name mapping in file B, if the two mapped names are in
# files A and B respectively
for dispName in requestedNames :
# hang on to info on the current variable
currNameInfo = requestedNames[dispName]
# get the variable name
if 'variable_name' in currNameInfo :
name = currNameInfo['variable_name']
name_b = name
if ('alternate_name_in_B' in currNameInfo) :
name_b = currNameInfo['alternate_name_in_B']
                    if ( (name in fileCommonNames) and ('alternate_name_in_B' not in currNameInfo) ) or \
                       ( ('alternate_name_in_B' in currNameInfo) and
                         ((name   in nameComparison['uniqueToAVars']) or (name   in fileCommonNames)) and
                         ((name_b in nameComparison['uniqueToBVars']) or (name_b in fileCommonNames)) ) :
finalNames[dispName] = defaultValues.copy()
finalNames[dispName]['display_name'] = dispName
finalNames[dispName].update(currNameInfo)
# load the missing value if it was not provided
missing = finalNames[dispName]['missing_value']
if ('missing_value_alt_in_b' in finalNames[dispName]) :
missing_b = finalNames[dispName]['missing_value_alt_in_b']
else :
missing_b = missing
finalNames[dispName]['missing_value'], finalNames[dispName]['missing_value_alt_in_b'] = \
_get_missing_values_if_needed((fileAObject, fileBObject), name, name_b,
missing, missing_b)
# get any information about the units listed in the files
finalNames[dispName]['units_a'] = fileAObject.get_attribute(name, io.UNITS_CONSTANT)
finalNames[dispName]['units_b'] = fileBObject.get_attribute(name_b, io.UNITS_CONSTANT)
else :
LOG.warn('No technical variable name was given for the entry described as "' + dispName + '". ' +
'Skipping this variable.')
else:
# format command line input similarly to the stuff from the config file
finalFromCommandLine = _parse_varnames(fileCommonNames, requestedNames,
defaultValues['epsilon'], defaultValues['missing_value'])
for name, epsilon, missing in finalFromCommandLine :
## we'll use the variable's name as the display name for the time being
finalNames[name] = {}
# make sure we pick up any other controlling defaults
finalNames[name].update(defaultValues)
# but override the values that would have been determined by _parse_varnames
finalNames[name]['variable_name'] = name
finalNames[name]['epsilon'] = epsilon
# load the missing value if it was not provided
missing, missing_b = _get_missing_values_if_needed((fileAObject, fileBObject), name,
missing_value_A=missing, missing_value_B=missing)
finalNames[name]['missing_value'] = missing
finalNames[name]['missing_value_alt_in_b'] = missing_b
# get any information about the units listed in the files
finalNames[name]['units_a'] = fileAObject.get_attribute(name, io.UNITS_CONSTANT)
finalNames[name]['units_b'] = fileBObject.get_attribute(name, io.UNITS_CONSTANT)
LOG.debug("Final selected set of variables to analyze:")
LOG.debug(str(finalNames))
return finalNames, nameComparison
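# Sketch of the shape of the data returned by _resolve_names (the variable name,
# units, and values below are purely illustrative):
#
#     finalNames = {
#         'bt_ch20': {                              # keyed by display name
#             'variable_name':          'bt_ch20',
#             'epsilon':                0.0,
#             'missing_value':          -999.0,
#             'missing_value_alt_in_b': -999.0,
#             'units_a':                'K',
#             'units_b':                'K',
#             # ... plus the remaining analysis defaults ...
#         },
#     }
#     nameComparison = {'sharedVars': set([...]), 'uniqueToAVars': set([...]), 'uniqueToBVars': set([...])}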
def _resolve_names_one_file(fileObject, defaultValues,
requestedNames, usingConfigFileFormat=False) :
"""
sort out which names to examine based on a file that contains names and the names
the caller asked for, then fill in information on missing values based on the
caller requests, possible config file, and defaults
"""
# look at the names present in the file
possibleNames = set(fileObject())
# figure out which names should be selected based on the user requested names
finalNames = {}
if (usingConfigFileFormat) :
# if the user didn't ask for any, try everything
        if (len(requestedNames) == 0) :
finalFromCommandLine = _parse_varnames(possibleNames, ['.*'],
None, defaultValues['missing_value'])
for name, _, missing in finalFromCommandLine :
# we'll use the variable's name as the display name for the time being
finalNames[name] = {}
# make sure we pick up any other controlling defaults
finalNames[name].update(defaultValues)
# but override the values that would have been determined by _parse_varnames
finalNames[name]['variable_name'] = name
# load the missing value if it was not provided
if missing is None :
missing = fileObject.missing_value(name)
finalNames[name]['missing_value'] = missing
# get any information about the units listed in the file
finalNames[name]['units'] = fileObject.get_attribute(name, io.UNITS_CONSTANT)
# otherwise just do the ones the user asked for
else :
# check each of the names the user asked for to see if it's among the possible names
for dispName in requestedNames :
# hang on to info on the current variable
currNameInfo = requestedNames[dispName]
# get the variable name
if 'variable_name' in currNameInfo :
name = currNameInfo['variable_name']
if (name in possibleNames) :
finalNames[dispName] = defaultValues.copy()
finalNames[dispName]['display_name'] = dispName
finalNames[dispName].update(currNameInfo)
# load the missing value if it was not provided
missing = finalNames[dispName]['missing_value']
if missing is None :
missing = fileObject.missing_value(name)
finalNames[dispName]['missing_value'] = missing
# get any information about the units listed in the file
finalNames[dispName]['units'] = fileObject.get_attribute(name, io.UNITS_CONSTANT)
else :
LOG.warn('No technical variable name was given for the entry described as "' + dispName + '". ' +
'Skipping this variable.')
else:
# format command line input similarly to the stuff from the config file
#print (requestedNames)
finalFromCommandLine = _parse_varnames(possibleNames, requestedNames,
None, defaultValues['missing_value'])
for name, _, missing in finalFromCommandLine :
## we'll use the variable's name as the display name for the time being
finalNames[name] = {}
# make sure we pick up any other controlling defaults
finalNames[name].update(defaultValues)
# but override the values that would have been determined by _parse_varnames
finalNames[name]['variable_name'] = name
# load the missing value if it was not provided
if missing is None :
missing = fileObject.missing_value(name)
finalNames[name]['missing_value'] = missing
# get any information about the units listed in the file
finalNames[name]['units'] = fileObject.get_attribute(name, io.UNITS_CONSTANT)
LOG.debug("Final selected set of variables to inspect:")
LOG.debug(str(finalNames))
return finalNames, possibleNames
def _get_missing_values_if_needed((fileA, fileB),
var_name, alt_var_name=None,
missing_value_A=None, missing_value_B=None) :
"""
get the missing values for two files based on the variable name(s)
if the alternate variable name is passed it will be used for the
second file in place of the primary variable name
"""
# if we don't have an alternate variable name, use the existing one
if alt_var_name is None :
alt_var_name = var_name
if missing_value_A is None :
missing_value_A = fileA.missing_value(var_name)
if missing_value_B is None :
missing_value_B = fileB.missing_value(alt_var_name)
return missing_value_A, missing_value_B
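# Illustrative call (fileA and fileB stand for already-opened glance file objects;
# the variable name and fill value are hypothetical):
#
#     missing_a, missing_b = _get_missing_values_if_needed((fileA, fileB), 'bt_ch20',
#                                                          missing_value_A=-999.0)
#     # missing_a stays -999.0 as given; missing_b is looked up in fileB's metadata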
def _load_config_or_options(aPath, bPath, optionsSet, requestedVars = [ ]) :
"""
load information on how the user wants to run the command from a dictionary of options
and info on the files and variables to compare
note: the options may include a configuration file, which will override many of the
settings in the options
"""
# basic defaults for stuff we will need to return
runInfo = {}
runInfo.update(glance_setting_defaults) # get the default settings
if ('noLonLatVars' not in optionsSet) or (not optionsSet['noLonLatVars']):
runInfo.update(glance_lon_lat_defaults) # get the default lon/lat info
# by default, we don't have any particular variables to analyze
desiredVariables = { }
# use the built in default values, to start with
defaultsToUse = glance_analysis_defaults.copy()
requestedNames = None
# set up the paths, they can only come from the command line
paths = {}
paths['a'] = aPath
if bPath is not None:
paths['b'] = bPath
paths['out'] = optionsSet['outputpath']
# the colocation selection can only come from the command line options
# note: since this is really only coming from the user's selection of the call,
# this is ok for the moment, may want to reconsider later (FUTURE)
runInfo['doColocate'] = ('doColocate' in optionsSet) and (optionsSet['doColocate'])
# check to see if the user wants to use a config file and if the path exists
requestedConfigFile = optionsSet['configFile']
usedConfigFile = False
if (requestedConfigFile is not None) and (requestedConfigFile != "") :
if not os.path.exists(requestedConfigFile) :
LOG.warn("Could not open config file: \"" + requestedConfigFile + "\"")
LOG.warn("Unable to continue analysis without selected configuration file.")
sys.exit(1)
else :
LOG.info ("Using Config File Settings")
# this will handle relative paths
requestedConfigFile = os.path.abspath(os.path.expanduser(requestedConfigFile))
# split out the file base name and the file path
(filePath, fileName) = os.path.split(requestedConfigFile)
splitFileName = fileName.split('.')
fileBaseName = fileName[:-3] # remove the '.py' from the end
# hang onto info about the config file for later
runInfo['config_file_name'] = fileName
runInfo['config_file_path'] = requestedConfigFile
# load the file
LOG.debug ('loading config file: ' + str(requestedConfigFile))
glanceRunConfig = imp.load_module(fileBaseName, file(requestedConfigFile, 'U'),
filePath, ('.py' , 'U', 1))
# this is an exception, since it is not advertised to the user we don't expect it to be in the file
# (at least not at the moment, it could be added later and if they did happen to put it in the
# config file, it would override this line)
runInfo['shouldIncludeReport'] = not optionsSet['imagesOnly'] if 'imagesOnly' in optionsSet else False
runInfo['noLonLatVars'] = optionsSet['noLonLatVars'] if 'noLonLatVars' in optionsSet else False
# get everything from the config file
runInfo.update(glanceRunConfig.settings)
if ('noLonLatVars' not in runInfo) or (not runInfo['noLonLatVars']) :
runInfo.update(glanceRunConfig.lat_lon_info) # get info on the lat/lon variables
# get any requested names
requestedNames = glanceRunConfig.setOfVariables.copy()
# user selected defaults, if they omit any we'll still be using the program defaults
defaultsToUse.update(glanceRunConfig.defaultValues)
usedConfigFile = True
# if we didn't get the info from the config file for some reason
# (the user didn't want to, we couldn't, etc...) get it from the command line options
if not usedConfigFile:
LOG.info ('Using Command Line Settings')
# so get everything from the options directly
runInfo['shouldIncludeReport'] = not optionsSet['imagesOnly']
runInfo['shouldIncludeImages'] = not optionsSet['htmlOnly']
runInfo['doFork'] = optionsSet['doFork']
# only record these if we are using lon/lat
runInfo['noLonLatVars'] = optionsSet['noLonLatVars']
if not runInfo['noLonLatVars'] :
runInfo['latitude'] = optionsSet['latitudeVar'] or runInfo['latitude']
runInfo['longitude'] = optionsSet['longitudeVar'] or runInfo['longitude']
runInfo['lon_lat_epsilon'] = optionsSet['lonlatepsilon'] if 'lonlatepsilon' in optionsSet else None
# get any requested names from the command line
requestedNames = requestedVars or ['.*']
# user selected defaults
defaultsToUse['epsilon'] = optionsSet['epsilon'] if 'epsilon' in optionsSet else None
defaultsToUse['missing_value'] = optionsSet['missing']
# note: there is no way to set the tolerances from the command line
return paths, runInfo, defaultsToUse, requestedNames, usedConfigFile
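# Minimal sketch of the module-level names a glance config file is expected to
# define, based on how glanceRunConfig is read above (the variable entry shown
# is hypothetical):
#
#     settings       = {'shouldIncludeImages': True}
#     lat_lon_info   = {'longitude': 'pixel_longitude', 'latitude': 'pixel_latitude'}
#     defaultValues  = {'epsilon': 0.0, 'missing_value': -999.0}
#     setOfVariables = {'Brightness Temperature': {'variable_name': 'bt_ch20', 'epsilon': 0.01}}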
class VariableLoadError(Exception):
"""
The exception raised when a variable could not be loaded.
    msg -- explanation of which variable could not be loaded (and, if possible, why)
"""
def __init__(self, msg):
self.msg = msg
def __str__(self):
return self.msg
def _get_variable_from_file(fileObject, variableName, dataType, filter=None) :
"""
load a variable, using the given data type and applying a filter if one is given
This may throw a VariableLoadError if the variable cannot be loaded.
"""
dataToReturn = None
exceptionToRaise = None
# get the data from the file
if fileObject.file_object is None :
exceptionToRaise = VariableLoadError("File was not properly opened so variable '" + variableName + "' could not be loaded.")
else :
try :
dataToReturn = array(fileObject.file_object[variableName], dtype=dataType)
        except CDFError as ex :
            exceptionToRaise = VariableLoadError('Unable to retrieve ' + variableName + ' data. The variable name ' +
                                                 ' may not exist in this file or an error may have occurred while attempting to' +
                                                 ' access the data. Details of file access error observed: ' + str(ex))
if (exceptionToRaise is not None) :
raise exceptionToRaise
if (filter is not None) and (dataToReturn is not None) :
dataToReturn = filter(dataToReturn)
return dataToReturn
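# Illustrative use (fileObject stands for an already-opened glance file wrapper;
# the variable name is hypothetical):
#
#     try :
#         bt_data = _get_variable_from_file(fileObject, 'bt_ch20', float)
#     except VariableLoadError as vle :
#         LOG.warn(vle.msg)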
def _get_and_analyze_lon_lat (fileObject,
latitudeVariableName, longitudeVariableName,
latitudeDataFilterFn=None, longitudeDataFilterFn=None) :
"""
get the longitude and latitude data from the given file, assuming they are in the given variable names
    and analyze them to identify spatially invalid data (i.e. data that would fall off the earth)
This may result in a VariableLoadError if the variable cannot be loaded.
"""
# get the data from the file
# get the longitude
LOG.info ('longitude name: ' + longitudeVariableName)
# TODO, should this dtype be a float?
longitudeData = _get_variable_from_file(fileObject, longitudeVariableName,
float, filter=longitudeDataFilterFn)
# get the latitude
LOG.info ('latitude name: ' + latitudeVariableName)
# TODO, should this dtype be a float?
latitudeData = _get_variable_from_file(fileObject, latitudeVariableName,
float, filter=latitudeDataFilterFn)
    # we are going to have issues with our comparison if they aren't the same shape
LOG.debug('latitude shape: ' + str(latitudeData.shape))
LOG.debug('longitude shape: ' + str(longitudeData.shape))
assert (latitudeData.shape == longitudeData.shape)
    # build a mask of our spatially invalid data
invalidLatitude = (latitudeData < -90) | (latitudeData > 90) | ~isfinite(latitudeData)
invalidLongitude = (longitudeData < -180) | (longitudeData > 360) | ~isfinite(longitudeData)
spaciallyInvalidMask = invalidLatitude | invalidLongitude
# get the missing value as well
longitudeMissingVal = fileObject.file_object.missing_value(longitudeVariableName)
latitudeMissingVal = fileObject.file_object.missing_value( latitudeVariableName)
    # analyze our spatially invalid data
percentageOfSpaciallyInvalidPts, numberOfSpaciallyInvalidPts = _get_percentage_from_mask(spaciallyInvalidMask)
spatialStatInfo = {
'totNumInvPts': numberOfSpaciallyInvalidPts,
'perInvPts': percentageOfSpaciallyInvalidPts
}
return dataobj.DataObject(longitudeData, fillValue=longitudeMissingVal, ignoreMask=invalidLongitude), \
dataobj.DataObject(latitudeData, fillValue=latitudeMissingVal, ignoreMask=invalidLatitude), spatialStatInfo
def _get_percentage_from_mask(dataMask) :
"""
given a mask that marks the elements we want the percentage of as True (and is the size of our original data),
figure out what percentage of the whole they are
"""
numMarkedDataPts = sum(dataMask)
totalDataPts = dataMask.size
# avoid dividing by 0
    if totalDataPts == 0 :
return 0.0, 0
percentage = 100.0 * float(numMarkedDataPts) / float(totalDataPts)
return percentage, numMarkedDataPts
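# Example of _get_percentage_from_mask on a small mask:
#
#     >>> _get_percentage_from_mask(array([True, False, False, True]))
#     (50.0, 2)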
# TODO, this comparison needs to incorporate epsilon percent as well
def _check_lon_lat_equality(longitudeADataObject, latitudeADataObject,
                            longitudeBDataObject, latitudeBDataObject,
                            llepsilon, doMakeImages, outputPath,
                            fullDPI=None, thumbDPI=None) :
"""
    check to make sure the longitude and latitude are equal everywhere that's not in the ignore masks
    if they are not equal and doMakeImages was passed as True, generate appropriate figures to show where they differ
    return the number of points where they are not equal (0 would mean they're the same)
    If the latitude or longitude cannot be compared, this may raise a VariableComparisonError.
    """
# first of all, if the latitude and longitude are not the same shape, then things can't ever be "equal"
if (longitudeADataObject.data.shape != longitudeBDataObject.data.shape) :
        raise VariableComparisonError ("Unable to compare longitude variables due to different sizes (" + str(longitudeADataObject.data.shape) +
") and (" + str(longitudeBDataObject.data.shape) +").")
if (latitudeADataObject.data.shape != latitudeBDataObject.data.shape) :
raise VariableComparisonError ("Unable to compare latitude variables due to different sizes (" + str(latitudeADataObject.data.shape) +
") and (" + str(latitudeBDataObject.data.shape) +").")
# get information about how the latitude and longitude differ
longitudeDiffInfo = dataobj.DiffInfoObject(longitudeADataObject, longitudeBDataObject, epsilonValue=llepsilon)
latitudeDiffInfo = dataobj.DiffInfoObject(latitudeADataObject, latitudeBDataObject, epsilonValue=llepsilon)
# how much difference is there between the two sets?
lon_lat_not_equal_mask = longitudeDiffInfo.diff_data_object.masks.mismatch_mask | latitudeDiffInfo.diff_data_object.masks.mismatch_mask
lon_lat_not_equal_points_count = sum(lon_lat_not_equal_mask)
lon_lat_not_equal_points_percent = (float(lon_lat_not_equal_points_count) / float(lon_lat_not_equal_mask.size)) * 100.0
# if we have unequal points, create user legible info about the problem
if (lon_lat_not_equal_points_count > 0) :
LOG.warn("Possible mismatch in values stored in file a and file b longitude and latitude values."
+ " Depending on the degree of mismatch, some data value comparisons may be "
+ "distorted or spacially nonsensical.")
# if we are making images, make two showing the invalid lons/lats
if (doMakeImages) :
if ((len(longitudeADataObject.data[~longitudeADataObject.masks.ignore_mask]) > 0) and
(len( latitudeADataObject.data[~ latitudeADataObject.masks.ignore_mask]) > 0)) :
            plot.plot_and_save_spacial_mismatch(longitudeADataObject, latitudeADataObject,
                                                lon_lat_not_equal_mask,
                                                "A", "Lon./Lat. Points Mismatched between A and B\n" +
                                                "(Shown in A)",
                                                "LonLatMismatch",
                                                outputPath, True,
                                                fullDPI=fullDPI, thumbDPI=thumbDPI, units="degrees")
if ((len(longitudeBDataObject.data[~longitudeBDataObject.masks.ignore_mask]) > 0) and
(len( latitudeBDataObject.data[~ latitudeBDataObject.masks.ignore_mask]) > 0)) :
            plot.plot_and_save_spacial_mismatch(longitudeBDataObject, latitudeBDataObject,
                                                lon_lat_not_equal_mask,
                                                "B", "Lon./Lat. Points Mismatched between A and B\n" +
                                                "(Shown in B)",
                                                "LonLatMismatch",
                                                outputPath, True,
                                                fullDPI=fullDPI, thumbDPI=thumbDPI, units="degrees")
# setup our return data
returnInfo = {}
returnInfo['lon_lat_not_equal_points_count'] = lon_lat_not_equal_points_count
returnInfo['lon_lat_not_equal_points_percent'] = lon_lat_not_equal_points_percent
return returnInfo
def _compare_spatial_invalidity(longitude_a_object, longitude_b_object,
                                latitude_a_object,  latitude_b_object,
                                spatial_info, do_include_images, output_path,
                                fullDPI=None, thumbDPI=None) :
    """
    Given information about where the two files are spatially invalid, figure
    out what invalidity they share and save information or plots for later use,
    and also build a shared longitude/latitude based on A that includes the valid
    points from B
    """
# make our common invalid masks
invalid_in_a_mask = longitude_a_object.masks.ignore_mask | latitude_a_object.masks.ignore_mask
invalid_in_b_mask = longitude_b_object.masks.ignore_mask | latitude_b_object.masks.ignore_mask
invalid_in_common_mask = invalid_in_a_mask | invalid_in_b_mask
# make a "common" longitude/latitude based on A
longitude_common = longitude_a_object.data.copy()
latitude_common = latitude_a_object.data.copy()
    # compare our spatially invalid info
spatial_info['perInvPtsInBoth'] = spatial_info['file A']['perInvPts']
# a default that will hold if the two files have the same spatially invalid pts
if not all(invalid_in_a_mask.ravel() == invalid_in_b_mask.ravel()) :
LOG.info("Mismatch in number of spatially invalid points. " +
"Files may not have corresponding data where expected.")
# figure out which points are only valid in one of the two files
valid_only_in_mask_a = (~invalid_in_a_mask) & invalid_in_b_mask
spatial_info['file A']['numInvPts'] = sum(valid_only_in_mask_a.ravel())
valid_only_in_mask_b = (~invalid_in_b_mask) & invalid_in_a_mask
spatial_info['file B']['numInvPts'] = sum(valid_only_in_mask_b.ravel())
# so how many do they have together?
spatial_info['perInvPtsInBoth'] = _get_percentage_from_mask(invalid_in_common_mask)[0]
# make a "clean" version of the lon/lat
longitude_common[valid_only_in_mask_a] = longitude_a_object.data[valid_only_in_mask_a]
longitude_common[valid_only_in_mask_b] = longitude_b_object.data[valid_only_in_mask_b]
latitude_common [valid_only_in_mask_a] = latitude_a_object.data[valid_only_in_mask_a]
latitude_common [valid_only_in_mask_b] = latitude_b_object.data[valid_only_in_mask_b]
    # plot the points that are only valid in one file and not the other
if ((spatial_info['file A']['numInvPts'] > 0) and (do_include_images) and
(len(longitude_a_object.data[~invalid_in_a_mask]) > 0) and
(len( latitude_a_object.data[~invalid_in_a_mask]) > 0)) :
        plot.plot_and_save_spacial_mismatch(longitude_a_object, latitude_a_object,
                                            valid_only_in_mask_a,
                                            "A", "Points only valid in\nFile A\'s longitude & latitude",
                                            "SpatialMismatch",
                                            output_path, True,
                                            fullDPI=fullDPI, thumbDPI=thumbDPI, units="degrees")
    if ((spatial_info['file B']['numInvPts'] > 0) and (do_include_images) and
        (len(longitude_b_object.data[~invalid_in_b_mask]) > 0) and
        (len( latitude_b_object.data[~invalid_in_b_mask]) > 0)) :

        plot.plot_and_save_spacial_mismatch(longitude_b_object, latitude_b_object,
                                            valid_only_in_mask_b,
                                            "B", "Points only valid in\nFile B\'s longitude & latitude",
                                            "SpatialMismatch",
                                            output_path, True,
                                            fullDPI=fullDPI, thumbDPI=thumbDPI, units="degrees")
return invalid_in_common_mask, spatial_info, longitude_common, latitude_common
class VariableComparisonError(Exception):
"""
The exception raised when a variable could not be compared.
    msg -- explanation of which variable could not be compared (and, if possible, why)
"""
def __init__(self, msg):
self.msg = msg
def __str__(self):
return self.msg
def _load_lon_lat (lon_name, lat_name, file_object, file_descriptior="",
alt_file=None, lat_filter=None, lon_filter=None) :
"""
load the longitude and latitude from a file, filtering as needed
"""
# for the a file, do we have an alternate?
file_to_use = file_object
if (alt_file is not None) :
LOG.info("Loading alternate file (" + alt_file
+ ") for file " + file_descriptior + " longitude/latitude.")
file_to_use = dataobj.FileInfo(alt_file)
# load our longitude and latitude and do some analysis on them
lon_object, lat_object, spatial_info = \
_get_and_analyze_lon_lat (file_to_use,
lat_name, lon_name,
lat_filter, lon_filter)
return lon_object, lat_object, spatial_info
def _handle_lon_lat_info (lon_lat_settings, a_file_object, b_file_object, output_path,
should_make_images=False, should_check_equality=True,
fullDPI=None, thumbDPI=None) :
"""
Manage loading and comparing longitude and latitude information for two files
This may result in a VariableLoadError if the longitude or latitude cannot be loaded.
This may result in a VariableComparisonError if the longitude or latitude cannot be compared due to size.
"""
# a place to save some general stats about our lon/lat data
spatialInfo = { }
    # if there is no lon/lat specified, stop now
if ( ('longitude' not in lon_lat_settings) or ('latitude' not in lon_lat_settings)
or (('noLonLatVars' in lon_lat_settings) and lon_lat_settings['noLonLatVars']) ) :
return { }, spatialInfo
    # print our settings for debugging purposes
LOG.debug ('lon_lat_settings: ' + str(lon_lat_settings))
# figure out the names to be used for the longitude and latitude variables
a_longitude_name = lon_lat_settings['longitude']
a_latitude_name = lon_lat_settings['latitude']
b_longitude_name = a_longitude_name
b_latitude_name = a_latitude_name
# if we have alternate b names, use those for b instead
if ('longitude_alt_name_in_b' in lon_lat_settings) :
b_longitude_name = lon_lat_settings['longitude_alt_name_in_b']
if ( 'latitude_alt_name_in_b' in lon_lat_settings):
b_latitude_name = lon_lat_settings['latitude_alt_name_in_b']
# if we need to load our lon/lat from different files, open those files
longitude_a_object, latitude_a_object, spatialInfo['file A'] = \
_load_lon_lat (a_longitude_name, a_latitude_name, a_file_object, file_descriptior="a",
alt_file=lon_lat_settings['a_lon_lat_from_alt_file']
if ('a_lon_lat_from_alt_file' in lon_lat_settings) else None,
lat_filter=lon_lat_settings['data_filter_function_lat_in_a'],
lon_filter=lon_lat_settings['data_filter_function_lon_in_a'])
longitude_b_object, latitude_b_object, spatialInfo['file B'] = \
_load_lon_lat (b_longitude_name, b_latitude_name, b_file_object, file_descriptior="b",
alt_file=lon_lat_settings['b_lon_lat_from_alt_file']
if ('b_lon_lat_from_alt_file' in lon_lat_settings) else None,
lat_filter=lon_lat_settings['data_filter_function_lat_in_b'],
lon_filter=lon_lat_settings['data_filter_function_lon_in_b'])
# if we need to, test the level of equality of the "valid" values in our lon/lat
if should_check_equality :
        moreSpatialInfo = _check_lon_lat_equality(longitude_a_object, latitude_a_object,
                                                  longitude_b_object, latitude_b_object,
                                                  lon_lat_settings['lon_lat_epsilon'],
                                                  should_make_images, output_path,
                                                  fullDPI=fullDPI, thumbDPI=thumbDPI)
# update our existing spatial information
spatialInfo.update(moreSpatialInfo)
# compare our spatially invalid info to see if the two files have invalid longitudes and latitudes in the same places
spaciallyInvalidMask, spatialInfo, longitude_common, latitude_common = \
_compare_spatial_invalidity(longitude_a_object, longitude_b_object,
latitude_a_object, latitude_b_object,
spatialInfo, should_make_images, output_path,
fullDPI=fullDPI, thumbDPI=thumbDPI)
else:
spaciallyInvalidMask = None
longitude_common = None
latitude_common = None
# FUTURE, return the lon/lat objects instead?
return {
'a': {
"lon": longitude_a_object.data,
"lat": latitude_a_object.data,
"inv_mask": longitude_a_object.masks.ignore_mask,
"lon_fill": longitude_a_object.fill_value,
"lat_fill": latitude_a_object.fill_value
},
'b': {
"lon": longitude_b_object.data,
"lat": latitude_b_object.data,
"inv_mask": longitude_b_object.masks.ignore_mask,
"lon_fill": longitude_b_object.fill_value,
"lat_fill": latitude_b_object.fill_value
},
'common': {
"lon": longitude_common,
"lat": latitude_common,
"inv_mask": spaciallyInvalidMask
}
}, \
spatialInfo
def _handle_lon_lat_info_for_one_file (lon_lat_settings, file_object) :
"""
Manage loading longitude and latitude information for a file
This may result in a VariableLoadError if the longitude or latitude cannot be loaded.
"""
# if there is no lon/lat specified, stop now
if ( ('longitude' not in lon_lat_settings) or ('latitude' not in lon_lat_settings)
or (('noLonLatVars' in lon_lat_settings) and lon_lat_settings['noLonLatVars']) ) :
return { }, { }
# print our settings for debugging purposes
LOG.debug ('lon_lat_settings: ' + str(lon_lat_settings))
# figure out the names to be used for the longitude and latitude variables
lon_name = lon_lat_settings['longitude']
lat_name = lon_lat_settings['latitude' ]
# load our lon/lat data
lon_object, lat_object, spatialInfo = \
_load_lon_lat (lon_name, lat_name, file_object,
alt_file=lon_lat_settings['a_lon_lat_from_alt_file']
if ('a_lon_lat_from_alt_file' in lon_lat_settings) else None,
lat_filter=lon_lat_settings['data_filter_function_lat_in_a'],
lon_filter=lon_lat_settings['data_filter_function_lon_in_a'])
# FUTURE, return the lon/lat objects instead?
return {
"lon": lon_object.data,
"lat": lat_object.data,
"inv_mask": lon_object.masks.ignore_mask | lat_object.masks.ignore_mask,
"lon_fill": lon_object.fill_value,
"lat_fill": lat_object.fill_value
}, \
spatialInfo
def _open_and_process_files (args, numFilesExpected):
"""
open files listed in the args and get information about the variables in them
"""
# get all the file names
fileNames = args[:numFilesExpected]
# open all the files & get their variable names
files = {}
commonNames = None
for fileName in fileNames:
LOG.info("opening %s" % fileName)
files[fileName] = {}
tempFileObject = (io.open(fileName))
files[fileName]['fileObject'] = tempFileObject
tempNames = set(tempFileObject())
LOG.debug ('variable names for ' + fileName + ': ' + str(tempNames))
files[fileName]['varNames'] = tempNames
if commonNames is None :
commonNames = tempNames
else :
commonNames = commonNames.intersection(tempNames)
files['commonVarNames'] = commonNames
return files
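# Sketch of the structure returned by _open_and_process_files (the paths shown
# are illustrative):
#
#     files = {
#         '/path/to/a.hdf':  {'fileObject': <glance io object>, 'varNames': set([...])},
#         '/path/to/b.hdf':  {'fileObject': <glance io object>, 'varNames': set([...])},
#         'commonVarNames':  set([...]),    # names present in every file opened
#     }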
def _check_pass_or_fail(varRunInfo, variableStats, defaultValues) :
"""
Check whether the variable passed analysis, failed analysis, or
did not need to be quantitatively tested
also returns information about the fractions of failure
"""
passValues = [ ]
# test the epsilon value tolerance
# get the tolerance for failures compared to epsilon
epsilonTolerance = None
if ('epsilon_failure_tolerance' in varRunInfo) :
epsilonTolerance = varRunInfo['epsilon_failure_tolerance']
else :
epsilonTolerance = defaultValues['epsilon_failure_tolerance']
# did we fail based on the epsilon?
failed_fraction = variableStats['Numerical Comparison Statistics']['diff_outside_epsilon_fraction']
passed_epsilon = None
if epsilonTolerance is not None :
passed_epsilon = failed_fraction <= epsilonTolerance
passValues.append(passed_epsilon)
# test the nonfinite tolerance
# get the tolerance for failures in amount of nonfinite data (in spatially valid areas)
nonfiniteTolerance = None
if ('nonfinite_data_tolerance' in varRunInfo) :
nonfiniteTolerance = varRunInfo['nonfinite_data_tolerance']
else :
nonfiniteTolerance = defaultValues['nonfinite_data_tolerance']
# did we fail based on nonfinite data
non_finite_diff_fraction = variableStats['Finite Data Statistics']['finite_in_only_one_fraction']
passed_nonfinite = None
if nonfiniteTolerance is not None :
passed_nonfinite = non_finite_diff_fraction <= nonfiniteTolerance
passValues.append(passed_nonfinite)
# test if the total failed percentage is acceptable
# get the total percentage of failed data that is acceptable
totalFailTolerance = None
if ('total_data_failure_tolerance' in varRunInfo) :
totalFailTolerance = varRunInfo['total_data_failure_tolerance']
# did we fail based on all data failures?
passed_all_percentage = None
if totalFailTolerance is not None :
passed_all_percentage = (non_finite_diff_fraction + failed_fraction) <= totalFailTolerance
passValues.append(passed_all_percentage)
    # test the r-squared correlation coefficient
# get the minimum acceptable r-squared correlation coefficient
min_r_squared = None
if ('minimum_acceptable_squared_correlation_coefficient' in varRunInfo) :
min_r_squared = varRunInfo['minimum_acceptable_squared_correlation_coefficient']
else :
min_r_squared = defaultValues['minimum_acceptable_squared_correlation_coefficient']
# did we fail based on the r-squared correlation coefficient?
r_squared_value = None
passed_r_squared = None
if min_r_squared is not None :
r_squared_value = variableStats['Numerical Comparison Statistics']['r-squared correlation']
passed_r_squared = r_squared_value >= min_r_squared
passValues.append(passed_r_squared)
# figure out the overall pass/fail result
didPass = None
for passValue in passValues :
# if passValue isn't none, we need to update didPass
if passValue is not None :
if didPass is not None :
didPass = passValue and didPass
else :
didPass = passValue
return didPass, failed_fraction, non_finite_diff_fraction, r_squared_value
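# Illustrative call of _check_pass_or_fail (the statistics values are made up,
# but the dictionary keys match the ones read above):
#
#     stats = {'Numerical Comparison Statistics': {'diff_outside_epsilon_fraction': 0.002,
#                                                  'r-squared correlation':         0.998},
#              'Finite Data Statistics':          {'finite_in_only_one_fraction':   0.0}}
#     didPass, eps_frac, nonfinite_frac, r_sq = \
#         _check_pass_or_fail({'epsilon_failure_tolerance': 0.01}, stats, glance_analysis_defaults)
#     # didPass is True here: 0.002 <= 0.01 and 0.0 <= the default nonfinite tolerance of 0.0;
#     # r_sq comes back as None because no minimum r-squared was requested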
def _get_run_identification_info( ) :
"""
get info about what user/machine/version of glance is being used
"""
info_to_return = { }
# get info on who's doing the run and where
info_to_return['machine'] = os.uname()[1] # the name of the machine running the report
info_to_return['user'] = os.getenv("LOGNAME") #os.getlogin() # the name of the user running the report
info_to_return['version'] = _get_glance_version_string()
return info_to_return
def _get_glance_version_string() :
version_num = pkg_resources.require('uwglance')[0].version
return "glance, version " + str(version_num)
def _get_name_info_for_variable(original_display_name, variable_run_info) :
"""
based on the variable run info, figure out the various names for
the variable and return them
the various names are:
technical_name - the name the variable is listed under in the file
b_variable_technical_name - the name the variable is listed under in the b file (may be the same as technical_name)