From e7c5a459936f87e0d6be3c7af44ed4fec3feb847 Mon Sep 17 00:00:00 2001 From: Eva Schiffer <evas@ssec.wisc.edu> Date: Fri, 19 Nov 2021 10:36:29 -0600 Subject: [PATCH] setting up the basic framework for --warnmissingvars in the config handling --- pyglance/glance/compare.py | 2 +- pyglance/glance/config_organizer.py | 99 +++++++++++++++++++++++------ pyglance/glance/constants.py | 1 + 3 files changed, 82 insertions(+), 20 deletions(-) diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index 1878fb7..938dcbc 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -776,7 +776,7 @@ def reportGen_library_call (a_path, b_path, var_list=None, pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile = config_organizer.load_config_or_options(a_path, b_path, options_set, requestedVars = var_list) - + # note some of this information for debugging purposes LOG.debug('paths: ' + str(pathsTemp)) LOG.debug('defaults: ' + str(defaultValues)) diff --git a/pyglance/glance/config_organizer.py b/pyglance/glance/config_organizer.py index ceff407..c5948f4 100644 --- a/pyglance/glance/config_organizer.py +++ b/pyglance/glance/config_organizer.py @@ -54,7 +54,7 @@ glance_analysis_defaults = { DO_IMAGES_ONLY_ON_FAIL_KEY: False } -def parse_varnames(names, terms, epsilon=0.0, missing=None): +def parse_varnames(names, terms, epsilon=0.0, missing=None, warn_unfound=False,): """filter variable names and substitute default epsilon and missing settings if none provided returns (variable name, epsilon, missing) triples @@ -65,15 +65,36 @@ def parse_varnames(names, terms, epsilon=0.0, missing=None): terms - variable selection terms given from the command line epsilon - a default epsilon to be used for all variables that do not have a specific epsilon given missing - a default fill value to be used for all variables that do not have a specific fill value given + warn_unfound - should we warn the user if we can't find anything that matches their 
requested terms? """ - terms = [x.split(':') for x in terms] - terms = [(re.compile(x[0]).match,x[1:]) for x in terms] + # split the individual variables into name, epsilon, missing value if they have :'s in them + terms_split = [x.split(':') for x in terms] + # turn the variable names into regexes that we can use to match them with our names; keep them packaged with the epsilon and missing details + terms_compiled = [(re.compile(x[0]).match,x[1:]) for x in terms_split] + + # a function to select the epsilons and missing values as either the default or the ones from the original terms def _cvt_em(eps=None, mis=None): eps = float(eps) if eps else epsilon mis = float(mis) if mis else missing return eps, mis - sel = [ ((x,)+_cvt_em(*em)) for x in names for (t,em) in terms if t(x) ] - return set(sel) + + # note: this was the original way that these were parsed, but I need to do some intermediary stuff to them + #selected_names = set( [ ((x,)+_cvt_em(*em)) for x in names for (t,em) in terms_compiled if t(x) ] ) + + # pick out the names that match what the user asked for + selected_names = set() + for t in range(len(terms_compiled)): + (re_term, details) = terms_compiled[t] + matched_term = False + for name in names : + if re_term(name) : + temp_epsilon, temp_missing = _cvt_em(*details) + selected_names.add((name, temp_epsilon, temp_missing)) + matched_term = True + if not matched_term and warn_unfound : + LOG.warn("No matching variables found for user requested variable: " + terms[t]) + + return selected_names def _check_shared_names (nameSetA, nameSetB) : """ @@ -89,7 +110,8 @@ def _check_shared_names (nameSetA, nameSetB) : return {SHARED_VARIABLE_NAMES_KEY: commonNames, VAR_NAMES_UNIQUE_TO_A_KEY: uniqueToANames, VAR_NAMES_UNIQUE_TO_B_KEY: uniqueToBNames} def resolve_names(fileAObject, fileBObject, defaultValues, - requestedNames, usingConfigFileFormat=False) : + requestedNames, usingConfigFileFormat=False, + warnIfRequestedVarsUnavailable=False, ) : """ figure out 
which names the two files share and which are unique to each file, as well as which names were requested and are in both sets @@ -97,19 +119,23 @@ def resolve_names(fileAObject, fileBObject, defaultValues, usingConfigFileFormat signals whether the requestedNames parameter will be in the form of the inputed names from the command line or a more complex dictionary holding information about the names read in from a configuration file + + When warnIfRequestedVarsUnavailable is True, we will log a warning when user requested variables + are not present in one or both of the files. Note: if we ever need a variable with different names in file A and B to be comparable, this logic will need to be changed. """ # look at the names present in the two files and compare them nameComparison = _check_shared_names(set(fileAObject()), set(fileBObject())) - + # figure out which set should be selected based on the user requested names fileCommonNames = nameComparison[SHARED_VARIABLE_NAMES_KEY] finalNames = {} + # if the user gave us a config file to pull info from if (usingConfigFileFormat) : - # if the user didn't ask for any, try everything + # if the user didn't ask for any specific variables, try everything if (len(requestedNames) == 0) : finalFromCommandLine = parse_varnames(fileCommonNames, ['.*'], defaultValues[EPSILON_KEY], defaultValues[FILL_VALUE_KEY]) @@ -168,15 +194,33 @@ def resolve_names(fileAObject, fileBObject, defaultValues, # get any information about the units listed in the files finalNames[dispName][VAR_UNITS_A_KEY] = fileAObject.get_attribute(name, io.UNITS_CONSTANT) finalNames[dispName][VAR_UNITS_B_KEY] = fileBObject.get_attribute(name_b, io.UNITS_CONSTANT) + + else : # in this case, the user asked for a variable and it was not available + if warnIfRequestedVarsUnavailable : + tempVarName = name if name_b == name else "A: " + name + "; B: " + name_b + aStatus = name in nameComparison[VAR_NAMES_UNIQUE_TO_A_KEY] or name in fileCommonNames + bStatus = name_b in 
nameComparison[VAR_NAMES_UNIQUE_TO_B_KEY] or name_b in fileCommonNames + tempReason = "of an unknown reason." + if not aStatus and not bStatus : + tempReason = "the variable was not present in either file." + if aStatus and not bStatus : + tempReason = "the variable was available in the A file but was not present in the B file." + if bStatus and not aStatus : + tempReason = "the variable was available in the B file but was not present in the A file." + LOG.warn("Unable to compare requested variable (" + tempVarName + ") because " + tempReason) else : LOG.warn('No technical variable name was given for the entry described as "' + dispName + '". ' + 'Skipping this variable.') + + # if we are using the command line only, then that's the only place requested variable names could come from else: + # format command line input similarly to the stuff from the config file - #print (requestedNames) finalFromCommandLine = parse_varnames(fileCommonNames, requestedNames, - defaultValues[EPSILON_KEY], defaultValues[FILL_VALUE_KEY]) + defaultValues[EPSILON_KEY], defaultValues[FILL_VALUE_KEY], + warnIfRequestedVarsUnavailable,) + for name, epsilon, missing in finalFromCommandLine : ## we'll use the variable's name as the display name for the time being finalNames[name] = {} @@ -202,7 +246,8 @@ def resolve_names(fileAObject, fileBObject, defaultValues, return finalNames, nameComparison def resolve_names_one_file(fileObject, defaultValues, - requestedNames, usingConfigFileFormat=False) : + requestedNames, usingConfigFileFormat=False, + warnIfRequestedVarsUnavailable=False,) : """ sort out which names to examine based on a file that contains names and the names the caller asked for, then fill in information on missing values based on the @@ -260,6 +305,12 @@ def resolve_names_one_file(fileObject, defaultValues, # get any information about the units listed in the file finalNames[dispName][VAR_UNITS_A_KEY] = fileObject.get_attribute(name, io.UNITS_CONSTANT) + + # they asked for a variable name 
we don't have + else : + if warnIfRequestedVarsUnavailable: + LOG.warn("Unable to compare requested variable (" + name + + ") because the variable was not present in the input file.") else : LOG.warn('No technical variable name was given for the entry described as "' + dispName + '". ' + @@ -268,7 +319,8 @@ def resolve_names_one_file(fileObject, defaultValues, # format command line input similarly to the stuff from the config file #print (requestedNames) finalFromCommandLine = parse_varnames(possibleNames, requestedNames, - None, defaultValues[FILL_VALUE_KEY]) + None, defaultValues[FILL_VALUE_KEY], + warnIfRequestedVarsUnavailable,) for name, _, missing in finalFromCommandLine : ## we'll use the variable's name as the display name for the time being finalNames[name] = {} @@ -415,11 +467,13 @@ def load_config_or_options(aPath, bPath, optionsSet, requestedVars = [ ]) : #glanceRunConfig = imp.load_module(fileBaseName, open(requestedConfigFile, 'U'), # requestedConfigFile, ('.py' , 'U', 1)) - # this is an exception, since it is not advertised to the user we don't expect it to be in the file - # (at least not at the moment, it could be added later and if they did happen to put it in the - # config file, it would override this line) + # this is an exception, since these are not advertised to the user as being available in the config + # we don't expect them to be in the file (at least not at the moment, these could be added to the docs + # later and if the user did happen to put any of them in the config file, it would override the command + # line derived value that we set here) runInfo[DO_MAKE_REPORT_KEY] = not optionsSet[OPTIONS_NO_REPORT_KEY] if OPTIONS_NO_REPORT_KEY in optionsSet else False runInfo[USE_NO_LON_OR_LAT_VARS_KEY] = optionsSet[USE_NO_LON_OR_LAT_VARS_KEY] if USE_NO_LON_OR_LAT_VARS_KEY in optionsSet else False + runInfo[OPTIONS_WARN_MISSING_KEY] = optionsSet[OPTIONS_WARN_MISSING_KEY] if OPTIONS_WARN_MISSING_KEY in optionsSet else False # get everything 
from the config file runInfo.update(glanceRunConfig.settings) @@ -440,10 +494,11 @@ def load_config_or_options(aPath, bPath, optionsSet, requestedVars = [ ]) : LOG.info ('Using Command Line Settings') # so get everything from the options directly - runInfo[DO_MAKE_REPORT_KEY] = not optionsSet[OPTIONS_NO_REPORT_KEY] - runInfo[DO_MAKE_IMAGES_KEY] = not optionsSet[OPTIONS_NO_IMAGES_KEY] + runInfo[DO_MAKE_REPORT_KEY] = not optionsSet[OPTIONS_NO_REPORT_KEY] + runInfo[DO_MAKE_IMAGES_KEY] = not optionsSet[OPTIONS_NO_IMAGES_KEY] runInfo[DO_IMAGES_ONLY_ON_FAIL_KEY] = optionsSet[OPTIONS_IMAGES_ON_FAIL_KEY] - runInfo[DO_MAKE_FORKS_KEY] = optionsSet[DO_MAKE_FORKS_KEY] + runInfo[DO_MAKE_FORKS_KEY] = optionsSet[DO_MAKE_FORKS_KEY] + runInfo[OPTIONS_WARN_MISSING_KEY] = optionsSet[OPTIONS_WARN_MISSING_KEY] # only record these if we are using lon/lat runInfo[USE_NO_LON_OR_LAT_VARS_KEY] = optionsSet[USE_NO_LON_OR_LAT_VARS_KEY] @@ -536,6 +591,9 @@ def parse_arguments (version_string, commands_list, commands_help_text, ) : help="enable logging of debug output (warning: this will generate far more messages)") parser.add_argument('-n', '--version', version=version_string, action="version", help="print the Glance version") + parser.add_argument('--warnmissingvars', dest=OPTIONS_WARN_MISSING_KEY, + action="store_true", default=False, + help="emit warnings if variables you request by name are missing from your input files") # data options for setting variable defaults parser.add_argument('-e', '--epsilon', dest=EPSILON_KEY, type=float, default=0.0, @@ -621,7 +679,10 @@ def convert_options_to_dict (options) : """ tempOptions = { } - + + # warning defaults + tempOptions[OPTIONS_WARN_MISSING_KEY] = options.warnmissing + # variable defaults tempOptions[EPSILON_KEY] = options.epsilon tempOptions[OPTIONS_FILL_VALUE_KEY] = options.missing diff --git a/pyglance/glance/constants.py b/pyglance/glance/constants.py index 57285bb..a58357d 100644 --- a/pyglance/glance/constants.py +++ 
b/pyglance/glance/constants.py @@ -226,6 +226,7 @@ OPTIONS_LAT_VAR_NAME_KEY = 'latitudeVar' OPTIONS_LON_VAR_NAME_KEY = 'longitudeVar' OPTIONS_LONLAT_EPSILON_KEY = 'lonlatepsilon' OPTIONS_RE_TO_STRIP_KEY = 'regularExpressionsToStrip' +OPTIONS_WARN_MISSING_KEY = 'warnmissing' # values used by the reports -- GitLab