From e7c5a459936f87e0d6be3c7af44ed4fec3feb847 Mon Sep 17 00:00:00 2001
From: Eva Schiffer <evas@ssec.wisc.edu>
Date: Fri, 19 Nov 2021 10:36:29 -0600
Subject: [PATCH] setting up the basic framework for --warnmissingvars in the
 config handling

---
 pyglance/glance/compare.py          |  2 +-
 pyglance/glance/config_organizer.py | 99 +++++++++++++++++++++++------
 pyglance/glance/constants.py        |  1 +
 3 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index 1878fb7..938dcbc 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -776,7 +776,7 @@ def reportGen_library_call (a_path, b_path, var_list=None,
     pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile = config_organizer.load_config_or_options(a_path, b_path,
                                                                                                                 options_set,
                                                                                                                 requestedVars = var_list)
-    
+
     # note some of this information for debugging purposes
     LOG.debug('paths: ' +           str(pathsTemp))
     LOG.debug('defaults: ' +        str(defaultValues))
diff --git a/pyglance/glance/config_organizer.py b/pyglance/glance/config_organizer.py
index ceff407..c5948f4 100644
--- a/pyglance/glance/config_organizer.py
+++ b/pyglance/glance/config_organizer.py
@@ -54,7 +54,7 @@ glance_analysis_defaults = {
                             DO_IMAGES_ONLY_ON_FAIL_KEY: False
                            }
 
-def parse_varnames(names, terms, epsilon=0.0, missing=None):
+def parse_varnames(names, terms, epsilon=0.0, missing=None, warn_unfound=False,):
     """filter variable names and substitute default epsilon and missing settings if none provided
     returns (variable name, epsilon, missing) triples
     
@@ -65,15 +65,36 @@ def parse_varnames(names, terms, epsilon=0.0, missing=None):
     terms   - variable selection terms given from the command line
     epsilon - a default epsilon to be used for all variables that do not have a specific epsilon given
     missing - a default fill value to be used for all variables that do not have a specific fill value given
+    warn_unfound - should we warn the user if we can't find anything that matches their requested terms?
     """
-    terms = [x.split(':') for x in terms]
-    terms = [(re.compile(x[0]).match,x[1:]) for x in terms]
+    # split the individual variables into name, epsilon, missing value if they have :'s in them
+    terms_split = [x.split(':') for x in terms]
+    # turn the variable names into regexes that we can use to match them with our names; keep them packaged with the epsilon and missing details
+    terms_compiled = [(re.compile(x[0]).match,x[1:]) for x in terms_split]
+
+    # a function to select the epsilons and missing values as either the default or the ones from the original terms
     def _cvt_em(eps=None, mis=None):
         eps = float(eps) if eps else epsilon
         mis = float(mis) if mis else missing
         return eps, mis
-    sel = [ ((x,)+_cvt_em(*em)) for x in names for (t,em) in terms if t(x) ]
-    return set(sel)
+
+    # note: this was the original way that these were parsed, but I need to do some intermediary stuff to them
+    #selected_names = set( [ ((x,)+_cvt_em(*em)) for x in names for (t,em) in terms_compiled if t(x) ] )
+
+    # pick out the names that match what the user asked for
+    selected_names = set()
+    for t in range(len(terms_compiled)):
+        (re_term, details) = terms_compiled[t]
+        matched_term = False
+        for name in names :
+            if re_term(name) :
+                temp_epsilon, temp_missing = _cvt_em(*details)
+                selected_names.add((name, temp_epsilon, temp_missing))
+                matched_term = True
+        if not matched_term and warn_unfound :
+            LOG.warn("No matching variables found for user requested variable: " + terms[t])
+
+    return selected_names
 
 def _check_shared_names (nameSetA, nameSetB) :
     """
@@ -89,7 +110,8 @@ def _check_shared_names (nameSetA, nameSetB) :
     return {SHARED_VARIABLE_NAMES_KEY: commonNames,  VAR_NAMES_UNIQUE_TO_A_KEY: uniqueToANames, VAR_NAMES_UNIQUE_TO_B_KEY: uniqueToBNames}
 
 def resolve_names(fileAObject, fileBObject, defaultValues,
-                   requestedNames, usingConfigFileFormat=False) :
+                  requestedNames, usingConfigFileFormat=False,
+                  warnIfRequestedVarsUnavailable=False, ) :
     """
     figure out which names the two files share and which are unique to each file, as well as which names
     were requested and are in both sets
@@ -97,19 +119,23 @@ def resolve_names(fileAObject, fileBObject, defaultValues,
     usingConfigFileFormat signals whether the requestedNames parameter will be in the form of the inputed
     names from the command line or a more complex dictionary holding information about the names read in
     from a configuration file
+
+    When warnIfRequestedVarsUnavailable is True, we will log a warning when user requested variables
+    are not present in one or both of the files.
     
     Note: if we ever need a variable with different names in file A and B to be comparable, this logic
     will need to be changed.
     """
     # look at the names present in the two files and compare them
     nameComparison = _check_shared_names(set(fileAObject()), set(fileBObject()))
-    
+
     # figure out which set should be selected based on the user requested names
     fileCommonNames = nameComparison[SHARED_VARIABLE_NAMES_KEY]
     finalNames = {}
+    # if the user gave us a config file to pull info from
     if (usingConfigFileFormat) :
         
-        # if the user didn't ask for any, try everything
+        # if the user didn't ask for any specific variables, try everything
         if (len(requestedNames) == 0) :
             finalFromCommandLine = parse_varnames(fileCommonNames, ['.*'],
                                                   defaultValues[EPSILON_KEY], defaultValues[FILL_VALUE_KEY])
@@ -168,15 +194,33 @@ def resolve_names(fileAObject, fileBObject, defaultValues,
                         # get any information about the units listed in the files
                         finalNames[dispName][VAR_UNITS_A_KEY] = fileAObject.get_attribute(name,   io.UNITS_CONSTANT)
                         finalNames[dispName][VAR_UNITS_B_KEY] = fileBObject.get_attribute(name_b, io.UNITS_CONSTANT)
+
+                    else : # in this case, the user asked for a variable and it was not available
+                        if warnIfRequestedVarsUnavailable :
+                            tempVarName = name if name_b == name else "A: " + name + "; B: " + name_b
+                            aStatus = name   in nameComparison[VAR_NAMES_UNIQUE_TO_A_KEY] or name   in fileCommonNames
+                            bStatus = name_b in nameComparison[VAR_NAMES_UNIQUE_TO_B_KEY] or name_b in fileCommonNames
+                            tempReason = "of an unknown reason."
+                            if not aStatus and not bStatus :
+                                tempReason = "the variable was not present in either file."
+                            if aStatus and not bStatus :
+                                tempReason = "the variable was available in the A file but was not present in the B file."
+                            if bStatus and not aStatus :
+                                tempReason = "the variable was available in the B file but was not present in the A file."
+                            LOG.warn("Unable to compare requested variable (" + tempVarName + ") because " + tempReason)
                         
                 else :
                     LOG.warn('No technical variable name was given for the entry described as "' + dispName + '". ' +
                              'Skipping this variable.')
+
+    # otherwise we are using the command line only, so that's the only place requested variable names could be
     else:
+
         # format command line input similarly to the stuff from the config file
-        #print (requestedNames)
         finalFromCommandLine = parse_varnames(fileCommonNames, requestedNames,
-                                              defaultValues[EPSILON_KEY], defaultValues[FILL_VALUE_KEY])
+                                              defaultValues[EPSILON_KEY], defaultValues[FILL_VALUE_KEY],
+                                              warnIfRequestedVarsUnavailable,)
+
         for name, epsilon, missing in finalFromCommandLine :
             ## we'll use the variable's name as the display name for the time being
             finalNames[name] = {}
@@ -202,7 +246,8 @@ def resolve_names(fileAObject, fileBObject, defaultValues,
     return finalNames, nameComparison
 
 def resolve_names_one_file(fileObject, defaultValues,
-                           requestedNames, usingConfigFileFormat=False) :
+                           requestedNames, usingConfigFileFormat=False,
+                           warnIfRequestedVarsUnavailable=False,) :
     """
     sort out which names to examine based on a file that contains names and the names
     the caller asked for, then fill in information on missing values based on the
@@ -260,6 +305,12 @@ def resolve_names_one_file(fileObject, defaultValues,
                         
                         # get any information about the units listed in the file
                         finalNames[dispName][VAR_UNITS_A_KEY] = fileObject.get_attribute(name, io.UNITS_CONSTANT)
+
+                    # they asked for a variable name we don't have
+                    else :
+                        if warnIfRequestedVarsUnavailable:
+                            LOG.warn("Unable to compare requested variable (" + name +
+                                     ") because the variable was not present in the input file.")
                         
                 else :
                     LOG.warn('No technical variable name was given for the entry described as "' + dispName + '". ' +
@@ -268,7 +319,8 @@ def resolve_names_one_file(fileObject, defaultValues,
         # format command line input similarly to the stuff from the config file
         #print (requestedNames)
         finalFromCommandLine = parse_varnames(possibleNames, requestedNames,
-                                              None, defaultValues[FILL_VALUE_KEY])
+                                              None, defaultValues[FILL_VALUE_KEY],
+                                              warnIfRequestedVarsUnavailable,)
         for name, _, missing in finalFromCommandLine :
             ## we'll use the variable's name as the display name for the time being
             finalNames[name] = {}
@@ -415,11 +467,13 @@ def load_config_or_options(aPath, bPath, optionsSet, requestedVars = [ ]) :
             #glanceRunConfig = imp.load_module(fileBaseName, open(requestedConfigFile, 'U'),
             #                                  requestedConfigFile, ('.py' , 'U', 1))
 
-            # this is an exception, since it is not advertised to the user we don't expect it to be in the file
-            # (at least not at the moment, it could be added later and if they did happen to put it in the
-            # config file, it would override this line)
+            # these are exceptions: since they are not advertised to the user as being available in the config
+            # file, we don't expect them to be in it (at least not at the moment; they could be added to the docs
+            # later). If the user did happen to put any of them in the config file, that value would override the
+            # command line derived value that we set here.
             runInfo[DO_MAKE_REPORT_KEY]         = not optionsSet[OPTIONS_NO_REPORT_KEY]      if OPTIONS_NO_REPORT_KEY      in optionsSet else False
             runInfo[USE_NO_LON_OR_LAT_VARS_KEY] =     optionsSet[USE_NO_LON_OR_LAT_VARS_KEY] if USE_NO_LON_OR_LAT_VARS_KEY in optionsSet else False
+            runInfo[OPTIONS_WARN_MISSING_KEY]   =     optionsSet[OPTIONS_WARN_MISSING_KEY]   if OPTIONS_WARN_MISSING_KEY   in optionsSet else False
             
             # get everything from the config file
             runInfo.update(glanceRunConfig.settings)
@@ -440,10 +494,11 @@ def load_config_or_options(aPath, bPath, optionsSet, requestedVars = [ ]) :
         LOG.info ('Using Command Line Settings')
         
         # so get everything from the options directly
-        runInfo[DO_MAKE_REPORT_KEY] = not optionsSet[OPTIONS_NO_REPORT_KEY]
-        runInfo[DO_MAKE_IMAGES_KEY] = not optionsSet[OPTIONS_NO_IMAGES_KEY]
+        runInfo[DO_MAKE_REPORT_KEY]         = not optionsSet[OPTIONS_NO_REPORT_KEY]
+        runInfo[DO_MAKE_IMAGES_KEY]         = not optionsSet[OPTIONS_NO_IMAGES_KEY]
         runInfo[DO_IMAGES_ONLY_ON_FAIL_KEY] = optionsSet[OPTIONS_IMAGES_ON_FAIL_KEY]
-        runInfo[DO_MAKE_FORKS_KEY]  =     optionsSet[DO_MAKE_FORKS_KEY]
+        runInfo[DO_MAKE_FORKS_KEY]          = optionsSet[DO_MAKE_FORKS_KEY]
+        runInfo[OPTIONS_WARN_MISSING_KEY]   = optionsSet[OPTIONS_WARN_MISSING_KEY]
         
         # only record these if we are using lon/lat
         runInfo[USE_NO_LON_OR_LAT_VARS_KEY] = optionsSet[USE_NO_LON_OR_LAT_VARS_KEY]
@@ -536,6 +591,9 @@ def parse_arguments (version_string, commands_list, commands_help_text, ) :
                         help="enable logging of debug output (warning: this will generate far more messages)")
     parser.add_argument('-n', '--version', version=version_string, action="version",
                         help="print the Glance version")
+    parser.add_argument('--warnmissingvars', dest=OPTIONS_WARN_MISSING_KEY,
+                        action="store_true", default=False,
+                        help="emit warnings if variables you request by name are missing from your input files")
     
     # data options for setting variable defaults
     parser.add_argument('-e', '--epsilon', dest=EPSILON_KEY, type=float, default=0.0,
@@ -621,7 +679,10 @@ def convert_options_to_dict (options) :
     """
     
     tempOptions = { }
-    
+
+    # warning defaults
+    tempOptions[OPTIONS_WARN_MISSING_KEY]   = options.warnmissing
+
     # variable defaults
     tempOptions[EPSILON_KEY]                = options.epsilon
     tempOptions[OPTIONS_FILL_VALUE_KEY]     = options.missing
diff --git a/pyglance/glance/constants.py b/pyglance/glance/constants.py
index 57285bb..a58357d 100644
--- a/pyglance/glance/constants.py
+++ b/pyglance/glance/constants.py
@@ -226,6 +226,7 @@ OPTIONS_LAT_VAR_NAME_KEY   = 'latitudeVar'
 OPTIONS_LON_VAR_NAME_KEY   = 'longitudeVar'
 OPTIONS_LONLAT_EPSILON_KEY = 'lonlatepsilon'
 OPTIONS_RE_TO_STRIP_KEY    = 'regularExpressionsToStrip'
+OPTIONS_WARN_MISSING_KEY   = 'warnmissing'
 
 # values used by the reports
 
-- 
GitLab