From 91c034a2bd52f734e9c5542e33d1cb130f30ba95 Mon Sep 17 00:00:00 2001
From: Eva Schiffer <evas@ssec.wisc.edu>
Date: Thu, 25 Feb 2021 12:04:18 -0600
Subject: [PATCH] bug fixes and clarifications for the new report command

---
 pyglance/glance/compare.py | 64 +++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index 4acf874..ae4e47f 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -72,7 +72,7 @@ def _match_files_from_dirs (a_path, b_path, ) :
     # find all the files in the a path we might be able to open
     found_a_files = _get_possible_files_from_dir(a_path)
 
-    LOG.debug("Found " + str(len(found_a_files)) + " possible files in the A directory: ")
+    LOG.debug("Found " + str(len(found_a_files)) + " possible file(s) in the A directory: ")
     for filepath in found_a_files :
         LOG.debug(filepath)
 
@@ -85,17 +85,11 @@ def _match_files_from_dirs (a_path, b_path, ) :
     # test to see if there is a matching file in the b_path for each a_path file
     file_pairs = set()
     for a_filepath in found_a_files :
-        inner_path = _remove_prefix(a_filepath, a_path)
+        inner_path = _remove_prefix(a_filepath, a_path)[1:] # for some reason this leaves a prefix / on the inner_path, so we need to remove that
         b_filepath = os.path.join(b_path, inner_path)
         if os.path.exists(b_filepath) :
             file_pairs.add((a_filepath, b_filepath,))
 
-    # print out info on each resulting pair
-    #print ("Found " + str(len(file_pairs)) + " file pair(s): ")
-    #for a, b in file_pairs :
-    #    print ("A: " + a)
-    #    print ("B: " + b)
-
     return file_pairs
 
 # TODO, I'd like to move this into a different file at some point
@@ -742,7 +736,7 @@ def reportGen_library_call (a_path, b_path, var_list=[ ],
         if do_pass_fail :
             return 0 # nothing went wrong, we just had nothing to do!
         else :
-            return
+            return 0
     
     # hang onto info to identify who/what/when/where/etc. the report is being run by/for 
     runInfo[MACHINE_INFO_KEY], runInfo[USER_INFO_KEY], runInfo[GLANCE_VERSION_INFO_KEY] = get_run_identification_info()
@@ -1076,6 +1070,8 @@ def reportGen_library_call (a_path, b_path, var_list=[ ],
     if do_pass_fail :
         LOG.debug("Pass/Fail return code: " + str(returnCode))
         return returnCode
+    else :
+        return 0
 
 def stats_library_call(afn, bfn, var_list=[ ],
                        options_set={ },
@@ -1329,7 +1325,9 @@ def main():
             return status_result
 
     def plotDiffs(*args) :
-        """create a set of images comparing two files
+        """create a set of images comparing two files (Depreciated, please use report in future.)
+        This command is DEPRECIATED. Please use the report command in future.
+
         Create and save images comparing variables in two files. Variables to be compared may be specified after
         the names of the two input files. If no variable names are given, this command will create plots for all
         variables that can be matched by name and shape between the two files.
@@ -1361,7 +1359,9 @@ def main():
         return
 
     def reportGen(*args) :
-        """create a report comparing two files
+        """create a report comparing two files (Depreciated, please use report in future.)
+        This command is DEPRECIATED. Please use the report command in future.
+
         Generate an html report comparing the variables in two files. Variables to be compared may be specified after
         the names of the two input files. If no variable names are given, this command will create reports for all
         variables that can be matched by name and shape between the two files.
@@ -1448,7 +1448,8 @@ def main():
             fileForOutput.close()
     
     def inspectReport(*args) :
-        """create a report to inspect the contents of one file
+        """create a report to inspect the contents of one file (Depreciated, please use report in future.)
+        This command is DEPRECIATED. Please use the report command in future.
 
         Generate an html report examining the variables in a file. Variables to be analyzed may be specified after
         the name of the input file. If no variable names are given, this command will create reports for all
@@ -1591,11 +1592,10 @@ def main():
         each variable analyzed. If no output path is provided, output will be saved in the current directory.
         Created images will be embedded in the report or visible as separate .png files.
 
-        Note: if you provided one or two directory paths and those paths included more than one set of files
+        Note: If you provided one or two directory paths and those paths included more than one set of files
         that Glance is able to generate reports for, those reports will be placed in the output path in
         separate temporarily directories. These directories are only labeled numerically at the current
-        time. In future we hope to have a summary report available for the run, but this does not currently
-        exist.
+        time. In future we hope to have a summary report available for the run.
 
         If you would prefer to generate reports without images, use the --reportonly option. This option will
         generate the html report but omit the images. This may be significantly faster, depending on your system,
@@ -1608,27 +1608,28 @@ def main():
         Examples:
 
          glance report A.hdf variable_name_1:: variable_name_2 variable_name_3::missing3 variable_name_4::missing4
-         glance report A.nc B.hdf variable_name
+         glance report A.nc B.hdf variable_name::missing_value
          glance report C.nc --outputpath=/path/where/output/will/be/placed/
          glance report --longitude=lon_variable_name --latitude=lat_variable_name D.h5 variable_name
          glance report --nolonlat ./A_dir/ ./B_dir/
-         glance report --reportonly ./G_dir/
+         glance report --reportonly ./A_dir/
         """
 
         # examine the args and see how many valid file paths we have
         files = [ ]
         variables = [ ]
+        LOG.debug("Examining arguments to see how many possible file paths we have.")
         for argument_val in args :
             if os.path.exists(argument_val) :
-                LOG.debug("argument value is a file path: " + argument_val)
+                LOG.debug("Argument value is an existing file path: " + argument_val)
                 files.append(clean_path(argument_val))
             else :
-                LOG.debug("argument value is not an existing file path, it will be treated as a variable name: " + argument_val)
+                LOG.debug("Argument value is not an existing file path, it will be treated as a variable name: " + argument_val)
                 variables.append(argument_val)
 
         # if we have no file paths, just stop now
         if len(files) < 1 :
-            LOG.warn("Expected at least one file path to input data. " +
+            LOG.warn("Expected at least one valid file path as input data. " +
                      "Unable to generate a report without a file path.")
             return 1
 
@@ -1641,6 +1642,10 @@ def main():
             # check to see if the file is a dir
             if os.path.isdir(a_file_path) :
                 a_files_list = _get_possible_files_from_dir(a_file_path)
+                # check if we found anything
+                if len(a_files_list) <= 0 :
+                    LOG.warn("Unable to find any files to analyze in the given directory path.")
+                    return 1
                 temp_offset = 0
                 to_return = 0
                 # run each of the reports, putting them in inner temp dirs
@@ -1648,16 +1653,18 @@ def main():
                     ops_copy = tempOptions.copy()
                     if len(a_files_list) > 1 :
                         ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], "tmp" + str(temp_offset))
-                    temp_offset += 1
+                        temp_offset += 1
+                    LOG.info("Generating inspection report for file: " + file_path)
                     to_return += inspect_library_call(file_path, variables, ops_copy, )
                 return to_return
             else : # in this case we just have a regular file, so run one inspect report
+                LOG.info("Generating inspection report for file: " + a_file_path)
                 return inspect_library_call(a_file_path, variables, tempOptions, )
 
-        # this doesn't seem super likely, but just in case, let's at least give a warning so the user has some idea why we ignored some paths
+        # just in case, let's at least give a warning so the user has some idea why we ignored some paths
         if len(files) > 2:
             LOG.warn("More than two file paths were found in your command line input. "
-                     "Only the first two will be used. The rest will be discarded.")
+                     "Only the first two will be used. The rest will be ignored.")
 
         # if we have two file paths, do either one or many comparison reports
         if len(files) >= 2 :
@@ -1666,6 +1673,11 @@ def main():
             # check to see if the paths are dirs
             if os.path.isdir(a_file_path) and os.path.isdir(b_file_path) :
                 file_pairs = _match_files_from_dirs(a_file_path, b_file_path)
+                # if we didn't find anything, warn the user and stop
+                if len(file_pairs) <= 0 :
+                    LOG.warn("Unable to match any files between the given directories. "
+                             "Please check that the files are named the same in both directories.")
+                    return 1
                 temp_offset = 0
                 to_return = 0
                 # run each of the reports, putting them in inner temp dirs
@@ -1673,15 +1685,17 @@ def main():
                     ops_copy = tempOptions.copy()
                     if len(file_pairs) > 1 :
                         ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], "tmp" + str(temp_offset))
-                    temp_offset += 1
+                        temp_offset += 1
+                    LOG.info("Generating comparison report for files: " + single_a_file + "   " + single_b_file)
                     to_return += reportGen_library_call(single_a_file, single_b_file, variables, ops_copy, )
                 return to_return
             # if both the paths are regular files, just run one report
             elif os.path.isfile(a_file_path) and os.path.isfile(b_file_path) :
+                LOG.info("Generating comparison report for files: " + a_file_path + "   " + b_file_path)
                 return reportGen_library_call(a_file_path, b_file_path, variables, tempOptions, )
             else :
                 LOG.error("You have provided one directory path and one file path. "
-                          "Please input two directories or two files, not a mixture.")
+                          "Please input paths to two directories or two files, not a mixture.")
                 return 1
 
         # if we got to here, something has gone terribly wrong
-- 
GitLab