diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index f12ef5621d97139c804dbeb79649ec0092af4f34..70aa0c6b0ab7f3243edc039318a4284f3ec40886 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -11,7 +11,7 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved. #from pprint import pprint, pformat -import os, sys, logging, datetime, glob +import os, sys, logging, datetime, glob, re from numpy import * import numpy from urllib.parse import quote @@ -64,11 +64,14 @@ def _get_possible_files_from_dir (dir_path) : return found_files -def _match_files_from_dirs (a_path, b_path, ) : +def _match_files_from_dirs (a_path, b_path, strip_expressions=None, ) : """given two paths to directories, try to match up the files we can analyze in them """ + if strip_expressions is None : + strip_expressions = [ ] + # find all the files in the a path we might be able to open found_a_files = _get_possible_files_from_dir(a_path) @@ -76,6 +79,8 @@ def _match_files_from_dirs (a_path, b_path, ) : for filepath in found_a_files : LOG.debug(filepath) + """ + # TODO, when we get to python 3.9, we can use str.removeprefix but until then def _remove_prefix(text, prefix): if text.startswith(prefix): @@ -89,6 +94,29 @@ def _match_files_from_dirs (a_path, b_path, ) : b_filepath = os.path.join(b_path, inner_path) if os.path.exists(b_filepath) : file_pairs.add((a_filepath, b_filepath,)) + """ + + # find all the files in the b path we might be able to open + found_b_files = _get_possible_files_from_dir(b_path) + + LOG.debug("Found " + str(len(found_b_files)) + " possible file(s) in the B directory: ") + for filepath in found_b_files: + LOG.debug(filepath) + + def strip_expressions_from_base (file_path, expressions,) : + clean_name = os.path.basename(file_path) + for expr in expressions : + clean_name = re.sub(expr, '', clean_name) + return clean_name + + # try to pair up our files if possible + file_pairs = set() + for a_filepath in found_a_files : 
+ clean_a = strip_expressions_from_base(a_filepath, strip_expressions,) + for b_filepath in found_b_files : + clean_b = strip_expressions_from_base(b_filepath, strip_expressions,) + if clean_a == clean_b : + file_pairs.add((a_filepath, b_filepath,)) return file_pairs @@ -1653,8 +1681,14 @@ def main(): Depending on how many input file paths you give this command it will either generate comparison or inspection reports. If you give it a directory instead of a file path it will search the underlying file structure to find any appropriately typed files and attempt to create reports for each of them in the - output directory. If you include two directory paths it will attempt to automatically match files in the - same part of the sub-hierarchy with the same names and create comparison reports for them. + output directory. If you include two directory paths it will attempt to automatically match files and + create comparison reports for them. + + Note: If you wish to automatically match files in two directories and they do not have the same names, + you can strip out sections that may not match using '--stripfromname'. If your names have nothing in + common Glance will not be able to figure out how to match them and you will have to run separate reports + for each file pair or provide input directories of files that have been soft linked or renamed to more + compatible file names. If latitude and longitude data are present in the file(s) and specified in the call options, the plots will be drawn on a map. The longitude and latitude variable names may be specified with @@ -1669,8 +1703,8 @@ def main(): Note: If you provided one or two directory paths and those paths included more than one set of files that Glance is able to generate reports for, those reports will be placed in the output path in - separate temporarily directories. These directories are only labeled numerically at the current - time. In future we hope to have a summary report available for the run. 
+ separate temporary directories. These directories are only labeled numerically and with the name of + the A file. In future we hope to have a summary report available for the run. If you would prefer to generate reports without images, use the --reportonly option. This option will generate the html report but omit the images. This may be significantly faster, depending on your system, @@ -1721,13 +1755,15 @@ def main(): if len(a_files_list) <= 0 : LOG.warn("Unable to find any files to analyze in the given directory path.") return 1 - temp_offset = 0 to_return = 0 + temp_offset = 0 # run each of the reports, putting them in inner temp dirs - for file_path in a_files_list : + for file_path in sorted(a_files_list) : ops_copy = tempOptions.copy() if len(a_files_list) > 1 : - ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], "tmp" + str(temp_offset)) + file_name_temp = os.path.splitext(os.path.basename(file_path))[0] + ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], + "report-" + str(temp_offset) + "-" + file_name_temp) temp_offset += 1 LOG.info("Generating inspection report for file: " + file_path) to_return += inspect_library_call(file_path, variables, ops_copy, ) @@ -1747,7 +1783,7 @@ def main(): b_file_path = files[1] # check to see if the paths are dirs if os.path.isdir(a_file_path) and os.path.isdir(b_file_path) : - file_pairs = _match_files_from_dirs(a_file_path, b_file_path) + file_pairs = _match_files_from_dirs(a_file_path, b_file_path, tempOptions[OPTIONS_RE_TO_STRIP_KEY],) # if we didn't find anything, warn the user and stop if len(file_pairs) <= 0 : LOG.warn("Unable to match any files between the given directories. 
" @@ -1756,10 +1792,12 @@ def main(): temp_offset = 0 to_return = 0 # run each of the reports, putting them in inner temp dirs - for single_a_file, single_b_file in file_pairs : + for single_a_file, single_b_file in sorted(file_pairs) : ops_copy = tempOptions.copy() if len(file_pairs) > 1 : - ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], "tmp" + str(temp_offset)) + file_name_temp = os.path.splitext(os.path.basename(single_a_file))[0] + ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], + "report-" + str(temp_offset) + "-" + file_name_temp) temp_offset += 1 LOG.info("Generating comparison report for files: " + single_a_file + " " + single_b_file) to_return += reportGen_library_call(single_a_file, single_b_file, variables, ops_copy, ) diff --git a/pyglance/glance/config_organizer.py b/pyglance/glance/config_organizer.py index 2667ff188437bbe1cdab1779e6642e4803d2a4df..bc743909fa5423166550d4c9ff6986755b8d15b0 100644 --- a/pyglance/glance/config_organizer.py +++ b/pyglance/glance/config_organizer.py @@ -495,59 +495,65 @@ def parse_arguments () : # option to run a test parser.add_argument('-t', '--test', dest="self_test", - action="store_true", default=False, help="run internal unit tests") + action="store_true", default=False, help="run internal unit tests") # message related options parser.add_argument('-q', '--quiet', dest="quiet", - action="store_true", default=False, help="only log error output") + action="store_true", default=False, help="only log error output") parser.add_argument('-v', '--verbose', dest="verbose", - action="store_true", default=False, help="enable logging of more detailed informational output") + action="store_true", default=False, help="enable logging of more detailed informational output") parser.add_argument('-w', '--debug', dest="debug", - action="store_true", default=False, help="enable logging of debug output (warning: this will generate far more messages)") + 
action="store_true", default=False, + help="enable logging of debug output (warning: this will generate far more messages)") parser.add_argument('-n', '--version', dest='version', - action="store_true", default=False, help="print the Glance version") + action="store_true", default=False, help="print the Glance version") # data options for setting variable defaults parser.add_argument('-e', '--epsilon', dest=EPSILON_KEY, type=float, default=0.0, - help="set default epsilon value for comparison threshold (default: %(default)s)") + help="set default epsilon value for comparison threshold (default: %(default)s)") parser.add_argument('-m', '--missing', dest=OPTIONS_FILL_VALUE_KEY, type=float, default=None, - help="set default missing-data value (default: %(default)s)") + help="set default missing-data value (default: %(default)s)") # longitude and latitude related options parser.add_argument('-o', '--longitude', dest=OPTIONS_LON_VAR_NAME_KEY, type=str, - help="set name of longitude variable") + help="set name of longitude variable") parser.add_argument('-a', '--latitude', dest=OPTIONS_LAT_VAR_NAME_KEY, type=str, - help="set name of latitude variable") + help="set name of latitude variable") parser.add_argument('-l', '--llepsilon', dest=OPTIONS_LONLAT_EPSILON_KEY, type=float, default=0.0, - help="set default epsilon for longitude and latitude comparsion (default: %(default)s)") + help="set default epsilon for longitude and latitude comparison (default: %(default)s)") parser.add_argument('-d', '--nolonlat', dest=USE_NO_LON_OR_LAT_VARS_KEY, - action="store_true", default=False, help="do not try to find or analyze logitude and latitude") + action="store_true", default=False, help="do not try to find or analyze longitude and latitude") # note: this one has no help message because it's a pure synonym for --nolonlat parser.add_argument( '--nolatlon', dest=USE_NO_LON_OR_LAT_VARS_KEY, - action="store_true", default=False, help=argparse.SUPPRESS) + action="store_true", 
default=False, help=argparse.SUPPRESS) # output generation related options parser.add_argument('-p', '--outputpath', dest=OPTIONS_OUTPUT_PATH_KEY, type=str, default='./', - help="set path to the output directory (default: %(default)s)") + help="set path to the output directory (default: %(default)s)") parser.add_argument('-i', '--imagesonly', dest=OPTIONS_NO_REPORT_KEY, - action="store_true", default=False, - help="generate only the images (no html report)") + action="store_true", default=False, + help="generate only the images (no html report)") parser.add_argument('-r', '--reportonly', dest=OPTIONS_NO_IMAGES_KEY, - action="store_true", default=False, - help="generate only the html report (no images)") + action="store_true", default=False, + help="generate only the html report (no images)") parser.add_argument('-c', '--configfile', dest=OPTIONS_CONFIG_FILE_KEY, type=str, default=None, - help="set a configuration file to load (default: %(default)s)") - + help="set a configuration file to load (default: %(default)s)") + parser.add_argument('--stripfromname', default=[], dest=OPTIONS_RE_TO_STRIP_KEY, action='append', + help='regular expression to remove from all file names when automatically matching files for comparison') + # should pass/fail be tested? 
parser.add_argument('-x', '--doPassFail', dest=DO_TEST_PASSFAIL_KEY, - action="store_true", default=False, help="test for pass/fail while comparing data (only affects analysis where data is compared)") + action="store_true", default=False, + help="test for pass/fail while comparing data (only affects analysis where data is compared)") # whether or not to do multiprocessing parser.add_argument('-f', '--fork', dest=DO_MAKE_FORKS_KEY, - action="store_true", default=False, help="start multiple processes to create images in parallel") + action="store_true", default=False, + help="start multiple processes to create images in parallel") parser.add_argument('--parsable', dest=PARSABLE_OUTPUT_KEY, - action="store_true", default=False, help="format output to be programmatically parsed. (only affects 'info')") + action="store_true", default=False, + help="format output to be programmatically parsed. (only affects 'info')") parser.add_argument('misc', metavar='COMMAND [FILE [VARIABLE]]+', nargs='*') @@ -597,6 +603,7 @@ def convert_options_to_dict (options) : tempOptions[OPTIONS_CONFIG_FILE_KEY] = clean_path(options.configFile) tempOptions[OPTIONS_NO_REPORT_KEY] = options.imagesOnly tempOptions[OPTIONS_NO_IMAGES_KEY] = options.htmlOnly + tempOptions[OPTIONS_RE_TO_STRIP_KEY] = options.regularExpressionsToStrip # whether or not to do pass fail testing tempOptions[DO_TEST_PASSFAIL_KEY] = options.usePassFail diff --git a/pyglance/glance/constants.py b/pyglance/glance/constants.py index 7f2419e2d19149696ac85d56047e3135ba0ca095..ba272c681858921bec07bfd8452ea05f6329143d 100644 --- a/pyglance/glance/constants.py +++ b/pyglance/glance/constants.py @@ -224,6 +224,7 @@ OPTIONS_NO_IMAGES_KEY = 'htmlOnly' OPTIONS_LAT_VAR_NAME_KEY = 'latitudeVar' OPTIONS_LON_VAR_NAME_KEY = 'longitudeVar' OPTIONS_LONLAT_EPSILON_KEY = 'lonlatepsilon' +OPTIONS_RE_TO_STRIP_KEY = 'regularExpressionsToStrip' # values used by the reports diff --git a/pyglance/glance/report.py b/pyglance/glance/report.py index 
9b6a3b1de69260695123cd63b2d617fa16b4b1a0..1fad62f23b7ef71e0b2ca9282c0391155c72d229 100644 --- a/pyglance/glance/report.py +++ b/pyglance/glance/report.py @@ -38,6 +38,14 @@ INT_TYPES = [ INT_FMT = '%d' for t in INT_TYPES: formattingSettings[t] = INT_FMT +# our report template names +MAIN_REPORT_TEMPLATE = 'mainreport.txt' +INSPECT_MAIN_REPORT_TEMPLATE = 'inspectmainreport.txt' +VAR_REPORT_TEMPLATE = 'variablereport.txt' +INSPECT_VAR_REPORT_TEMPLATE = 'inspectvariablereport.txt' +DOC_REPORT_TEMPLATE = 'doc.txt' + + # make and save an html page using a mako template, put all the data you need # in the template into the kwargs @@ -181,7 +189,7 @@ def generate_and_save_summary_report(files, ATTRS_INFO_DICT_KEY: globalAttrs, } - _make_and_save_page((outputPath + "/" + reportFileName), 'mainreport.txt', **kwargs) + _make_and_save_page((outputPath + "/" + reportFileName), MAIN_REPORT_TEMPLATE, **kwargs) # copy the original configuration file, TODO should I move this to a list input in the parameters? 
if (CONFIG_FILE_PATH_KEY in runInfo) : @@ -197,7 +205,7 @@ def generate_and_save_doc_page(definitions, outputPath) : kwargs = { DEFINITIONS_INFO_KEY: definitions } - _make_and_save_page(outputPath + "/doc.html", 'doc.txt', **kwargs) + _make_and_save_page(outputPath + "/doc.html", DOC_REPORT_TEMPLATE, **kwargs) return @@ -366,7 +374,7 @@ def generate_and_save_variable_report(files, ATTRS_INFO_DICT_KEY: variableAttrs, } - _make_and_save_page((outputPath + "/" + reportFileName), 'variablereport.txt', **kwargs) + _make_and_save_page((outputPath + "/" + reportFileName), VAR_REPORT_TEMPLATE, **kwargs) return @@ -455,7 +463,7 @@ def generate_and_save_inspect_variable_report(files, ATTRS_INFO_DICT_KEY: variableAttrs, } - _make_and_save_page((outputPath + "/" + reportFileName), 'inspectvariablereport.txt', **kwargs) + _make_and_save_page((outputPath + "/" + reportFileName), INSPECT_VAR_REPORT_TEMPLATE, **kwargs) return @@ -539,7 +547,7 @@ def generate_and_save_inspection_summary_report(files, ATTRS_INFO_DICT_KEY: globalAttrs, } - _make_and_save_page((outputPath + "/" + reportFileName), 'inspectmainreport.txt', **kwargs) + _make_and_save_page((outputPath + "/" + reportFileName), INSPECT_MAIN_REPORT_TEMPLATE, **kwargs) # copy the original configuration file, TODO should I move this to a list input in the parameters? if (CONFIG_FILE_PATH_KEY in runInfo) :