compare.py

                        plotFunctionGenerationObjects.append(plotcreate.BasicComparisonPlotsFunctionFactory())
                    
                    # if the bin and tuple are defined, try to analyze the data as complex
                    # multidimentional information requiring careful sampling
                    if (BIN_INDEX_KEY in varRunInfo) and (TUPLE_INDEX_KEY in varRunInfo) :
                        plotFunctionGenerationObjects.append(plotcreate.BinTupleAnalysisFunctionFactory())
                        
                    else : # if it's not bin/tuple, there are lots of other posibilities

                        # if it's vector data with longitude and latitude, quiver plot it on the Earth
                        if isVectorData and (not do_not_test_with_lon_lat) :
                            plotFunctionGenerationObjects.append(plotcreate.MappedQuiverPlotFunctionFactory())
                        
                        # if the data is one dimensional we can plot it as lines
                        elif   (len(aData.shape) == 1) :
                            plotFunctionGenerationObjects.append(plotcreate.LinePlotsFunctionFactory())
                        
                        # if the data is 2D we have some options based on the type of data
                        elif (len(aData.shape) == 2) :
                            
                            # if the data is not mapped to a longitude and latitude, just show it as an image
                            if (do_not_test_with_lon_lat) :
                                plotFunctionGenerationObjects.append(plotcreate.IMShowPlotFunctionFactory())
                            
                            # if it's 2D and mapped to the Earth, contour plot it on the earth
                            else :
                                plotFunctionGenerationObjects.append(plotcreate.MappedContourPlotFunctionFactory())

                    # plot our lon/lat related info
                    image_names[ORIGINAL_IMAGES_KEY], image_names[COMPARED_IMAGES_KEY] = \
                        plot.plot_and_save_comparison_figures \
                                (aData, bData,
                                 plotFunctionGenerationObjects,
                                 varRunInfo[VARIABLE_DIRECTORY_KEY],
                                 displayName,
                                 varRunInfo[EPSILON_KEY],
                                 varRunInfo[FILL_VALUE_KEY],
                                 missingValueAltInB = varRunInfo[FILL_VALUE_ALT_IN_B_KEY] if FILL_VALUE_ALT_IN_B_KEY in varRunInfo else None,
                                 lonLatDataDict=lon_lat_data,
                                 dataRanges     = varRunInfo[DISPLAY_RANGES_KEY]       if DISPLAY_RANGES_KEY       in varRunInfo else None,
                                 dataRangeNames = varRunInfo[DISPLAY_RANGE_NAMES_KEY]  if DISPLAY_RANGE_NAMES_KEY  in varRunInfo else None,
                                 dataColors     = varRunInfo[DISPLAY_RANGE_COLORS_KEY] if DISPLAY_RANGE_COLORS_KEY in varRunInfo else None,
                                 doFork=runInfo[DO_MAKE_FORKS_KEY],
                                 shouldClearMemoryWithThreads=runInfo[DO_CLEAR_MEM_THREADED_KEY],
                                 shouldUseSharedRangeForOriginal=runInfo[USE_SHARED_ORIG_RANGE_KEY],
                                 doPlotSettingsDict = varRunInfo,
                                 aUData=aUData, aVData=aVData,
                                 bUData=bUData, bVData=bVData,
                                 binIndex=      varRunInfo[BIN_INDEX_KEY]       if BIN_INDEX_KEY       in varRunInfo else None,
                                 tupleIndex=    varRunInfo[TUPLE_INDEX_KEY]     if TUPLE_INDEX_KEY     in varRunInfo else None,
                                 binName=       varRunInfo[BIN_NAME_KEY]        if BIN_NAME_KEY        in varRunInfo else 'bin',
                                 tupleName=     varRunInfo[TUPLE_NAME_KEY]      if TUPLE_NAME_KEY      in varRunInfo else 'tuple',
                                 epsilonPercent=varRunInfo[EPSILON_PERCENT_KEY] if EPSILON_PERCENT_KEY in varRunInfo else None,
                                 fullDPI=       runInfo[DETAIL_DPI_KEY],
                                 thumbDPI=      runInfo[THUMBNAIL_DPI_KEY],
                                 units_a=       varRunInfo[VAR_UNITS_A_KEY]     if VAR_UNITS_A_KEY     in varRunInfo else None,
                                 units_b=       varRunInfo[VAR_UNITS_B_KEY]     if VAR_UNITS_B_KEY     in varRunInfo else None,
                                )#histRange=     varRunInfo[HISTOGRAM_RANGE_KEY] if HISTOGRAM_RANGE_KEY in varRunInfo else None)
                    
                    LOG.info("\tfinished creating figures for: " + explanationName)
                
                # create the report page for this variable
                if (runInfo[DO_MAKE_REPORT_KEY]) :
                    
                    # hang on to our good % and other info to describe our comparison
                    epsilonPassedPercent = (1.0 -  epsilon_failed_fraction) * 100.0
                    finitePassedPercent  = (1.0 - non_finite_fail_fraction) * 100.0 
                    variableComparisons[displayName] = {
                                                        PASSED_EPSILON_PERCENT_KEY: epsilonPassedPercent,
                                                        FINITE_SIMILAR_PERCENT_KEY: finitePassedPercent,
                                                        R_SQUARED_COEFF_VALUE_KEY:  r_squared_value,
                                                        VARIABLE_RUN_INFO_KEY:      varRunInfo
                                                        }
                    variableStatsCollection[displayName] = variable_stats.dictionary_form() # TODO, also image names?
                    
                    LOG.info ('\tgenerating report for: ' + explanationName)
                    reportModule.generate_and_save_variable_report(files,
                                                             varRunInfo, runInfo,
                                                             variable_stats.dictionary_form(),
                                                             spatialInfo,
                                                             image_names,
                                                             varRunInfo[VARIABLE_DIRECTORY_KEY], "index.html",
                                                             variableAttrs=attributeInfo,)
            
            # if we can't compare the variable, we should tell the user 
            else :
                message = (explanationName + ' ' + 
                         'could not be compared. This may be because the data for this variable does not match in shape ' +
                         'between the two files (file A data shape: ' + str(aData.shape) + '; file B data shape: '
                         + str(bData.shape) + ')')
                if do_not_test_with_lon_lat :
                    message = message + '.'
                else :
                    message = (message + ' or the data may not match the shape of the selected '
                         + 'longitude ' + str(good_shape_from_lon_lat) + ' and '
                         + 'latitude '  + str(good_shape_from_lon_lat) + ' variables.')
                LOG.warn(message)
        except ValueErrorStringToFloat as e:
            LOG.warn("Unable to compare "+displayName+": "+str(e))

    # the end of the loop to examine all the variables
    
    # generate our general report pages once we've analyzed all the variables
    if runInfo[DO_MAKE_REPORT_KEY] :
        
        # get the current time
        runInfo[TIME_INFO_KEY] = datetime.datetime.ctime(datetime.datetime.now())
        
        # make the main summary report
        LOG.info ('generating summary report')
        reportModule.generate_and_save_summary_report(files,
                                                pathsTemp[OUT_FILE_KEY], 'index.html',
                                                runInfo,
                                                variableComparisons, 
                                                spatialInfo,
                                                nameStats,
                                                globalAttrs=globalAttrInfo,)

        # TODO save some info we need for the concise summary report
        concise_info =  {
                            "files": files,
                            "runInfo": runInfo,
                            "var_comparisons": variableComparisons,
                            "var_stats": variableStatsCollection,
                        }

        # make the glossary
        LOG.info ('generating glossary')
        reportModule.generate_and_save_doc_page(statistics.StatisticalAnalysis.doc_strings(), pathsTemp[OUT_FILE_KEY])
    
    returnCode = 0 if didPassAll else 2 # return 2 only if some of the variables failed
    
    # if we are reporting the pass / fail, return an appropriate status code
    to_return = returnCode if do_pass_fail else 0
    if do_pass_fail :
        LOG.debug("Pass/Fail return code: " + str(returnCode))
    if do_return_summary_info and runInfo[DO_MAKE_REPORT_KEY] :
        return to_return, concise_info
    else :
        return to_return

def stats_library_call(afn, bfn, var_list=None,
                       options_set=None,
                       do_document=False,
                       output_channel=sys.stdout):
    """
    this method handles the actual work of the stats command line tool and
    can also be used as a library routine, simply pass in an output channel
    to receive the printed statistics.

    Parameters:
        afn, bfn        paths to file A and file B
        var_list        variable name selections handed to config_organizer.parse_varnames;
                        None or an empty list selects everything ('.*')
        options_set     dictionary of options; EPSILON_KEY, OPTIONS_FILL_VALUE_KEY and
                        DO_TEST_PASSFAIL_KEY are read unconditionally, so leaving this
                        as None (which becomes an empty dictionary) raises a KeyError
        do_document     if True also print documentation: next to each statistic when
                        exactly one variable was selected, otherwise once at the end
        output_channel  file-like object the statistics are printed to

    Returns:
        when options_set[DO_TEST_PASSFAIL_KEY] is true: 0 if every compared variable
        passed and 3 if at least one failed; otherwise nothing (None) is returned.
        The statistics themselves are only printed, they are not returned.

    TODO, should this move to a different file?
    """

    # set some values for defaults
    var_list = [] if var_list is None else var_list
    options_set = {} if options_set is None else options_set

    # unpack some options
    epsilon_val  = options_set[EPSILON_KEY]
    missing_val  = options_set[OPTIONS_FILL_VALUE_KEY]
    do_pass_fail = options_set[DO_TEST_PASSFAIL_KEY]

    LOG.debug ("file a: " + afn)
    LOG.debug ("file b: " + bfn)

    # open the files
    filesInfo = open_and_process_files([afn, bfn])
    aFile = filesInfo[afn][FILE_OBJECT_KEY]
    bFile = filesInfo[bfn][FILE_OBJECT_KEY]

    # information for testing pass/fail if needed
    has_failed = False
    epsilon_fail_tolerance   = 0.0
    nonfinite_fail_tolerance = 0.0

    # figure out the variable names and their individual settings
    if not var_list :
        var_list = ['.*']
    names = config_organizer.parse_varnames( filesInfo[COMMON_VAR_NAMES_KEY], var_list, epsilon_val, missing_val )
    LOG.debug(str(names))
    doc_each  = do_document and len(names)==1
    doc_atend = do_document and len(names)!=1

    # look the per-statistic documentation up once instead of once per printed statistic
    stat_docs = statistics.StatisticalAnalysis.doc_strings() if doc_each else None

    for name, epsilon, missing in sorted(names, key=lambda X:X[0]):

        # make sure that it's possible to load this variable
        if not(aFile.is_loadable_type(name)) or not(bFile.is_loadable_type(name)) :
            LOG.warning(name + " is of a type that cannot be loaded using current file handling libraries included with Glance." +
                        " Skipping " + name + ".")
            continue

        try :
            aData = aFile[name]
            bData = bFile[name]
        except io.IONonnumericalTypeError as bad_data_error :
            LOG.error("Skipping variable %s because it is of a non-numerical type "
                      "(may indicate array of variable-length strings): %s" % (name, repr(bad_data_error)))
            continue

        # an explicitly given missing value applies to both files,
        # otherwise each file is asked for its own
        if missing is None:
            amiss = aFile.missing_value(name)
            bmiss = bFile.missing_value(name)
        else:
            amiss,bmiss = missing,missing
        LOG.debug('comparing %s with epsilon %s and missing %s,%s' % (name,epsilon,amiss,bmiss))
        print('-'*32, file=output_channel,)
        print(name,   file=output_channel,)
        print('',     file=output_channel)
        variable_stats = statistics.StatisticalAnalysis.withSimpleData(aData, bData, amiss, bmiss, epsilon=epsilon)

        # if we're doing pass/fail testing, do that now
        if do_pass_fail :

            tempDefaults = config_organizer.get_simple_variable_defaults()
            didPass, _, _, _ = variable_stats.check_pass_or_fail(epsilon_failure_tolerance=epsilon_fail_tolerance,
                                                                 epsilon_failure_tolerance_default=tempDefaults[EPSILON_FAIL_TOLERANCE_KEY],
                                                                 non_finite_data_tolerance=nonfinite_fail_tolerance,
                                                                 non_finite_data_tolerance_default=tempDefaults[NONFINITE_TOLERANCE_KEY],
                                                                 total_data_failure_tolerance_default=tempDefaults[TOTAL_FAIL_TOLERANCE_KEY],
                                                                 min_acceptable_r_squared_default=tempDefaults[MIN_OK_R_SQUARED_COEFF_KEY],
                                                                )
            has_failed = has_failed or not(didPass)

        # print each group of statistics, groups and statistics both in sorted order
        for dictionary_title, dict_data in sorted(variable_stats.dictionary_form().items()):
            print('%s' % dictionary_title, file=output_channel,)
            for each_stat in sorted(dict_data):
                print('  %s: %s' % (each_stat, dict_data[each_stat]), file=output_channel,)
                if doc_each: print(('    ' + stat_docs[each_stat]), file=output_channel,)
            print('', file=output_channel, )
    if doc_atend:
        print(('\n\n' + statistics.get_comparison_doc_string()), file=output_channel,)

    # if we are doing pass/fail, we need to return a status code
    if do_pass_fail :
        status_code = 3 if has_failed else 0
        LOG.debug("stats is returning status code: " + str(status_code))
        return status_code
    # note: if we aren't doing pass/fail, stats will not return anything

def inspect_stats_library_call(afn, var_list=None, options_set=None, do_document=False, output_channel=sys.stdout):
    """
    this method handles the actual work of the inspect_stats command line tool and
    can also be used as a library routine, simply pass in an output channel
    to receive the printed statistics.

    Parameters:
        afn             path to the file to inspect
        var_list        variable name selections handed to config_organizer.parse_varnames;
                        None or an empty list selects everything ('.*')
        options_set     dictionary of options; OPTIONS_FILL_VALUE_KEY is read
                        unconditionally, so leaving this as None (which becomes an
                        empty dictionary) raises a KeyError
        do_document     if True also print documentation: next to each statistic when
                        exactly one variable was selected, otherwise once at the end
        output_channel  file-like object the statistics are printed to

    Returns:
        always 0; the statistics themselves are only printed, they are not returned.

    TODO, should this move to a different file?
    """

    # set some values for defaults
    var_list = [ ] if var_list is None else var_list
    options_set = { } if options_set is None else options_set

    # unpack some options
    missing_val  = options_set[OPTIONS_FILL_VALUE_KEY]

    LOG.debug ("file a: " + afn)

    # open the file
    filesInfo = open_and_process_files([afn])
    aFile = filesInfo[afn][FILE_OBJECT_KEY]

    # figure out the variable names and their individual settings
    if not var_list :
        var_list = ['.*']
    names = config_organizer.parse_varnames( filesInfo[COMMON_VAR_NAMES_KEY], var_list, epsilon=None, missing=missing_val )
    LOG.debug(str(names))
    doc_each  = do_document and len(names)==1
    doc_atend = do_document and len(names)!=1

    # look the per-statistic documentation up once instead of once per printed statistic
    stat_docs = statistics.StatisticalInspectionAnalysis.doc_strings() if doc_each else None

    # the epsilon entry of each parsed name is not needed when inspecting a single file
    for name, _epsilon, missing in sorted(names, key=lambda X:X[0]):

        # make sure that it's possible to load this variable
        if not(aFile.is_loadable_type(name)) :
            LOG.warning(name + " is of a type that cannot be loaded using current file handling libraries included with Glance." +
                        " Skipping " + name + ".")
            continue

        aData = aFile[name]

        # fall back on the file's own missing value when none was given explicitly
        amiss = aFile.missing_value(name) if missing is None else missing
        LOG.debug('analyzing %s with missing data value %s' % (name,amiss))
        print('-' * 32,     file=output_channel,)
        print(name,         file=output_channel,)
        print('',           file=output_channel,)
        variable_stats = statistics.StatisticalInspectionAnalysis.withSimpleData(aData, amiss)

        # print each group of statistics, groups and statistics both in sorted order
        for dictionary_title, dict_data in sorted(variable_stats.dictionary_form().items()):
            print('%s' % dictionary_title, file=output_channel,)
            for each_stat in sorted(dict_data):
                print('  %s: %s' % (each_stat, dict_data[each_stat]), file=output_channel, )
                if doc_each: print(('    ' + stat_docs[each_stat]), file=output_channel,)
            print('', file=output_channel,)
    if doc_atend:
        print(('\n\n' + statistics.get_inspection_doc_string()), file=output_channel,)

    return 0

def main():

    # horray, dummy options!
    options = None

    commands = {}
    prior = None
    prior = dict(locals())
    
    """
    The following functions represent available menu selections in glance.
    """
    
    def info(*args):
        """list information about a list of files
        Given a file (or list of files), print out the names of the available variables in the file(s).

        Examples:

         glance info A.hdf
         glance info B.h5 C.nc
        """
        # NOTE(review): the docstring above is left untouched because it looks like it
        # doubles as the command's help text — confirm before rewording it.
        #
        # Returns the number of files that could not be processed, capped at 255.
        problems = 0
        for fn in args:
            try :
                # calling the opened file object yields the names it contains
                # (presumably io is Glance's own io module, not the stdlib one — confirm)
                lal = sorted(io.open(fn)())
                if options.parsable_output:
                    # one "file<TAB>name" line per variable
                    print("".join([fn + "\t" + x + "\n" for x in lal]))
                else:
                    # file name once, then the names lined up underneath each other
                    print(fn + ': ' + ('\n  ' + ' ' * len(fn)).join(lal))
            except KeyError :
                LOG.warning('Unable to open / process file selection: ' + fn)
                problems += 1
        # exit code is 8-bits, limit ourselves.
        return min(problems, 255)

    def stats (*args) :
        """do statistical analysis on your input

        The stats command will perform various types of statistical analysis depending on what input files
        you give it.

        If you give it two input files, it will summarize the statistical differences between the two files.
        If you give it one file it will print some informational statistics about that file.
        Please note that this command does not handle any sort of directory navigation, so you will need
        to give it simple paths to specific files you want to analyze.

        Optionally you may also provide variable names to be analyzed. If no variable names are
        given, this command will attempt to analyze all variables it can. When analyzing two files
        only variables with matching names and shapes will be examined.

        If you provide an output path, statistics will be saved to a stats.txt file in that directory.

        Run with -v to get more detailed information about the statistics being reported.

        Examples:

         glance stats A.hdf B.hdf
         glance stats --epsilon=0.00001 C.nc B.hdf baseline_cmask_seviri_cloud_mask:0.002:
         glance -w stats A.hdf D.h5 imager_prof_retr_abi_.*::-999 'nwp__.__index:0:'
         glance stats A.nc cloud_top_height
         glance stats C.hdf cloud_top_*::-999.0
        """
        # NOTE(review): the docstring above is left untouched because it looks like it
        # doubles as the command's help text — confirm before rewording it.
        #
        # Returns 1 when the arguments are unusable, otherwise the status code of the
        # library call that was run (0 when that call returned nothing).

        # examine the args and see how many valid file paths we have;
        # anything that is not an existing path is treated as a variable name
        files = []
        variables = []
        LOG.debug("Examining arguments to see how many possible file paths we have.")
        for argument_val in args:
            if os.path.exists(argument_val):
                # double check that this is not a directory
                if os.path.isdir(argument_val):
                    LOG.warning("Glance statistics cannot currently analyze directories of files. Please provide specific file paths.")
                    return 1
                LOG.debug("Argument value is an existing file path: " + argument_val)
                files.append(clean_path(argument_val))
            else:
                LOG.debug("Argument value is not an existing file path, it will be treated as a variable name: " + argument_val)
                variables.append(argument_val)

        # if we have no file paths, just stop now
        if len(files) < 1:
            LOG.warning("Expected at least one valid file path as input data. " +
                        "Unable to create statistics without a file path.")
            return 1

        # organize our command line options and get some other informational stuff
        tempOptions = config_organizer.convert_options_to_dict(options)
        do_doc = (options.verbose or options.debug)
        to_return = 0

        # if we were given an output path use that to create the stats
        toPrintTo = sys.stdout
        outpath = clean_path(options.outputpath)
        fileForOutput = None
        if outpath != clean_path('./') :
            # if needed, create the directory
            setup_dir_if_needed(outpath, "output")
            # open the file for writing, get rid of whatever's there
            fileForOutput = open(os.path.join(outpath, "stats.txt"), "w")
            toPrintTo = fileForOutput

        # the try/finally makes sure the output file gets closed even if the analysis raises
        try:
            if len(files) == 1:
                # if we have one file path, analyze that single file
                a_file_path = files[0]
                LOG.info("Generating statistics for file: " + a_file_path)
                to_return = inspect_stats_library_call(a_file_path,
                                                       var_list=variables,
                                                       options_set=tempOptions,
                                                       do_document=do_doc,
                                                       output_channel=toPrintTo)
            else:
                # just in case, let's at least give a warning so the user has some idea why we ignored some paths
                if len(files) > 2:
                    LOG.warning("More than two file paths were found in your command line input. "
                                "Only the first two will be used. The rest will be ignored.")

                # if we have two (or more) file paths, create comparison statistics for the first two
                a_file_path = files[0]
                b_file_path = files[1]
                LOG.info("Generating statistical comparison for files: " + a_file_path + "   " + b_file_path)
                to_return = stats_library_call(a_file_path, b_file_path,
                                               var_list=variables,
                                               options_set=tempOptions,
                                               do_document=do_doc,
                                               output_channel=toPrintTo)
        finally:
            # close our text output file if needed
            if fileForOutput is not None:
                fileForOutput.close()

        # return a return code if we have one, otherwise everything must be fine, right?
        return 0 if to_return is None else to_return

    def plotDiffs(*args) :
        """create a set of images comparing two files (Deprecated, please use report in future.)
        This command is DEPRECATED. Please use the report command in future.

        Create and save images comparing variables in two files. Variables to be compared may be specified after
        the names of the two input files. If no variable names are given, this command will create plots for all
        variables that can be matched by name and shape between the two files.

        If latitude and longitude data are present in the file and specified in the call options, the variables
        will be plotted on a map. The longitude and latitude variable names may be specified with --longitude
        and --latitude command line options. If no longitude or latitude names are specified Glance will attempt
        to use pixel_longitude and pixel_latitude.

        If you would rather plot the data without a map, use the --nolonlat option.

        The created images will be saved at the provided output path in subdirectories named for each variable
        analyzed. If no output path is provided, output will be saved in the current directory.

        Examples:

         glance plotDiffs A.hdf B.hdf
         glance plotDiffs C.nc D.h5 variable_name_1:epsilon1: variable_name_2 variable_name_3:epsilon3:missing3
         glance --outputpath=/path/where/output/will/be/placed/ plotDiffs A.hdf B.hdf
         glance plotDiffs --longitude=lon_variable_name --latitude=lat_variable_name A.hdf B.hdf variable_name
        """
        # NOTE(review): the docstring above is left untouched because it looks like it
        # doubles as the command's help text — confirm before rewording it.

        # set the options so that a report will not be generated
        options.imagesOnly = True

        # make the images; hand reportGen's status code back to the caller so that a
        # failure (for example fewer than two file paths) is not reported as success
        return reportGen(*args)

    def reportGen(*args) :
        """create a report comparing two files (Deprecated, please use report in future.)
        This command is DEPRECATED. Please use the report command in future.

        Generate an html report comparing the variables in two files. Variables to be compared may be specified after
        the names of the two input files. If no variable names are given, this command will create reports for all
        variables that can be matched by name and shape between the two files.

        If latitude and longitude data are present in the file and specified in the call options, the variable
        comparison plots will be drawn on a map. The longitude and latitude variable names may be specified with
        --longitude and --latitude command line options. If no longitude or latitude names are specified Glance
        will attempt to use pixel_longitude and pixel_latitude.

        If you would rather analyze the data without trying to place it on a map, use the --nolonlat option.

        The created reports and images will be saved at the provided output path in subdirectories named for
        each variable analyzed. If no output path is provided, output will be saved in the current directory.
        Created images will be embedded in the report or visible as separate .png files.

        If you would prefer to generate the report without images, use the --reportonly option. This option will
        generate the html report but omit the images. This may be significantly faster, depending on your system,
        but the differences between the files may be more difficult to interpret.

        Examples:

         glance reportGen A.hdf C.nc
         glance reportGen A.hdf B.hdf variable_name_1:epsilon1: variable_name_2 variable_name_3:epsilon3:missing3 variable_name_4::missing4
         glance --outputpath=/path/where/output/will/be/placed/ reportGen A.hdf D.h5
         glance reportGen --longitude=lon_variable_name --latitude=lat_variable_name D.h5 C.nc variable_name
         glance reportGen --imagesonly A.hdf B.hdf
        """
        # NOTE(review): the docstring above is left untouched because it looks like it
        # doubles as the command's help text — confirm before rewording it.
        #
        # Returns 1 when fewer than two arguments were given, otherwise whatever
        # reportGen_library_call returns.

        tempOptions = config_organizer.convert_options_to_dict(options)

        if len(args) < 2 :
            LOG.warning("Expected two paths to files to compare. "
                        "Unable to generate a comparison report or comparison plots without two file paths.")
            return 1

        # the first two arguments are the files to compare, any others are variable selections
        a_path = clean_path(args[0])
        b_path = clean_path(args[1])

        return reportGen_library_call(a_path, b_path, args[2:], tempOptions)
    
    def inspectStats(*args):
        """create statistics summary of one file (Deprecated, please use stats in future.)
        This command is DEPRECATED. Please use the stats command in future.

        Print statistical summaries of the variables in a file. If no variable names are given, this
        command will summarize all variables found in the file.

        Run with -v to get more detailed information about the statistics being reported.

        Examples:

         glance    inspectStats A.hdf
         glance    inspectStats A.hdf baseline_cmask_seviri_cloud_mask
         glance -w inspectStats B.nc imager_prof_retr_abi_total_precipitable_water_low::-999
        """
        # NOTE(review): the docstring above is left untouched because it looks like it
        # doubles as the command's help text — confirm before rewording it.
        #
        # Returns 1 when no file path was given; otherwise nothing is returned.

        if len(args) < 1:
            LOG.warning("Expected a path to a file to inspect. "
                        "Unable to generate inspection statistics without a file path.")
            return 1

        afn = args[0]
        do_doc = (options.verbose or options.debug)

        tempOptions = config_organizer.convert_options_to_dict(options)

        # TODO, clean up how the output is set up
        # if we were given an output path use that to create the stats
        toPrintTo = sys.stdout
        outpath = clean_path(options.outputpath)
        fileForOutput = None
        if outpath != clean_path('./') :

            # if needed, create the directory
            setup_dir_if_needed(outpath, "output")

            # open the file for writing, get rid of whatever's there
            # (os.path.join keeps the path valid on every platform)
            fileForOutput = open(os.path.join(outpath, "stats.txt"), "w")
            toPrintTo     = fileForOutput

        # the try/finally makes sure the output file gets closed even if the analysis raises
        try:
            inspect_stats_library_call(clean_path(afn), var_list=args[1:],
                                       options_set=tempOptions, do_document=do_doc,
                                       output_channel=toPrintTo)
        finally:
            if fileForOutput is not None :
                fileForOutput.close()
    
    def inspectReport(*args) :
        """create a report to inspect the contents of one file (Deprecated, please use report in future.)
        This command is DEPRECATED. Please use the report command in future.

        Generate an html report examining the variables in a file. Variables to be analyzed may be specified after
        the name of the input file. If no variable names are given, this command will create reports for all
        variables in the file.

        If latitude and longitude data are present in the file and specified in the call options, the variable
        plots will be drawn on a map. The longitude and latitude variable names may be specified with
        --longitude and --latitude command line options. If no longitude or latitude names are specified Glance
        will attempt to use pixel_longitude and pixel_latitude.

        If you would rather analyze the data without trying to place it on a map, use the --nolonlat option.

        The created reports and images will be saved at the provided output path in subdirectories named for
        each variable analyzed. If no output path is provided, output will be saved in the current directory.
        Created images will be embedded in the report or visible as separate .png files.

        If you would prefer to generate the report without images, use the --reportonly option. This option will
        generate the html report but omit the images. This may be significantly faster, depending on your system,
        but the results may be more difficult to interpret.

        Examples:

         glance inspect_report A.hdf variable_name_1:: variable_name_2 variable_name_3::missing3 variable_name_4::missing4
         glance --outputpath=/path/where/output/will/be/placed/ inspect_report C.nc
         glance inspect_report --longitude=lon_variable_name --latitude=lat_variable_name D.h5 variable_name
         glance inspect_report --reportonly A.hdf
        """
        # NOTE(review): the docstring above is left untouched because it looks like it
        # doubles as the command's help text — confirm before rewording it.
        #
        # Returns 1 when no file path was given, otherwise whatever
        # inspect_library_call returns.

        if len(args) < 1:
            LOG.warning("Expected a path to a file to inspect. "
                        "Unable to generate an inspection report without a file path.")
            return 1

        tempOptions = config_organizer.convert_options_to_dict(options)

        # args[0] is the path of the file to be analyzed, any other args should be variable names
        return inspect_library_call(clean_path(args[0]), args[1:], tempOptions)
    
    def colocateData(*args) :
        """colocate data from two files
        
        Colocates data in the two given input files and saves it to separate output files.

        Data will be colocated based on its corresponding longitude and latitude. Multiple matches may be
        made between a data point in file A and those in file B if they are within the longitude/latitude epsilon.
        Points from each file that could not be matched and the number of duplicate matches will also be
        recorded in the output file.
        
        The user may also use the notation variable_name::missing_value to specify the missing_value which indicates
        missing data. If no missing value is given, glance will attempt to load a missing value from the input file.
        If there is no missing value defined for that variable in the file, no missing value will be analyzed.
        Missing value data points will not be considered for colocation.
        
        Data which corresponds to longitude or latitude values which fall outside the earth (outside the normally
        accepted valid ranges) will also be considered invalid and will not be considered for colocation.
        
        The longitude and latitude variables may be specified with --longitude and --latitude
        If no longitude or latitude are specified the pixel_latitude and pixel_longitude variables will be used.
        The longitude and latitude epsilon may be specified with --llepsilon
        If no longitude/latitude epsilon is given the value of 0.0 (degrees) will be used
        
        The output data files generated by this option will appear in the selected output directory, or the current
        directory if no out put directory is selected. The output files will be named originalFileName-colocation.nc
        (replacing "originalFileName" with the names of your input files).
        
        Examples:

         glance colocateData A.hdf B.hdf variable_name_1 variable_name_2 variable_name_3::missing3 
         glance colocateData --outputpath=/path/where/output/will/be/placed/ A.nc B.nc
         glance colocateData --longitude=lon_variable_name --latitude=lat_variable_name A.hdf B.hdf variable_name
         glance colocateData --llepsilon=0.0001 A.nc B.hdf
        """
        # Note: the docstring above is kept verbatim because the help command prints it to the user.

        # the first two arguments must be the A and B file paths; without both there is nothing to colocate
        if len(args) < 2:
            # Logger.warn is a deprecated alias, so use Logger.warning
            LOG.warning("Expected two paths to files to colocate. "
                        "Unable to generate colocation information without two file paths.")
            return 1

        # TODO, is this really needed?
        # these are switched off on the shared command line options object before it is
        # converted into the options dictionary below
        options.imagesOnly = False
        options.htmlOnly   = False
        options.doFork     = False

        tempOptions = config_organizer.convert_options_to_dict(options)

        # TODO, remove this eventually
        tempOptions[DO_COLOCATION_KEY] = True

        a_path = clean_path(args[0])
        b_path = clean_path(args[1])

        # any arguments after the two file paths are passed along as the requested variable names;
        # nothing is returned from here, so the caller in main treats this command as exit code 0
        colocateToFile_library_call(a_path, b_path, args[2:], tempOptions)
    
    # Note: the figure plotting in the GUI is dependent on having selected an interactive renderer in the first "use"
    # statement at the beginning of this module. (It had to be moved into this module to pre-empt other use statements
    # from imports of other glance modules.)
    def gui (*args) :
        """start the Glance graphical user interface
        
        This option launches the graphical user interface for Glance. This interface includes only some of the
        functionality of the command line version of Glance and may be expanded in the future.
        
        Files to be loaded may be specified on the command line or loaded inside the GUI.

        The various logging related arguments (quiet, verbose, debug, etc.) may be used if desired.
        
        Examples:

         glance gui
         glance gui -w
         glance gui A.hdf
         glance gui A.hdf C.nc
        """
        # Note: the docstring above is kept verbatim because the help command prints it to the user.

        LOG.debug("Launching Glance GUI")
        controller = gui_control.GlanceGUIController(get_glance_version_string())

        # preload at most two files: the first path goes in the A slot and the second in the B slot;
        # zip stops at the shorter sequence, so missing paths are skipped and extra paths are ignored
        for file_slot, file_path in zip((A_CONST, B_CONST), args) :
            controller.newFileSelected(file_slot, file_path)

        controller.launch_gui()

    def report (*args) :
        """generate reports of various types depending on your input

        Depending on how many input file paths you give this command it will either generate comparison or
        inspection reports. If you give it a directory instead of a file path it will search the underlying file
        structure to find any appropriately typed files and attempt to create reports for each of them in the
        output directory. If you include two directory paths it will attempt to automatically match files and
        create comparison reports for them.

        Note: If you wish to automatically match files in two directories and they do not have the same names,
        you can strip out sections that may not match using '--stripfromname'. If your names have nothing in
        common Glance will not be able to figure out how to match them and you will have to run separate reports
        for each file pair or provide input directories of files that have been soft linked or renamed to more
        compatible file names.

        If latitude and longitude data are present in the file(s) and specified in the call options, the
        plots will be drawn on a map. The longitude and latitude variable names may be specified with
        --longitude and --latitude command line options. If no longitude or latitude names are specified Glance
        will attempt to use pixel_longitude and pixel_latitude.

        If you would rather plot the data without trying to place it on a map, use the --nolonlat option.

        The created reports and images will be saved at the provided output path in subdirectories named for
        each variable analyzed. If no output path is provided, output will be saved in the current directory.
        Created images will be embedded in the report or visible as separate .png files.

        Note: If you provided one or two directory paths and those paths included more than one set of files
        that Glance is able to generate reports for, those reports will be placed in the output path in
        separate temporarily directories. These directories are labeled numerically and with the name of
        the A file. A "summary.html" summary report for the run will be created in the top level output
        directory when comparing files (when simply inspecting files no summary report will be created).

        If you would prefer to generate reports without images, use the --reportonly option. This option will
        generate the html report but omit the images. This may be significantly faster, depending on your system,
        but the differences between the files may be more difficult to interpret.

        If you need to generate plots but do not want the reports, you can use the --imagesonly option. This
        option will generate only the plots and omit the html reports. Unfortunately there will probably not
        be much time savings, as the images are usually the bulk of the run time.

        Examples:

         glance report A.hdf variable_name_1:: variable_name_2 variable_name_3::missing3 variable_name_4::missing4
         glance report A.nc B.hdf variable_name::missing_value
         glance report C.nc --outputpath=/path/where/output/will/be/placed/
         glance report --longitude=lon_variable_name --latitude=lat_variable_name D.h5 variable_name
         glance report --nolonlat ./A_dir/ ./B_dir/
         glance report --reportonly ./A_dir/
        """
        # Note: the docstring above is kept verbatim because the help command prints it to the user.

        # sort the arguments: anything that exists on disk is an input path,
        # everything else is treated as the name of a variable to analyze
        files = [ ]
        variables = [ ]
        LOG.debug("Examining arguments to see how many possible file paths we have.")
        for argument_val in args :
            if os.path.exists(argument_val) :
                LOG.debug("Argument value is an existing file path: " + argument_val)
                files.append(clean_path(argument_val))
            else :
                LOG.debug("Argument value is not an existing file path, it will be treated as a variable name: " + argument_val)
                variables.append(argument_val)

        # if we have no file paths, just stop now
        if not files :
            LOG.warning("Expected at least one valid file path as input data. "
                        "Unable to generate a report without a file path.")
            return 1

        # organize our command line options
        tempOptions = config_organizer.convert_options_to_dict(options)

        # if there are requested variables and a config file, warn the user that we're going to ignore their request
        if variables and tempOptions[OPTIONS_CONFIG_FILE_KEY] is not None :
            LOG.warning("User requested specific variables (" + str(variables) + ") on the command line that will be "
                        "overridden by the requested configuration file. Requested variables from the command line will be ignored. "
                        "Please list your desired variables in your configuration file.")

        # --- one input path: do either one or many inspection reports ---
        if len(files) == 1 :
            a_file_path = files[0]

            # a regular file gets exactly one inspection report
            if not os.path.isdir(a_file_path) :
                LOG.info("Generating inspection report for file: " + a_file_path)
                return inspect_library_call(a_file_path, variables, tempOptions)

            # a directory gets one inspection report for each usable file found under it
            a_files_list = _get_possible_files_from_dir(a_file_path)
            if len(a_files_list) <= 0 :
                LOG.warning("Unable to find any files to analyze in the given directory path.")
                return 1

            # only use numbered inner directories when there is more than one report to keep apart
            use_inner_dirs = len(a_files_list) > 1
            to_return = 0
            for temp_offset, file_path in enumerate(sorted(a_files_list)) :
                ops_copy = tempOptions.copy()
                if use_inner_dirs :
                    file_name_temp = os.path.splitext(os.path.basename(file_path))[0]
                    # NOTE(review): unlike the comparison reports below, this number is not zero padded;
                    # left as-is so existing output directory names do not change
                    ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY],
                                                                     "report-" + str(temp_offset) + "-" + file_name_temp)
                LOG.info("Generating inspection report for file: " + file_path)
                # the individual return codes are summed, so any non-zero result makes the total non-zero
                to_return += inspect_library_call(file_path, variables, ops_copy)
            return to_return

        # --- two (or more) input paths: do either one or many comparison reports ---

        # just in case, let's at least give a warning so the user has some idea why we ignored some paths
        if len(files) > 2 :
            LOG.warning("More than two file paths were found in your command line input. "
                        "Only the first two will be used. The rest will be ignored.")

        a_file_path = files[0]
        b_file_path = files[1]

        # if both the paths are regular files, just run one report
        if os.path.isfile(a_file_path) and os.path.isfile(b_file_path) :
            LOG.info("Generating comparison report for files: " + a_file_path + "   " + b_file_path)
            return reportGen_library_call(a_file_path, b_file_path, variables, tempOptions)

        # anything other than two directories at this point cannot be compared
        if not (os.path.isdir(a_file_path) and os.path.isdir(b_file_path)) :
            LOG.error("You have provided one directory path and one file path. "
                      "Please input paths to two directories or two files, not a mixture.")
            return 1

        # both paths are directories, so try to pair up the files inside them
        file_pairs = _match_files_from_dirs(a_file_path, b_file_path, tempOptions[OPTIONS_RE_TO_STRIP_KEY])
        # if we didn't find anything, warn the user and stop
        if len(file_pairs) <= 0 :
            LOG.warning("Unable to match any files between the given directories. "
                        "Please check that the files are named the same in both directories.")
            return 1

        # how many potential reports are we making? zero pad the directory numbers to the same width
        pad_to_digits = len(str(len(file_pairs)))
        use_inner_dirs = len(file_pairs) > 1
        to_return = 0
        summary_report_info = { }
        # run each of the reports, putting them in inner temp dirs when there is more than one
        for temp_offset, (single_a_file, single_b_file) in enumerate(sorted(file_pairs)) :
            ops_copy = tempOptions.copy()
            LOG.info("Generating comparison report for files: " + single_a_file + "   " + single_b_file)
            if use_inner_dirs :
                file_name_temp = os.path.splitext(os.path.basename(single_a_file))[0]
                out_dir_name = "report-" + str(temp_offset).zfill(pad_to_digits) + "-" + file_name_temp
                ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], out_dir_name)
                # also collect the per-report summary info, keyed by the inner directory name
                temp_rc, summary_report_info[out_dir_name] = reportGen_library_call(single_a_file, single_b_file,
                                                                                    variables, ops_copy,
                                                                                    do_return_summary_info=True)
                to_return += temp_rc
            else :
                to_return += reportGen_library_call(single_a_file, single_b_file, variables, ops_copy)

        # if we did a bunch of reports, make a top level summary
        if summary_report_info :
            LOG.info('Generating summary of all reports created in this run')
            reportModule.generate_and_save_concise_report(tempOptions[OPTIONS_OUTPUT_PATH_KEY], 'summary.html',
                                                          summary_report_info)
        return to_return

    def help(command=None):
        """print help for a specific command or list of commands
        print help for a specific command or list of commands

        Examples:
             glance help stats
             glance help report
        """
        # Note: the docstring above is kept verbatim because this function prints command docstrings to the user.

        # if the user named a command we know about (ignoring case), print just that command's documentation
        if command is not None :
            requested_key = command.lower()
            if requested_key in lower_locals :
                doc_lines = lower_locals[requested_key].__doc__.split('\n')
                print()
                # the first line of the docstring is skipped; everything after it is printed as-is
                for doc_line in doc_lines[1:] :
                    print (doc_line)
                return

        # no command was given, or it was not recognized: print out a list of summaries for each command
        print("\n" + _get_all_commands_help_string(commands,))

    # all the local public functions are considered part of glance, collect them up:
    # every local name that is not listed in prior is added to the commands dictionary
    # (prior is defined earlier in main; presumably it records the names that existed
    # before the command functions were defined -- TODO confirm)
    commands.update(dict(x for x in list(locals().items()) if x[0] not in prior))

    # lowercase locals: map the lowercased name of each command to the local object with that name,
    # so that commands can be looked up without regard to case
    # Future: this is an awkward use and could be made more elegant
    # NOTE(review): locals() must be called directly in main's scope here; moving this loop into a
    # comprehension or helper could change which scope locals() reports
    lower_locals = {}
    for command_key in commands:
        lower_locals[command_key.lower()] = locals()[command_key]

    # get our command line argument handling set up
    options = config_organizer.parse_arguments(get_glance_version_string(),
                                               list(lower_locals.keys()),
                                               _get_all_commands_help_string(commands,), )
    # these are unpacked as the positional arguments of whichever command gets called below
    # (presumably the leftover positional command line arguments -- TODO confirm against parse_arguments)
    args = options.misc

    # if a self test was requested, run this module's doctests and exit with status 2
    if options.self_test:
        import doctest
        doctest.testmod()
        sys.exit(2)

    # set up the logging level based on the options the user selected on the command line
    # (default is WARNING; debug takes priority over verbose, which takes priority over quiet)
    lvl = logging.WARNING
    if options.debug:
        lvl = logging.DEBUG
    elif options.verbose:
        lvl = logging.INFO
    elif options.quiet:
        lvl = logging.ERROR
    logging.basicConfig(level=lvl)

    # if what the user asked for is not one of our existing functions, print the help
    to_return = 0
    if (options.command == "") or (options.command not in lower_locals):
        # a version request without a valid command is not an error
        # (presumably the version text is printed elsewhere -- TODO confirm)
        if options.version:
            to_return = 0
        else :
            # show the option help followed by the summary of all commands, and return a failing code
            options.print_help()
            help()
            to_return = 9
    else:
        # call the function the user named, given the arguments from the command line
        # NOTE(review): options.command is used exactly as given, both in the membership test above and in
        # this lookup; nothing here lowercases it, so ignoring case relies on parse_arguments -- TODO confirm
        return_code = lower_locals[options.command](*args)
        # commands that return nothing are treated as successful
        to_return = 0 if return_code is None else return_code

    LOG.debug("Glance run complete, returning code: " + str(to_return))
    return to_return

if __name__ == '__main__':
    # when run as a script, hand the code returned by main() to the shell as the exit status
    exit_status = main()
    sys.exit(exit_status)