From 87d55e81b0cf5830fdd9f8a96336ddb641daa672 Mon Sep 17 00:00:00 2001
From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01>
Date: Fri, 30 Mar 2012 22:54:54 +0000
Subject: [PATCH] adding a statistics option to inspect variables in one file
 on the command line

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@169 8a9318a1-56ba-4d59-b755-99d26321be01
---
 pyglance/glance/compare.py | 453 ++++++++++++++++++++++++++++++++++++-
 pyglance/glance/stats.py   | 328 +++++++++++++++++++++++++++
 2 files changed, 778 insertions(+), 3 deletions(-)

diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index bb675a0..39c728f 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -287,8 +287,9 @@ def _load_config_or_options(aPath, bPath, optionsSet, requestedVars = [ ]) :
     
     # set up the paths, they can only come from the command line
     paths = {}
-    paths['a']   = aPath 
-    paths['b']   = bPath
+    paths['a']   = aPath
+    if bPath is not None:
+        paths['b']   = bPath
     paths['out'] = optionsSet['outputpath']
     
     # the colocation selection can only come from the command line options
@@ -1255,6 +1256,334 @@ def _setup_dir_if_needed(dirPath, descriptionName) :
         LOG.info("Creating " + descriptionName + " directory.")
         os.makedirs(dirPath)
 
+def inspect_library_call (a_path, var_list=[ ],
+                            options_set={ },
+                            # todo, this doesn't yet do anything
+                            do_document=False,
+                            # todo, the output channel does nothing at the moment
+                            output_channel=sys.stdout) :
+    """
+    this method handles the actual work of the inspect command line tool
+    and can also be used as a library routine; pass in the slightly parsed
+    command line input or call it directly as a library function, but be
+    sure to fill out the options
+    
+    TODO, at the moment the options are very brittle and need to be fully
+    filled in or this method will fail badly (note: the addition of some
+    glance defaults has minimized the problem, but you still need to be
+    careful when dealing with optional boolean values; this needs more work)
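+    
+    a rough usage sketch (the option keys shown here mirror those built by the
+    currently commented-out inspect_report command line wrapper and are only an
+    assumption of a minimal set; other options are expected to come from the
+    glance defaults mentioned above):
+    
+        inspect_library_call('/path/to/A.hdf',
+                             var_list=['.*temperature.*'],
+                             options_set={'outputpath': './out',
+                                          'configFile': None,
+                                          'imagesOnly': False,
+                                          'htmlOnly':   False})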
+    """
+    
+    # load the user settings from either the command line or a user defined config file
+    pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile = _load_config_or_options(a_path, None, # note, there is no B path
+                                                                                                options_set,
+                                                                                                requestedVars = var_list)
+    
+    # note some of this information for debugging purposes
+    LOG.debug('paths: ' +           str(pathsTemp))
+    LOG.debug('defaults: ' +        str(defaultValues))
+    LOG.debug('run information: ' + str(runInfo))
+    
+    # if we wouldn't generate anything, just stop now
+    if (not runInfo['shouldIncludeImages']) and (not runInfo['shouldIncludeReport']) :
+        LOG.warn("User selection of no image generation and no report generation will result in no " +
+                 "content being generated. Aborting generation function.")
+        return
+    
+    # hang onto info to identify who/what/when/where/etc. the report is being run by/for 
+    runInfo.update(_get_run_identification_info( ))
+    
+    # deal with the input and output files
+    if not (os.path.isdir(pathsTemp['out'])) :
+        LOG.info("Specified output directory (" + pathsTemp['out'] + ") does not exist.")
+        LOG.info("Creating output directory.")
+        os.makedirs(pathsTemp['out'])
+    # open the file
+    files = {}
+    LOG.info("Processing File A:")
+    aFile = dataobj.FileInfo(pathsTemp['a'])
+    files['file A'] = aFile.get_old_info_dictionary() # FUTURE move to actually using the file object to generate the report
+    if aFile.file_object is None:
+        LOG.warn("Unable to continue with examination because file (" + pathsTemp['a'] + ") could not be opened.")
+        sys.exit(1)
+    
+    # TODO, translate the copied comparison code below; it still references bFile
+    # and other comparison-only variables that do not exist in this inspect routine
+    
+    # get information about the names the user requested
+    finalNames, nameStats = _resolve_names(aFile.file_object, bFile.file_object,
+                                           defaultValues,
+                                           requestedNames, usedConfigFile)
+    
+    LOG.debug("output dir: " + str(pathsTemp['out']))
+    
+    # return for lon_lat_data variables will be in the form 
+    #{"lon": longitude_data,      "lat": latitude_data,      "inv_mask": spaciallyInvalidMaskData}
+    # or { } if there is no lon/lat info
+    lon_lat_data = { }
+    spatialInfo  = { }
+    try :
+        lon_lat_data, spatialInfo = _handle_lon_lat_info (runInfo, aFile, bFile, pathsTemp['out'],
+                                                          should_make_images = runInfo["shouldIncludeImages"],
+                                                          fullDPI=runInfo['detail_DPI'], thumbDPI=runInfo['thumb_DPI'])
+    except VariableLoadError, vle :
+        LOG.warn("Error while loading longitude or latitude: ")
+        LOG.warn(vle.msg)
+        exit(1)
+    except VariableComparisonError, vce :
+        LOG.warn("Error while comparing longitude or latitude: ")
+        LOG.warn(vce.msg)
+        exit(1)
+    
+    # if there is an approved lon/lat shape, hang on to that for future checks
+    good_shape_from_lon_lat = None
+    if len(lon_lat_data.keys()) > 0:
+        good_shape_from_lon_lat = lon_lat_data['common']['lon'].shape
+    
+    # this will hold information for the summary report
+    # it will be in the form
+    # [displayName] = {"passEpsilonPercent":     percent ok with epsilon,
+    #                  "finite_similar_percent": percent with the same finiteness, 
+    #                  "epsilon":                epsilon value used}
+    variableComparisons = {}
+    
+    # go through each of the possible variables in our files
+    # and make a report section with images for whichever ones we can
+    for displayName in finalNames:
+        
+        # pull out the information for this variable analysis run
+        varRunInfo = finalNames[displayName].copy()
+        
+        # get the various names
+        technical_name, b_variable_technical_name, \
+                explanationName = _get_name_info_for_variable(displayName, varRunInfo)
+        
+        LOG.info('analyzing: ' + explanationName)
+        
+        # load the variable data
+        aData = _load_variable_data(aFile.file_object, technical_name,
+                                    dataFilter = varRunInfo['data_filter_function_a'] if 'data_filter_function_a' in varRunInfo else None,
+                                    variableToFilterOn = varRunInfo['variable_to_filter_on_a'] if 'variable_to_filter_on_a' in varRunInfo else None,
+                                    variableBasedFilter = varRunInfo['variable_based_filter_a'] if 'variable_based_filter_a' in varRunInfo else None,
+                                    fileDescriptionForDisplay = "file A")
+        bData = _load_variable_data(bFile.file_object, b_variable_technical_name,
+                                    dataFilter = varRunInfo['data_filter_function_b'] if 'data_filter_function_b' in varRunInfo else None,
+                                    variableToFilterOn = varRunInfo['variable_to_filter_on_b'] if 'variable_to_filter_on_b' in varRunInfo else None,
+                                    variableBasedFilter = varRunInfo['variable_based_filter_b'] if 'variable_based_filter_b' in varRunInfo else None,
+                                    fileDescriptionForDisplay = "file B")
+        
+        # pre-check if this data should be plotted and if it should be compared to the longitude and latitude
+        include_images_for_this_variable = ((not('shouldIncludeImages' in runInfo)) or (runInfo['shouldIncludeImages']))
+        if 'shouldIncludeImages' in varRunInfo :
+            include_images_for_this_variable = varRunInfo['shouldIncludeImages']
+        do_not_test_with_lon_lat = (not include_images_for_this_variable) or (len(lon_lat_data.keys()) <= 0)
+        
+        # handle vector data
+        isVectorData = ( ('magnitudeName' in varRunInfo)  and ('directionName'  in varRunInfo) and
+                         ('magnitudeBName' in varRunInfo) and ('directionBName' in varRunInfo) )
+        
+        # check if this data can be displayed but
+        # don't compare lon/lat sizes if we won't be plotting
+        if ( (aData.shape == bData.shape) 
+             and 
+             ( do_not_test_with_lon_lat
+              or
+              ((aData.shape == good_shape_from_lon_lat) and (bData.shape == good_shape_from_lon_lat)) ) ) :
+            
+            # check to see if there is a directory to put information about this variable in,
+            # if not then create it
+            variableDir = os.path.join(pathsTemp['out'], './' + displayName)
+            varRunInfo['variable_dir'] = variableDir
+            varRunInfo['variable_report_path_escaped'] = quote(os.path.join(displayName, 'index.html'))
+            LOG.debug ("Directory selected for variable information: " + varRunInfo['variable_report_path_escaped'])
+            if not (os.path.isdir(variableDir)) :
+                LOG.debug("Variable directory (" + variableDir + ") does not exist.")
+                LOG.debug("Creating variable directory.")
+                os.makedirs(variableDir)
+            
+            # form the doc and config paths relative to where the variable is
+            upwardPath = './'
+            for number in range(len(displayName.split('/'))) : # TODO this is not general to windows
+                upwardPath = os.path.join(upwardPath, '../')
+            varRunInfo['doc_path'] = quote(os.path.join(upwardPath, 'doc.html'))
+            if 'config_file_name' in runInfo :
+                varRunInfo['config_file_path'] = quote(os.path.join(upwardPath, runInfo['config_file_name']))
+            
+            # figure out the masks we want, and then do our statistical analysis
+            mask_a_to_use = None
+            mask_b_to_use = None
+            if not do_not_test_with_lon_lat :
+                mask_a_to_use = lon_lat_data['a']['inv_mask']
+                mask_b_to_use = lon_lat_data['b']['inv_mask']
+            variable_stats = statistics.StatisticalAnalysis.withSimpleData(aData, bData,
+                                                                           varRunInfo['missing_value'], varRunInfo['missing_value_alt_in_b'],
+                                                                           mask_a_to_use, mask_b_to_use,
+                                                                           varRunInfo['epsilon'], varRunInfo['epsilon_percent']).dictionary_form()
+            
+            # add a little additional info to our variable run info before we squirrel it away
+            varRunInfo['time'] = datetime.datetime.ctime(datetime.datetime.now())  # todo is this needed?
+            didPass, epsilon_failed_fraction, \
+                     non_finite_fail_fraction, \
+                     r_squared_value = _check_pass_or_fail(varRunInfo, variable_stats, defaultValues)
+            varRunInfo['did_pass'] = didPass
+            # update the overall pass status
+            if didPass is not None :
+                didPassAll = didPassAll & didPass
+            
+            # based on the settings and whether the variable passed or failed,
+            # should we include images for this variable?
+            if ('only_plot_on_fail' in varRunInfo) and varRunInfo['only_plot_on_fail'] :
+                include_images_for_this_variable = include_images_for_this_variable and (not didPass)
+                varRunInfo['shouldIncludeImages'] = include_images_for_this_variable
+            
+            # to hold the names of any images created
+            image_names = {
+                            'original': [ ],
+                            'compared': [ ]
+                            }
+            
+            # create the images for this variable
+            if (include_images_for_this_variable) :
+                
+                plotFunctionGenerationObjects = [ ]
+                
+                # if the data is the same size, we can always make our basic statistical comparison plots
+                if (aData.shape == bData.shape) :
+                    plotFunctionGenerationObjects.append(plotcreate.BasicComparisonPlotsFunctionFactory())
+                
+                # if the bin and tuple are defined, try to analyze the data as complex
+                # multidimensional information requiring careful sampling
+                if ('binIndex' in varRunInfo) and ('tupleIndex' in varRunInfo) :
+                    plotFunctionGenerationObjects.append(plotcreate.BinTupleAnalysisFunctionFactory())
+                    
+                else : # if it's not bin/tuple, there are lots of other possibilities
+                    
+                    # if it's vector data with longitude and latitude, quiver plot it on the Earth
+                    if isVectorData and (not do_not_test_with_lon_lat) :
+                        plotFunctionGenerationObjects.append(plotcreate.MappedQuiverPlotFunctionFactory())
+                    
+                    # if the data is one dimensional we can plot it as lines
+                    elif   (len(aData.shape) == 1) :
+                        plotFunctionGenerationObjects.append(plotcreate.LinePlotsFunctionFactory())
+                    
+                    # if the data is 2D we have some options based on the type of data
+                    elif (len(aData.shape) == 2) :
+                        
+                        # if the data is not mapped to a longitude and latitude, just show it as an image
+                        if (do_not_test_with_lon_lat) :
+                            plotFunctionGenerationObjects.append(plotcreate.IMShowPlotFunctionFactory())
+                        
+                        # if it's 2D and mapped to the Earth, contour plot it on the earth
+                        else :
+                            plotFunctionGenerationObjects.append(plotcreate.MappedContourPlotFunctionFactory())
+                
+                # if there's magnitude and direction data, figure out the u and v, otherwise these will be None
+                aUData, aVData = _get_UV_info_from_magnitude_direction_info (aFile.file_object,
+                                                                             varRunInfo['magnitudeName'] if ('magnitudeName') in varRunInfo else None,
+                                                                             varRunInfo['directionName'] if ('directionName') in varRunInfo else None,
+                                                                             lon_lat_data['a']['inv_mask']
+                                                                             if ('a' in lon_lat_data) and ('inv_mask' in lon_lat_data['a']) else None)
+                bUData, bVData = _get_UV_info_from_magnitude_direction_info (bFile.file_object,
+                                                                             varRunInfo['magnitudeBName'] if ('magnitudeBName') in varRunInfo else None,
+                                                                             varRunInfo['directionBName'] if ('directionBName') in varRunInfo else None,
+                                                                             lon_lat_data['b']['inv_mask']
+                                                                             if ('b' in lon_lat_data) and ('inv_mask' in lon_lat_data['b']) else None)
+                
+                # plot our lon/lat related info
+                image_names['original'], image_names['compared'] = \
+                    plot.plot_and_save_comparison_figures \
+                            (aData, bData,
+                             plotFunctionGenerationObjects,
+                             varRunInfo['variable_dir'],
+                             displayName,
+                             varRunInfo['epsilon'],
+                             varRunInfo['missing_value'],
+                             missingValueAltInB = varRunInfo['missing_value_alt_in_b'] if 'missing_value_alt_in_b' in varRunInfo else None,
+                             lonLatDataDict=lon_lat_data,
+                             dataRanges     = varRunInfo['display_ranges']      if 'display_ranges'      in varRunInfo else None,
+                             dataRangeNames = varRunInfo['display_range_names'] if 'display_range_names' in varRunInfo else None,
+                             dataColors     = varRunInfo['display_colors']      if 'display_colors'      in varRunInfo else None,
+                             makeSmall=True,
+                             doFork=runInfo['doFork'],
+                             shouldClearMemoryWithThreads=runInfo['useThreadsToControlMemory'],
+                             shouldUseSharedRangeForOriginal=runInfo['useSharedRangeForOriginal'],
+                             doPlotSettingsDict = varRunInfo,
+                             aUData=aUData, aVData=aVData,
+                             bUData=bUData, bVData=bVData,
+                             binIndex=      varRunInfo['binIndex']        if 'binIndex'        in varRunInfo else None,
+                             tupleIndex=    varRunInfo['tupleIndex']      if 'tupleIndex'      in varRunInfo else None,
+                             binName=       varRunInfo['binName']         if 'binName'         in varRunInfo else 'bin',
+                             tupleName=     varRunInfo['tupleName']       if 'tupleName'       in varRunInfo else 'tuple',
+                             epsilonPercent=varRunInfo['epsilon_percent'] if 'epsilon_percent' in varRunInfo else None,
+                             fullDPI=       runInfo['detail_DPI'],
+                             thumbDPI=      runInfo['thumb_DPI'],
+                             units_a=       varRunInfo['units_a']         if 'units_a'         in varRunInfo else None,
+                             units_b=       varRunInfo['units_b']         if 'units_b'         in varRunInfo else None)
+                
+                LOG.info("\tfinished creating figures for: " + explanationName)
+            
+            # create the report page for this variable
+            if (runInfo['shouldIncludeReport']) :
+                
+                # hang on to our good % and other info to describe our comparison
+                epsilonPassedPercent = (1.0 -  epsilon_failed_fraction) * 100.0
+                finitePassedPercent  = (1.0 - non_finite_fail_fraction) * 100.0 
+                variableComparisons[displayName] = {'pass_epsilon_percent':   epsilonPassedPercent,
+                                                    'finite_similar_percent': finitePassedPercent,
+                                                    'r_squared_correlation':  r_squared_value,
+                                                    'variable_run_info':      varRunInfo
+                                                    }
+                
+                LOG.info ('\tgenerating report for: ' + explanationName) 
+                report.generate_and_save_variable_report(files,
+                                                         varRunInfo, runInfo,
+                                                         variable_stats,
+                                                         spatialInfo,
+                                                         image_names,
+                                                         varRunInfo['variable_dir'], "index.html")
+        
+        # if we can't compare the variable, we should tell the user 
+        else :
+            message = (explanationName + ' ' + 
+                     'could not be compared. This may be because the data for this variable does not match in shape ' +
+                     'between the two files (file A data shape: ' + str(aData.shape) + '; file B data shape: '
+                     + str(bData.shape) + ')')
+            if do_not_test_with_lon_lat :
+                message = message + '.'
+            else :
+                message = (message + ' or the data may not match the shape of the selected '
+                     + 'longitude ' + str(good_shape_from_lon_lat) + ' and '
+                     + 'latitude '  + str(good_shape_from_lon_lat) + ' variables.')
+            LOG.warn(message)
+        
+    # the end of the loop to examine all the variables
+    
+    # generate our general report pages once we've analyzed all the variables
+    if (runInfo['shouldIncludeReport']) :
+        
+        # get the current time
+        runInfo['time'] = datetime.datetime.ctime(datetime.datetime.now())
+        
+        # make the main summary report
+        LOG.info ('generating summary report')
+        report.generate_and_save_summary_report(files,
+                                                pathsTemp['out'], 'index.html',
+                                                runInfo,
+                                                variableComparisons, 
+                                                spatialInfo,
+                                                nameStats)
+        
+        # make the glossary
+        LOG.info ('generating glossary')
+        report.generate_and_save_doc_page(statistics.StatisticalAnalysis.doc_strings(), pathsTemp['out'])
+    
+    returnCode = 0 if didPassAll else 2 # return 2 only if some of the variables failed
+    
+    # if we are reporting the pass / fail, return an appropriate status code
+    if do_pass_fail :
+        LOG.debug("Pass/Fail return code: " + str(returnCode))
+        return returnCode
+
+
 def reportGen_library_call (a_path, b_path, var_list=[ ],
                             options_set={ },
                             # todo, this doesn't yet do anything
@@ -1673,6 +2002,53 @@ def stats_library_call(afn, bfn, var_list=[ ],
         return status_code
     # note: if we aren't doing pass/fail, stats will not return anything
 
+def inspect_stats_library_call (afn, var_list=[ ], options_set={ }, do_document=False, output_channel=sys.stdout): 
+    """
+    this method handles the actual work of the inspect_stats command line tool and
+    can also be used as a library routine; simply pass in an output channel
+    and/or use the returned dictionary of statistics for your own form of
+    display.
+    TODO, should this move to a different file?
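+    
+    a rough usage sketch (the 'missing' key is the only option this routine
+    reads; passing None lets each variable's own fill value be used):
+    
+        inspect_stats_library_call('A.hdf',
+                                    var_list=['.*_prof_retr_.*'],
+                                    options_set={'missing': None},
+                                    output_channel=sys.stdout)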
+    """
+    # unpack some options
+    missing_val  = options_set['missing']
+    
+    LOG.debug ("file a: " + afn)
+    
+    # open the file
+    filesInfo = _open_and_process_files([afn], 1)
+    aFile = filesInfo[afn]['fileObject']
+    
+    # figure out the variable names and their individual settings
+    if len(var_list) <= 0 :
+        var_list = ['.*']
+    names = _parse_varnames( filesInfo['commonVarNames'], var_list, epsilon=None, missing=missing_val )
+    LOG.debug(str(names))
+    doc_each  = do_document and len(names)==1
+    doc_atend = do_document and len(names)!=1
+    
+    for name, epsilon, missing in names:
+        aData = aFile[name]
+        amiss = missing
+        if missing is None:
+            amiss = aFile.missing_value(name)
+        LOG.debug('analyzing %s with missing data value %s' % (name,amiss))
+        print >> output_channel, '-'*32
+        print >> output_channel, name
+        print >> output_channel, ''
+        variable_stats = statistics.StatisticalInspectionAnalysis.withSimpleData(aData, amiss)
+        lal = list(variable_stats.dictionary_form().items())
+        lal.sort()
+        for dictionary_title, dict_data in lal:
+            print >> output_channel, '%s' %  dictionary_title
+            for each_stat in sorted(list(dict_data)):
+                print >> output_channel, '  %s: %s' % (each_stat, dict_data[each_stat])
+                if doc_each: print >> output_channel, ('    ' + statistics.StatisticalInspectionAnalysis.doc_strings()[each_stat])
+            print >> output_channel, '' 
+    if doc_atend:
+        print >> output_channel, ('\n\n' + statistics.STATISTICS_DOC_STR)
+
 def main():
     import optparse
     usage = """
@@ -1683,7 +2059,7 @@ examples:
 python -m glance.compare info A.hdf
 python -m glance.compare stats A.hdf B.hdf '.*_prof_retr_.*:1e-4' 'nwp_._index:0'
 python -m glance.compare plotDiffs A.hdf B.hdf
-python -m glance compare reportGen A.hdf B.hdf
+python -m glance.compare reportGen A.hdf B.hdf
 python -m glance 
 
 """
@@ -1874,6 +2250,77 @@ python -m glance
         
         return reportGen_library_call(a_path, b_path, args[2:], tempOptions)
     
+    def inspectStats(*args):
+        """create statistics summary of variables from one file
+        Summarize data on variables in a file.
+        If no variable names are given, summarize all variables.
+        Variable names can be of the form varname::missing to use non-default missing value.
+        Variable names can be regular expressions, e.g. 'image.*' or '.*prof_retr.*::-999'
+        Missing can be empty to stay with default.
+        If _FillValue is an attribute of a variable, that will be used to find missing values where no value is given.
+        Run with -v to get more detailed information on inspect_stats.
+        Examples:
+         python -m glance.compare    inspect_stats A.hdf
+         python -m glance.compare    inspect_stats A.hdf baseline_cmask_seviri_cloud_mask
+         python -m glance.compare -w inspect_stats A.hdf imager_prof_retr_abi_total_precipitable_water_low::-999
+        """ 
+        afn = args[0]
+        do_doc = (options.verbose or options.debug)
+        
+        tempOptions = { }
+        tempOptions['missing']       = options.missing
+        # add more if needed for stats
+        
+        inspect_stats_library_call(_clean_path(afn), var_list=args[1:], options_set=tempOptions, do_document=do_doc)
+    
+#    def inspect_report(*args) :
+#        """inspect the contents of a file
+#        This option creates a report and or images examining variables in a file.
+#        
+#        An html report and images detailing the variables in the file will be generated and saved to disk.
+#        The images will be embedded in the report or visible as separate .png files.
+#        
+#        Variables to be compared may be specified after the names of the input file. If no variables
+#        are specified, all variables that match the shape of the longitude and latitude will be compared.
+#        Specified variables that do not exist or do not match the correct data shape will be ignored.
+#        
+#        The user may also use the notation variable_name::missing_value to specify the missing_value which indicates
+#        fill data. If this value is absent (in the case of variable_name:: or just variable_name) glance with attempt
+#        to load the missing value from the file (failing that, no missing values will be analyzed).
+#        
+#        The html report and any created images will be stored in the provided path, or if no path is provided,
+#        they will be stored in the current directory.
+#        
+#        If for some reason you would prefer to generate the report without images, use the --reportonly option. This
+#        option will generate the html report but omit the images. This may be significantly faster, depending on
+#        your system, but the information may be quite a bit more difficult to interpret.
+#        
+#        The longitude and latitude variables may be specified with --longitude and --latitude
+#        If no longitude or latitude are specified the pixel_latitude and pixel_longitude variables will be used by default.
+#        If no longitude or latitude mappings are desired, the --nolonlat option will disable this spatial mapping.
+#        
+#        Examples:
+#         python -m glance.compare inspect_report A.hdf variable_name_1:: variable_name_2 variable_name_3::missing3 variable_name_4::missing4
+#         python -m glance.compare --outputpath=/path/where/output/will/be/placed/ inspect_report A.hdf 
+#         python -m glance.compare inspect_report --longitude=lon_variable_name --latitude=lat_variable_name A.hdf variable_name
+#         python -m glance.compare inspect_report --reportonly A.hdf
+#        """
+#        
+#        tempOptions = { }
+#        tempOptions['outputpath']    = _clean_path(options.outputpath)
+#        tempOptions['configFile']    = _clean_path(options.configFile)
+#        tempOptions['imagesOnly']    = options.imagesOnly
+#        tempOptions['htmlOnly']      = options.htmlOnly
+#        tempOptions['doFork']        = False
+#        tempOptions['noLonLatVars']  = options.noLonLatVars
+#        tempOptions['latitudeVar']   = options.latitudeVar
+#        tempOptions['longitudeVar']  = options.longitudeVar
+#        tempOptions['missing']       = options.missing
+#        
+#        a_path = _clean_path(args[0])
+#        
+#        return inspect_library_call(a_path, args[1:], tempOptions)
+    
     def colocateData(*args) :
         """colocate data in two files
         
diff --git a/pyglance/glance/stats.py b/pyglance/glance/stats.py
index af89e49..ccb6efb 100644
--- a/pyglance/glance/stats.py
+++ b/pyglance/glance/stats.py
@@ -129,6 +129,56 @@ class MissingValueStatistics (StatisticalData) :
         
         return MissingValueStatistics._doc_strings
 
+class MissingValueInspectionStatistics (StatisticalData) :
+    """
+    A class representing information about where fill values are found
+    in a data set.
+    
+    includes the following statistics:
+    
+    missing_count    - count    of points that are missing in the data set
+    missing_fraction - fraction of points that are missing in the data set
+    """
+    
+    _doc_strings = {
+                    'missing_count':         "number of values flagged missing",
+                    'missing_fraction':      "fraction of values flagged missing",
+                    }
+    
+    def __init__(self, dataObject) :
+        """
+        build our fill value related statistics based on the data set
+        """
+        self.title = 'Missing Value Statistics'
+        
+        # pull out a mask for later use
+        missing_mask          = dataObject.masks.missing_mask
+        
+        # figure out some basic statistics
+        self.missing_count    = np.sum(missing_mask)
+        self.missing_fraction = float(self.missing_count) / float(missing_mask.size)
+    
+    def dictionary_form(self) :
+        """
+        get a dictionary form of the statistics
+        """
+        
+        toReturn = {
+                    'missing_count':         self.missing_count,
+                    'missing_fraction':      self.missing_fraction,
+                    }
+        
+        return toReturn
+    
+    @staticmethod
+    def doc_strings( ) :
+        """
+        get documentation strings that match the
+        dictionary form of the statistics
+        """
+        
+        return MissingValueInspectionStatistics._doc_strings
+
 class FiniteDataStatistics (StatisticalData) :
     """
     A class representing information about where finite values are found
@@ -217,6 +267,56 @@ class FiniteDataStatistics (StatisticalData) :
         
         return FiniteDataStatistics._doc_strings
 
+class FiniteDataInspectionStatistics (StatisticalData) :
+    """
+    A class representing information about where finite values are found
+    in a data set.
+    
+    includes the following statistics:
+    
+    finite_count              - the number   of finite data values in the data set
+    finite_fraction           - the fraction of finite data values in the data set
+    """
+    
+    _doc_strings = {
+                    'finite_count': "number of finite values",
+                    'finite_fraction': "fraction of finite values (out of all data points in set)",
+                    }
+    
+    def __init__(self, dataObject) :
+        """
+        build our finite data related statistics based on the data set
+        """
+        self.title = 'Finite Data Statistics'
+        
+        # pull out some data we will use later
+        is_finite_mask       = dataObject.masks.valid_mask
+        
+        # figure out some basic statistics
+        self.finite_count    = np.sum(is_finite_mask)
+        self.finite_fraction = float(self.finite_count) / float(is_finite_mask.size)
+    
+    def dictionary_form(self) :
+        """
+        get a dictionary form of the statistics
+        """
+        
+        toReturn = {
+                    'finite_count':              self.finite_count,
+                    'finite_fraction':           self.finite_fraction,
+                    }
+        
+        return toReturn
+    
+    @staticmethod
+    def doc_strings( ) :
+        """
+        get documentation strings that match the
+        dictionary form of the statistics
+        """
+        
+        return FiniteDataInspectionStatistics._doc_strings
+
 class NotANumberStatistics (StatisticalData) :
     """
     A class representing information about where non-finite values are found
@@ -292,6 +392,56 @@ class NotANumberStatistics (StatisticalData) :
         
         return NotANumberStatistics._doc_strings
 
+class NotANumberInspectionStatistics (StatisticalData) :
+    """
+    A class representing information about where non-finite values are found
+    in a data set.
+    
+    includes the following statistics:
+    
+    nan_count         - the number   of non-finite values that are present in the data set
+    nan_fraction      - the fraction of non-finite values that are present in the data set
+    """
+    
+    _doc_strings = {
+                    'nan_count': "number of NaNs",
+                    'nan_fraction': "fraction of NaNs",
+                    }
+    
+    def __init__(self, dataObject) :
+        """
+        build our nonfinite data related statistics based on the data set
+        """
+        self.title = 'NaN Statistics'
+        
+        # pull out a mask we will use
+        nan_mask = dataObject.masks.non_finite_mask
+        
+        # get some basic statistics
+        self.nan_count = np.sum(nan_mask)
+        self.nan_fraction = float(self.nan_count) / float(nan_mask.size)
+    
+    def dictionary_form(self) :
+        """
+        get a dictionary form of the statistics
+        """
+        
+        toReturn = {
+                    'nan_count':         self.nan_count,
+                    'nan_fraction':      self.nan_fraction,
+                    }
+        
+        return toReturn
+    
+    @staticmethod
+    def doc_strings( ) :
+        """
+        get documentation strings that match the
+        dictionary form of the statistics
+        """
+        
+        return NotANumberInspectionStatistics._doc_strings
+
 class GeneralStatistics (StatisticalData) :
     """
     A class representing general information about a pair of data sets.
@@ -399,6 +549,83 @@ class GeneralStatistics (StatisticalData) :
         
         return GeneralStatistics._doc_strings
 
+class GeneralInspectionStatistics (StatisticalData) :
+    """
+    A class representing general information about a data set.
+    
+    includes the following statistics:
+    
+    missing_value   - the fill data value
+    max             - the maximum value
+    min             - the minimum value
+    num_data_points - the total number of data points
+    shape           - the shape of the data
+    spatially_invalid_pts_ignored - number of points corresponding to invalid lat/lon in the set
+                                    (optional if no lon/lat mapping is used)
+    """
+    
+    _doc_strings = {
+                    'missing_value': 'the value that is considered \"missing\" data when it is found in the data',
+                    'max': 'the maximum finite, non-missing value found in the data',
+                    'min': 'the minimum finite, non-missing value found in the data',
+                    'num_data_points': "number of data points (may be valid or invalid data)",
+                    'shape': "shape of the data",
+                    'spatially_invalid_pts_ignored': 'number of points with invalid latitude/longitude information ' +
+                                                     'in the data that were' +
+                                                     ' ignored for the purposes of data analysis and presentation',
+                    }
+    
+    def __init__(self, dataObject) :
+        """
+        build our general statistics based on the data set
+        """
+        self.title = 'General Statistics'
+        
+        # pull out some masks for later use
+        missing_mask = dataObject.masks.missing_mask
+        ignore_mask  = dataObject.masks.ignore_mask
+        good_mask    = dataObject.masks.valid_mask
+        
+        #assert(missing_mask.shape == ignore_mask.shape)
+        #assert(ignore_mask.shape  == good_mask.shape  )
+        
+        # get the number of data points
+        total_num_values = missing_mask.size
+        
+        # fill in our statistics
+        self.missing_value   = dataObject.select_fill_value()
+        self.max             = delta.max_with_mask(dataObject.data, good_mask)
+        self.min             = delta.min_with_mask(dataObject.data, good_mask)
+        self.num_data_points = total_num_values
+        self.shape           = missing_mask.shape
+        # also calculate the invalid points
+        self.spatially_invalid_pts_ignored = np.sum(ignore_mask)
+    
+    def dictionary_form(self) :
+        """
+        get a dictionary form of the statistics
+        """
+        
+        toReturn = {
+                    'missing_value':   self.missing_value,
+                    'max':             self.max,
+                    'min':             self.min,
+                    'num_data_points': self.num_data_points,
+                    'shape':           self.shape,
+                    'spatially_invalid_pts_ignored': self.spatially_invalid_pts_ignored,
+                    }
+        
+        return toReturn
+    
+    @staticmethod
+    def doc_strings( ) :
+        """
+        get documentation strings that match the
+        dictionary form of the statistics
+        """
+        
+        return GeneralInspectionStatistics._doc_strings
+
 class NumericalComparisonStatistics (StatisticalData) :
     """
     A class representing more complex comparisons between a pair of data sets.
@@ -711,6 +938,107 @@ class StatisticalAnalysis (StatisticalData) :
         
         return toReturn
 
+class StatisticalInspectionAnalysis (StatisticalData) :
+    """
+    This class represents a complete statistical analysis of a data set.
+    
+    It includes the following sets of statistics:
+    
+    general      - a GeneralInspectionStatistics object
+    notANumber   - a NotANumberInspectionStatistics object
+    missingValue - a MissingValueInspectionStatistics object
+    finiteData   - a FiniteDataInspectionStatistics object
+    
+    It can also provide a dictionary form of the statistics or the
+    documentation of the statistics.
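+    
+    a rough usage sketch (the data array and fill value are arbitrary examples):
+    
+        analysis = StatisticalInspectionAnalysis.withSimpleData(data_array,
+                                                                missingValue=-999.0)
+        stats = analysis.dictionary_form()
+        # stats maps each section title (e.g. 'General Statistics') to that
+        # section's dictionary of individual statistics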
+    """
+    
+    def __init__ (self) :
+        """
+        this is a blank constructor to support our new class method creation pattern
+        """
+        self.title = "Statistical Summary"
+    
+    @classmethod
+    def withSimpleData (in_class,
+                        dataSet,
+                        missingValue=None,
+                        ignoreMask=None) :
+        """
+        do a full statistical analysis of the data, after building the data object
+        """
+        
+        new_object  = in_class()
+        
+        dataObject = dataobj.DataObject(dataSet, fillValue=missingValue, ignoreMask=ignoreMask)
+        dataObject.self_analysis()
+        
+        new_object._create_stats(dataObject)
+        
+        return new_object
+    
+    @classmethod
+    def withDataObjects (in_class,
+                         dataObject) :
+        """
+        do a full statistical analysis of the data, using the given data object
+        """
+        
+        new_object = in_class()
+        
+        dataObject.self_analysis()
+        new_object._create_stats(dataObject)
+        
+        return new_object
+    
+    def _create_stats(self, dataObject) :
+        """
+        build and set all of the statistics sets
+        """
+        
+        self.general      = GeneralInspectionStatistics(dataObject)
+        self.notANumber   = NotANumberInspectionStatistics(dataObject)
+        self.missingValue = MissingValueInspectionStatistics(dataObject)
+        self.finiteData   = FiniteDataInspectionStatistics(dataObject)
+    
+    
+    def dictionary_form(self) :
+        """
+        get a dictionary form of the statistics
+        """
+        toReturn = { }
+        
+        # build a dictionary of all our statistics
+        toReturn[self.general.title]      = self.general.dictionary_form()
+        toReturn[self.notANumber.title]   = self.notANumber.dictionary_form()
+        toReturn[self.missingValue.title] = self.missingValue.dictionary_form()
+        toReturn[self.finiteData.title]   = self.finiteData.dictionary_form()
+        
+        return toReturn
+    
+    def doc_strings(self) :
+        """
+        get documentation strings that match the
+        dictionary form of the statistics
+        """
+        return StatisticalInspectionAnalysis.doc_strings( )
+    
+    # TODO, use this method instead of the dictionary at the bottom of this module
+    @staticmethod
+    def doc_strings( ) :
+        """
+        get documentation strings that match the
+        dictionary form of the statistics
+        """
+        
+        toReturn = { }
+        toReturn.update(GeneralInspectionStatistics.doc_strings())
+        toReturn.update(NotANumberInspectionStatistics.doc_strings())
+        toReturn.update(MissingValueInspectionStatistics.doc_strings())
+        toReturn.update(FiniteDataInspectionStatistics.doc_strings())
+        
+        return toReturn
+
 # -------------------------- documentation -----------------------------
 
 # TODO, can this be moved?
-- 
GitLab