compare.py 118 KB
Newer Older
(no author)'s avatar
(no author) committed
1
2
3
4
5
6
7
8
9
10
11
#!/usr/bin/env python
# encoding: utf-8
"""

Top-level routines to compare two files.


Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

12
13
#from pprint import pprint, pformat

14
import os, sys, logging, datetime, glob, re
15
from numpy import *
16
import numpy
17
from urllib.parse import quote
(no author)'s avatar
(no author) committed
18

19
20
21
import locale
locale.setlocale(locale.LC_ALL,'') # Initialize our locale

22
import matplotlib
23
# this is a hack to keep glance from needing pyqt unless you run the gui
24
if "gui" in sys.argv[1:] :
25
    try :
Eva Schiffer's avatar
Eva Schiffer committed
26
        matplotlib.use('Qt5Agg')
27
28
        import glance.gui_controller as gui_control
    except ImportError :
Eva Schiffer's avatar
Eva Schiffer committed
29
        print ("*** Unable to import PyQt5. Please install PyQt5 and add it to your PYTHONPATH in order to use the Glance GUI. ***")
30
        raise
31
32
33
else :
    matplotlib.use('Agg')

34
import glance.io     as io
35
import glance.data   as dataobj
36
import glance.report as reportModule
37
import glance.stats  as statistics
38
import glance.plot   as plot
39
import glance.plotcreatefns as plotcreate
40
import glance.collocation   as collocation
41
import glance.config_organizer as config_organizer
42

43
from glance.util        import clean_path, rsync_or_copy_files, get_glance_version_string, get_run_identification_info, setup_dir_if_needed
44
from glance.load        import get_UV_info_from_magnitude_direction_info, load_variable_data, open_and_process_files, handle_lon_lat_info, handle_lon_lat_info_for_one_file, ValueErrorStringToFloat
45
46
from glance.lonlat_util import VariableComparisonError
from glance.constants   import *
47
from glance.gui_constants import A_CONST, B_CONST
48

49
LOG = logging.getLogger(__name__)
50

51
52
53
54
55
56
57
58
59
60
61
62
63
def _get_all_commands_help_string (commands_dict, ) :
    """
    given the dictonary of commands, compose the string with brief information about all of them
    """

    to_return = "Available commands in Glance:\n"

    for command_name in commands_dict :
        short_desc = commands_dict[command_name].__doc__.split('\n')[0]
        to_return += "\t%-16s %s\n" % (command_name, short_desc)

    return to_return

64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def _get_possible_files_from_dir (dir_path) :
    """collect every path under dir_path whose extension Glance says it can open

    The directory is searched recursively. A path is kept when the text after
    its last "." is one of the extensions reported by
    io.get_acceptable_file_extensions().

    returns a set of the matching paths
    """

    acceptable_extensions = io.get_acceptable_file_extensions()
    search_pattern = os.path.join(dir_path, "**")

    return {
        candidate
        for candidate in glob.iglob(search_pattern, recursive=True, )
        if candidate.split(".")[-1] in acceptable_extensions
    }

80
def _match_files_from_dirs (a_path, b_path, strip_expressions=None, ) :
    """given two paths to directories, try to match up the files we can analyze in them

    Two files are paired when their base names are equal after every regular
    expression in strip_expressions has been removed from them; the directory
    portion of each path is ignored when comparing.

    a_path            - the directory to search for A files
    b_path            - the directory to search for B files
    strip_expressions - optional list of regular expressions that are deleted
                        from each base name before comparing (default: none)

    returns a set of (a_filepath, b_filepath) tuples; a file appears in more
    than one pair when several files share its cleaned name
    """

    if strip_expressions is None :
        strip_expressions = [ ]

    # find all the files in the a path we might be able to open
    found_a_files = _get_possible_files_from_dir(a_path)

    LOG.debug("Found " + str(len(found_a_files)) + " possible file(s) in the A directory: ")
    for filepath in found_a_files :
        LOG.debug(filepath)

    # find all the files in the b path we might be able to open
    found_b_files = _get_possible_files_from_dir(b_path)

    LOG.debug("Found " + str(len(found_b_files)) + " possible file(s) in the B directory: ")
    for filepath in found_b_files :
        LOG.debug(filepath)

    def strip_expressions_from_base (file_path, expressions,) :
        """remove every match of each expression from the base name of file_path"""
        clean_name = os.path.basename(file_path)
        for expr in expressions :
            clean_name = re.sub(expr, '', clean_name)
        return clean_name

    # index the B files by cleaned name once, rather than re-cleaning every B name for every A file
    b_files_by_clean_name = { }
    for b_filepath in found_b_files :
        clean_b = strip_expressions_from_base(b_filepath, strip_expressions,)
        b_files_by_clean_name.setdefault(clean_b, [ ]).append(b_filepath)

    # pair each A file with every B file that shares its cleaned name
    file_pairs = set()
    for a_filepath in found_a_files :
        clean_a = strip_expressions_from_base(a_filepath, strip_expressions,)
        for b_filepath in b_files_by_clean_name.get(clean_a, ( )) :
            file_pairs.add((a_filepath, b_filepath,))

    return file_pairs

136
# TODO, I'd like to move this into a different file at some point
137
def _get_name_info_for_variable (original_display_name, variable_run_info) :
    """
    work out the names used for a variable from its run information

    original_display_name - the display name given by the user to describe the variable
    variable_run_info     - the settings for this variable; must contain
                            VARIABLE_TECH_NAME_KEY and may contain VARIABLE_B_TECH_NAME_KEY

    returns the tuple (technical_name, b_variable_technical_name, explanation_name):

    technical_name -            the name the variable is listed under in the file
    b_variable_technical_name - the name the variable is listed under in the b file
                                (the same as technical_name unless an alternate is configured)
    explanation_name -          the more verbose name that will be shown to the user to
                                identify the variable
    """

    name_in_a     = variable_run_info[VARIABLE_TECH_NAME_KEY]
    has_alternate = VARIABLE_B_TECH_NAME_KEY in variable_run_info

    if has_alternate :
        # B uses its own name, so the explanation mentions both
        name_in_b    = variable_run_info[VARIABLE_B_TECH_NAME_KEY]
        verbose_name = name_in_a + " / " + name_in_b
    else :
        name_in_b    = name_in_a
        verbose_name = name_in_a

    # lead with the user's display name whenever it adds information
    if original_display_name != verbose_name :
        verbose_name = original_display_name + ' (' + verbose_name + ')'

    return name_in_a, name_in_b, verbose_name

167
168
169
170
171
172
def collocate_to_file_library_call(a_path, b_path, var_list=None,
                                   options_set=None,
                                   # todo, this doesn't yet do anything
                                   do_document=False,
                                   # todo, the output channel does nothing at the moment
                                   output_channel=sys.stdout) :
    """
    this method handles the actual work of the collocating two files
    and can be used as a library routine.

    The two input files are copied into the configured output directory (the
    copies get a '-collocated' file name suffix) and the copies are opened for
    writing. For every resolved variable that can be collocated, a new variable
    named <technical name> + '-collocated' is created in each copy, carrying
    'number of multiple matches' and 'number of unmatched points' attributes.

    a_path, b_path - paths to the A and B files
    var_list       - optional list of requested variables (defaults to an empty list)
    options_set    - optional dictionary of options (defaults to an empty dictionary)
    do_document    - currently unused
    output_channel - currently unused

    Returns None. Calls sys.exit(1) if the longitude/latitude information can
    not be loaded or compared.

    TODO, properly document the options
    """

    # set some values for defaults
    var_list = [ ] if var_list is None else var_list
    options_set = { } if options_set is None else options_set

    # load the user settings from either the command line or a user defined config file
    pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile = config_organizer.load_config_or_options(a_path, b_path,
                                                                                                                options_set,
                                                                                                                requestedVars = var_list)

    # deal with the input and output files
    setup_dir_if_needed(pathsTemp[OUT_FILE_KEY], "output")

    # make copies of the input files for colocation TODO, fix paths
    [pathsTemp[A_FILE_KEY], pathsTemp[B_FILE_KEY]] = rsync_or_copy_files ([pathsTemp[A_FILE_KEY], pathsTemp[B_FILE_KEY]],
                                                                          target_directory=pathsTemp[OUT_FILE_KEY],
                                                                          additionalFileNameSuffix='-collocated')

    # open the files
    # NOTE(review): a constructor call does not normally produce None, so the two
    # "is None" checks below are probably never true; confirm how FileInfo reports
    # a file that can not be opened
    LOG.info("Processing File A:")
    aFile = dataobj.FileInfo(pathsTemp[A_FILE_KEY], allowWrite=True)
    if aFile is None:
        LOG.error("Unable to continue with comparison because file a (" + pathsTemp[A_FILE_KEY] + ") could not be opened.")
        sys.exit(1)
    LOG.info("Processing File B:")
    bFile = dataobj.FileInfo(pathsTemp[B_FILE_KEY], allowWrite=True)
    if bFile is None:
        LOG.error("Unable to continue with comparison because file b (" + pathsTemp[B_FILE_KEY] + ") could not be opened.")
        sys.exit(1)

    # from here on the files are open for writing, so make sure they get closed
    # even if something below raises (including the SystemExit from sys.exit)
    try :

        # get information about the names the user requested
        finalNames, nameStats = config_organizer.resolve_names(aFile.file_object,
                                                               bFile.file_object,
                                                               defaultValues,
                                                               requestedNames,
                                                               usedConfigFile,
                                                               warnIfRequestedVarsUnavailable=runInfo[OPTIONS_WARN_MISSING_KEY],)

        # return for lon_lat_data variables will be in the form
        #{LON_KEY: longitude_data,      LAT_KEY: latitude_data,      INVALID_MASK_KEY: spaciallyInvalidMaskData}
        # or { } if there is no lon/lat info
        lon_lat_data = { }
        try :
            lon_lat_data, _ = handle_lon_lat_info (runInfo, aFile, bFile, pathsTemp[OUT_FILE_KEY], should_check_equality=False,
                                                   fullDPI=runInfo[DETAIL_DPI_KEY], thumbDPI=runInfo[THUMBNAIL_DPI_KEY])
        except ValueError as vle :
            LOG.error("Error while loading longitude or latitude: ")
            LOG.error(str(vle))
            sys.exit(1)
        except VariableComparisonError as vce :
            LOG.error("Error while comparing longitude or latitude: ")
            LOG.error(str(vce))
            sys.exit(1)

        # these are only filled in (and only used) when lon/lat information exists
        aColocationInformation = None
        bColocationInformation = None
        colocatedLongitude     = None
        colocatedLatitude      = None

        if lon_lat_data :

            # handle the longitude and latitude colocation
            LOG.info("Colocating raw longitude and latitude information")
            aColocationInformation, bColocationInformation, totalNumberOfMatchedPoints = \
                        collocation.create_colocation_mapping_within_epsilon(lon_lat_data[A_FILE_KEY][LON_KEY], lon_lat_data[A_FILE_KEY][LAT_KEY],
                                                                             lon_lat_data[B_FILE_KEY][LON_KEY], lon_lat_data[B_FILE_KEY][LAT_KEY],
                                                                             runInfo[LON_LAT_EPSILON_KEY],
                                                                             invalidAMask=lon_lat_data[A_FILE_KEY][INVALID_MASK_KEY],
                                                                             invalidBMask=lon_lat_data[B_FILE_KEY][INVALID_MASK_KEY])
            (colocatedLongitude, colocatedLatitude, (numMultipleMatchesInA, numMultipleMatchesInB)), \
            (unmatchedALongitude, unmatchedALatitude), \
            (unmatchedBLongitude, unmatchedBLatitude) = \
                        collocation.create_colocated_lonlat_with_lon_lat_colocation(aColocationInformation, bColocationInformation,
                                                                                    totalNumberOfMatchedPoints,
                                                                                    lon_lat_data[A_FILE_KEY][LON_KEY], lon_lat_data[A_FILE_KEY][LAT_KEY],
                                                                                    lon_lat_data[B_FILE_KEY][LON_KEY], lon_lat_data[B_FILE_KEY][LAT_KEY])

            # TODO, based on unmatched, issue warnings and record info in the file?
            LOG.debug("collocated shape of the longitude: " + str(colocatedLongitude.shape))
            LOG.debug("collocated shape of the latitude:  " + str(colocatedLatitude.shape))
            LOG.debug(str(numMultipleMatchesInA) + " lon/lat pairs contain A points used for multiple matches.")
            LOG.debug(str(numMultipleMatchesInB) + " lon/lat pairs contain B points used for multiple matches.")
            LOG.debug(str(len(unmatchedALatitude)) + " A lon/lat points could not be matched.")
            LOG.debug(str(len(unmatchedBLatitude)) + " B lon/lat points could not be matched.")

        else :
            # without this guard the lookups above would fail with a KeyError on the empty dictionary
            LOG.warning("No longitude or latitude information is available, so no variables can be colocated.")

        # go through each of the possible variables in our files
        # and do our colocation for whichever ones we can
        for displayName in finalNames:

            # pull out the information for this variable analysis run
            varRunInfo = finalNames[displayName].copy()

            # get the various names
            technical_name, b_variable_technical_name, \
                    explanationName = _get_name_info_for_variable(displayName, varRunInfo)

            LOG.info('analyzing: ' + explanationName)

            # load the variable data
            aData = load_variable_data(aFile.file_object, technical_name,
                                       dataFilter = varRunInfo.get(FILTER_FUNCTION_A_KEY),
                                       variableToFilterOn = varRunInfo.get(VAR_FILTER_NAME_A_KEY),
                                       variableBasedFilter = varRunInfo.get(VAR_FILTER_FUNCTION_A_KEY),
                                       altVariableFileObject = dataobj.FileInfo(varRunInfo[VAR_FILTER_ALT_FILE_A_KEY]).file_object if VAR_FILTER_ALT_FILE_A_KEY in varRunInfo else None,
                                       fileDescriptionForDisplay = "file A")
            bData = load_variable_data(bFile.file_object, b_variable_technical_name,
                                       dataFilter = varRunInfo.get(FILTER_FUNCTION_B_KEY),
                                       variableToFilterOn = varRunInfo.get(VAR_FILTER_NAME_B_KEY),
                                       variableBasedFilter = varRunInfo.get(VAR_FILTER_FUNCTION_B_KEY),
                                       altVariableFileObject = dataobj.FileInfo(varRunInfo[VAR_FILTER_ALT_FILE_B_KEY]).file_object if VAR_FILTER_ALT_FILE_B_KEY in varRunInfo else None,
                                       fileDescriptionForDisplay = "file B")

            # collocate the data for this variable if we have longitude/latitude data
            if (len(lon_lat_data) > 0) and runInfo[DO_COLOCATION_KEY] :

                # the fill values are required from here on
                fillValueA = varRunInfo[FILL_VALUE_KEY]
                fillValueB = varRunInfo[FILL_VALUE_ALT_IN_B_KEY]

                # figure out the invalid masks
                invalidA = lon_lat_data[A_FILE_KEY][INVALID_MASK_KEY] | (aData == fillValueA)
                invalidB = lon_lat_data[B_FILE_KEY][INVALID_MASK_KEY] | (bData == fillValueB)

                # match up our points in A and B
                (aData, bData, (numberOfMultipleMatchesInA, numberOfMultipleMatchesInB)), \
                (aUnmatchedData,             _unmatchedALon, _unmatchedALat), \
                (bUnmatchedData,             _unmatchedBLon, _unmatchedBLat) = \
                        collocation.create_colocated_data_with_lon_lat_colocation(aColocationInformation, bColocationInformation,
                                                                                  colocatedLongitude, colocatedLatitude,
                                                                                  aData, bData,
                                                                                  missingData=fillValueA,
                                                                                  altMissingDataInB=fillValueB,
                                                                                  invalidAMask=invalidA,
                                                                                  invalidBMask=invalidB)

                LOG.debug(str(numberOfMultipleMatchesInA) + " data pairs contain A data points used for multiple matches.")
                LOG.debug(str(numberOfMultipleMatchesInB) + " data pairs contain B data points used for multiple matches.")
                LOG.debug(str(len(aUnmatchedData)) + " A data points could not be matched.")
                LOG.debug(str(len(bUnmatchedData)) + " B data points could not be matched.")

                # save the collocated data information in the output files

                # all the a file information
                temp_var_name = technical_name + '-collocated'
                variableObjTemp = aFile.file_object.create_new_variable(  temp_var_name,
                                                                          missingvalue = fillValueA,
                                                                          data = aData,
                                                                          variabletocopyattributesfrom = technical_name)
                aFile.file_object.add_attribute_data_to_variable(temp_var_name, 'number of multiple matches',
                                                                 numberOfMultipleMatchesInA, variableObject=variableObjTemp,)
                aFile.file_object.add_attribute_data_to_variable(temp_var_name, 'number of unmatched points',
                                                                 len(aUnmatchedData), variableObject=variableObjTemp,)

                # all the b file information
                temp_var_name = b_variable_technical_name + '-collocated'
                variableObjTemp = bFile.file_object.create_new_variable(  temp_var_name,
                                                                          missingvalue = fillValueB,
                                                                          data = bData,
                                                                          variabletocopyattributesfrom = b_variable_technical_name)
                bFile.file_object.add_attribute_data_to_variable(temp_var_name, 'number of multiple matches',
                                                                 numberOfMultipleMatchesInB, variableObject=variableObjTemp,)
                bFile.file_object.add_attribute_data_to_variable(temp_var_name, 'number of unmatched points',
                                                                 len(bUnmatchedData),  variableObject=variableObjTemp,)

                # Future, do we want any any additional statistics?

            else :
                LOG.debug(explanationName + " was not selected for colocation and will be ignored.")

        # the end of the loop to examine all the variables

    finally :
        # we're done with the files, so close them up
        try :
            aFile.file_object.close()
        finally :
            bFile.file_object.close()

    return

345
346
def reportGen_raw_data_simple_call (aData, bData, variableDisplayName,
                                    epsilon=0.0, missingValue=None,
                                    useThreads=False, includeImages=True,
                                    outputDirectory="./") :
    """
    Older name for report_raw_data_single_var_call.

    Logs a warning asking callers to switch to the new name, then forwards
    every argument unchanged and returns whatever the new function returns.
    """

    LOG.warning("This utility function is now called report_raw_data_single_var_call. Please use the new name in future.")

    return report_raw_data_single_var_call(aData, bData, variableDisplayName,
                                           epsilon=epsilon,
                                           missingValue=missingValue,
                                           useThreads=useThreads,
                                           includeImages=includeImages,
                                           outputDirectory=outputDirectory,)

def report_raw_data_single_var_call(aData, bData, variableDisplayName,
                                    epsilon=0.0, missingValue=None,
                                    useThreads=False, includeImages=True,
                                    outputDirectory="./"):
    """
    Build a report for one variable directly from raw data.

    aData, bData        - the two data sets to compare (converted to numpy arrays)
    variableDisplayName - the name used for the variable in logs and the report
    epsilon             - the epsilon passed to the statistics and the plots
    missingValue        - the fill value used for both data sets
    useThreads          - stored as the "clear memory with threads" run option and
                          passed to the plotting code
    includeImages       - when True the comparison images are also generated
    outputDirectory     - where the report, glossary page and images are written

    If the two data sets differ in shape nothing is generated and a warning
    is logged instead. Returns None.
    """

    LOG.info("Setting up basic information")

    aData = array(aData)
    bData = array(bData)

    # run-wide settings
    runInfo = config_organizer.get_simple_options_dict( )
    runInfo[DO_MAKE_IMAGES_KEY]        = includeImages
    runInfo[DO_MAKE_REPORT_KEY]        = True
    runInfo[DO_MAKE_FORKS_KEY]         = False
    runInfo[DO_CLEAR_MEM_THREADED_KEY] = useThreads

    # settings for this one variable; the same fill value applies to A and B
    variableSettings = config_organizer.get_simple_variable_defaults( )
    variableSettings[EPSILON_KEY]             = epsilon
    variableSettings[FILL_VALUE_KEY]          = missingValue
    variableSettings[FILL_VALUE_ALT_IN_B_KEY] = missingValue
    variableSettings[VARIABLE_TECH_NAME_KEY]  = variableDisplayName

    # record who/where/which version produced the report
    runInfo[MACHINE_INFO_KEY], runInfo[USER_INFO_KEY], runInfo[GLANCE_VERSION_INFO_KEY] = get_run_identification_info()

    # make sure there is somewhere to write to
    outputDirectory = clean_path(outputDirectory)
    setup_dir_if_needed(outputDirectory, "output")

    LOG.info("Analyzing " + variableDisplayName)

    # data of differing shapes can't be compared, so warn and stop here
    if aData.shape != bData.shape :
        message = (variableDisplayName + ' ' +
                'could not be compared. This may be because the data for this variable does not match in shape ' +
                'between the two files (file A data shape: ' + str(aData.shape) + '; file B data shape: '
                + str(bData.shape) + ').')
        LOG.warning(message)
        return

    def _variable_setting_or_nan (key) :
        """the variable level setting for key, or nan when the variable doesn't define one"""
        return variableSettings[key] if key in variableSettings else numpy.nan

    # paths the report templates need
    variableSettings[VARIABLE_DIRECTORY_KEY] = outputDirectory
    variableSettings[VAR_REPORT_PATH_KEY]    = quote(os.path.join(variableDisplayName, 'index.html'))
    variableSettings[DOCUMENTATION_PATH_KEY] = quote(os.path.join(outputDirectory, './doc.html'))

    # the statistics themselves
    variable_stats = statistics.StatisticalAnalysis.withSimpleData(aData, bData,
                                                                   missingValue, missingValue,
                                                                   None, None,
                                                                   epsilon, None)

    # timestamp and pass/fail status
    variableSettings[TIME_INFO_KEY] = datetime.datetime.now().ctime() # TODO, move this to util?
    pass_fail_results = variable_stats.check_pass_or_fail(
        epsilon_failure_tolerance=_variable_setting_or_nan(EPSILON_FAIL_TOLERANCE_KEY),
        epsilon_failure_tolerance_default=runInfo[EPSILON_FAIL_TOLERANCE_KEY],
        non_finite_data_tolerance=_variable_setting_or_nan(NONFINITE_TOLERANCE_KEY),
        non_finite_data_tolerance_default=runInfo[NONFINITE_TOLERANCE_KEY],
        total_data_failure_tolerance=_variable_setting_or_nan(TOTAL_FAIL_TOLERANCE_KEY),
        total_data_failure_tolerance_default=runInfo[TOTAL_FAIL_TOLERANCE_KEY],
        min_acceptable_r_squared=_variable_setting_or_nan(MIN_OK_R_SQUARED_COEFF_KEY),
        min_acceptable_r_squared_default=runInfo[MIN_OK_R_SQUARED_COEFF_KEY],
    )
    didPass, epsilon_failed_fraction, non_finite_fail_fraction, r_squared_value = pass_fail_results
    variableSettings[DID_VARIABLE_PASS_KEY] = didPass

    # names of any images created, filled in below when images are requested
    image_names = { ORIGINAL_IMAGES_KEY: [ ], COMPARED_IMAGES_KEY: [ ] }

    if includeImages :

        LOG.info("Plotting images for " + variableDisplayName)

        # histogram + scatter plots, then the basic imshow images
        plotFunctionGenerationObjects = [
            plotcreate.BasicComparisonPlotsFunctionFactory(),
            plotcreate.IMShowPlotFunctionFactory(),
        ]

        original_images, compared_images = plot.plot_and_save_comparison_figures(
            aData, bData,
            plotFunctionGenerationObjects,
            outputDirectory,
            variableDisplayName,
            epsilon,
            missingValue,
            lonLatDataDict=None,
            doFork=False,
            shouldClearMemoryWithThreads=useThreads,
            shouldUseSharedRangeForOriginal=True)
        image_names[ORIGINAL_IMAGES_KEY] = original_images
        image_names[COMPARED_IMAGES_KEY] = compared_images

        LOG.info("\tfinished creating figures for: " + variableDisplayName)

    # there are no real files behind raw data, so describe placeholders for the report
    files = {
        file_title_key: {
            PATH_KEY:          "raw data input",
            LAST_MODIFIED_KEY: "unknown",
            MD5SUM_KEY:        "n/a",
        }
        for file_title_key in (A_FILE_TITLE_KEY, B_FILE_TITLE_KEY)
    }

    # the report page
    LOG.info ('Generating report for: ' + variableDisplayName)
    reportModule.generate_and_save_variable_report(files,
                                                   variableSettings, runInfo,
                                                   variable_stats.dictionary_form(),
                                                   { },
                                                   image_names,
                                                   outputDirectory, "index.html",
                                                   definitions=statistics.StatisticalAnalysis.doc_strings(grouped=True),
                                                   )

    # the glossary page
    LOG.info ('Generating glossary page')
    reportModule.generate_and_save_doc_page(statistics.StatisticalAnalysis.doc_strings(), outputDirectory)
494

495
496
497
498
499
500
def report_one_input_library_call (a_path, var_list=None,
                                   options_set=None,
                                   # todo, this doesn't yet do anything
                                   do_document=False,
                                   # todo, the output channel does nothing at the moment
                                   output_channel=sys.stdout) :
    """
    this method handles the actual work of making a report about a single file
    (previously known as an inspection report or an inspect report)
    and can also be used as a library routine, pass in the slightly parsed
    command line input, or call it as a library function... be sure to fill
    out the options

    TODO at the moment the options are very brittle and need to be fully filled
    or this method will fail badly (note: the addition of some glance defaults
    has minimized the problem, but you still need to be careful when dealing with
    optional boolean values. this needs more work.)

    a_path         -- the path of the file to report on; it is handed to
                      config_organizer.load_config_or_options as the "A" path
    var_list       -- the requested variables (None is treated as an empty list)
    options_set    -- the dictionary of run options (None is treated as an empty dictionary)
    do_document    -- not used by this function yet
    output_channel -- not used by this function yet

    returns 0 when the function finishes, including the case where both image
    and report generation are turned off and there is nothing to generate

    the process is ended with sys.exit(1) if the input file can not be opened
    or if a ValueError is raised while the longitude / latitude are loaded
    """

    # set some values for defaults
    var_list    = [ ] if var_list    is None else var_list
    options_set = { } if options_set is None else options_set

    # load the user settings from either the command line or a user defined config file
    pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile = \
        config_organizer.load_config_or_options(a_path, None, # there is no B path
                                                options_set,
                                                requestedVars = var_list)

    # information for debugging purposes
    LOG.debug('paths: ' +           str(pathsTemp))
    LOG.debug('defaults: ' +        str(defaultValues))
    LOG.debug('run information: ' + str(runInfo))

    # if we wouldn't generate anything, just stop now
    if (not runInfo[DO_MAKE_IMAGES_KEY]) and (not runInfo[DO_MAKE_REPORT_KEY]) :
        LOG.warning("User selection of no image generation and no report generation will result in no " +
                    "content being generated. Aborting generation function.")
        # nothing went wrong, we just had nothing to do; return the same code
        # as the normal exit path so callers always get an integer back
        return 0

    # hang onto info to identify who/what/when/where/etc. the report is being run by/for
    runInfo[MACHINE_INFO_KEY], runInfo[USER_INFO_KEY], runInfo[GLANCE_VERSION_INFO_KEY] = get_run_identification_info()

    # deal with the input and output files
    outputPath = pathsTemp[OUT_FILE_KEY]
    setup_dir_if_needed(outputPath, "output")

    # open the file
    files = { }
    LOG.info("Processing File A:")
    aFile = dataobj.FileInfo(pathsTemp[A_FILE_KEY])
    files[A_FILE_TITLE_KEY] = aFile.get_old_info_dictionary() # FUTURE move to actually using the file object to generate the report
    if aFile.file_object is None:
        LOG.error("Unable to continue with examination because file (" + pathsTemp[A_FILE_KEY] + ") could not be opened.")
        sys.exit(1)

    # get information about the names the user requested
    nameStats = { }
    finalNames, nameStats[POSSIBLE_NAMES_KEY] = config_organizer.resolve_names_one_file(
                                                        aFile.file_object,
                                                        defaultValues, # TODO, might need a different default set
                                                        requestedNames,
                                                        usedConfigFile,
                                                        warnIfRequestedVarsUnavailable=runInfo[OPTIONS_WARN_MISSING_KEY],)

    # get info on the global attributes
    globalAttrInfo = { A_FILE_TITLE_KEY: aFile.file_object.get_global_attributes() }

    LOG.debug("output dir: " + str(outputPath))

    # return for lon_lat_data variables will be in the form
    #{LON_KEY: longitude_data,      LAT_KEY: latitude_data,      INVALID_MASK_KEY: spaciallyInvalidMaskData}
    # or { } if there is no lon/lat info
    lon_lat_data = { }
    spatialInfo  = { }
    try :
        lon_lat_data, spatialInfo = handle_lon_lat_info_for_one_file (runInfo, aFile)
    except ValueError as vle :
        LOG.error("Error while loading longitude or latitude: ")
        LOG.error(str(vle))
        # use sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist in every interpreter configuration
        sys.exit(1)

    # if there is an approved lon/lat shape, hang on to that for future variable data shape checks
    good_shape_from_lon_lat = None
    if len(lon_lat_data) > 0:
        good_shape_from_lon_lat = lon_lat_data[LON_KEY].shape

    # go through each of the possible variables in our files
    # and make a report section with images for whichever ones we can
    variableInspections = { }
    for displayName in finalNames:

        # pull out the information for this variable analysis run
        # (a copy, so the additions below don't change the entry in finalNames)
        varRunInfo = finalNames[displayName].copy()

        # get the various names
        technical_name, _, explanationName = _get_name_info_for_variable(displayName, varRunInfo)

        # make sure that it's possible to load this variable
        if not(aFile.file_object.is_loadable_type(technical_name)) :
            LOG.warning(displayName + " is of a type that cannot be loaded using current file handling libraries included with Glance." +
                        " Skipping " + displayName + ".")
            continue

        LOG.info('analyzing: ' + explanationName)

        # load the variable data if we can
        try :
            aData = load_variable_data(aFile.file_object, technical_name,
                                       dataFilter = varRunInfo.get(FILTER_FUNCTION_A_KEY),
                                       variableToFilterOn = varRunInfo.get(VAR_FILTER_NAME_A_KEY),
                                       variableBasedFilter = varRunInfo.get(VAR_FILTER_FUNCTION_A_KEY),
                                       # only open the alternate file when one was actually configured
                                       altVariableFileObject = dataobj.FileInfo(varRunInfo[VAR_FILTER_ALT_FILE_A_KEY]).file_object if VAR_FILTER_ALT_FILE_A_KEY in varRunInfo else None,
                                       fileDescriptionForDisplay = "file A")
        except Exception as ex :
            # deliberately broad: a failure to load one variable should not stop the rest of the report
            LOG.warning(displayName + " data could not be loaded. This variable will not be included in the output report. " +
                        "The following error was encountered while trying to load this variable:\n" + str(ex))
            continue

        # get variable attribute information for this variable
        attributeInfo = { A_FILE_TITLE_KEY: aFile.file_object.get_variable_attributes(technical_name) }

        # pre-check if this data should be plotted and if it should be compared to the longitude and latitude
        # (a per-variable setting takes priority over the run-wide setting)
        include_images_for_this_variable = (DO_MAKE_IMAGES_KEY not in runInfo) or (runInfo[DO_MAKE_IMAGES_KEY])
        if DO_MAKE_IMAGES_KEY in varRunInfo :
            include_images_for_this_variable = varRunInfo[DO_MAKE_IMAGES_KEY]
        do_not_test_with_lon_lat = (not include_images_for_this_variable) or (len(lon_lat_data) <= 0)

        # handle vector data
        isVectorData = (MAGNITUDE_VAR_NAME_KEY in varRunInfo) and (DIRECTION_VAR_NAME_KEY in varRunInfo)

        # check if this data can be examined
        # (don't compare lon/lat sizes if we won't be plotting)
        # if we can't do anything with the variable, we should tell the user
        if not (do_not_test_with_lon_lat or (aData.shape == good_shape_from_lon_lat)) :
            message = (explanationName + ' could not be examined. '
                     + 'This may be because the data for this variable (data shape: '
                     + str(aData.shape) + ') does not match the shape of the selected '
                     + 'longitude ' + str(good_shape_from_lon_lat) + ' and '
                     + 'latitude '  + str(good_shape_from_lon_lat) + ' variables.')
            LOG.warning(message)
            continue

        # check to see if there is a directory to put information about this variable in,
        # if not then create it
        variableDir = os.path.join(outputPath, './' + displayName)
        varRunInfo[VARIABLE_DIRECTORY_KEY] = variableDir
        varRunInfo[VAR_REPORT_PATH_KEY]    = quote(os.path.join(displayName, 'index.html'))
        LOG.debug ("Directory selected for variable information: " + varRunInfo[VAR_REPORT_PATH_KEY])
        setup_dir_if_needed(variableDir, "variable")

        # form the doc and config paths relative to where the variable is:
        # one '../' for every '/' separated piece of the display name
        # TODO this is not general to windows
        upwardPath = './' + '../' * len(displayName.split('/'))
        varRunInfo[DOCUMENTATION_PATH_KEY] = quote(os.path.join(upwardPath, 'doc.html'))
        if CONFIG_FILE_NAME_KEY in runInfo :
            varRunInfo[CONFIG_FILE_PATH_KEY] = quote(os.path.join(upwardPath, runInfo[CONFIG_FILE_NAME_KEY]))

        # figure out the masks we want, and then do our statistical analysis
        mask_a_to_use = None if do_not_test_with_lon_lat else lon_lat_data[INVALID_MASK_KEY]
        variable_stats = statistics.StatisticalInspectionAnalysis.withSimpleData(aData,
                                                                                 missingValue=varRunInfo[FILL_VALUE_KEY],
                                                                                 ignoreMask=mask_a_to_use).dictionary_form()

        # add a little additional info to our variable run info before we squirrel it away
        varRunInfo[TIME_INFO_KEY] = datetime.datetime.now().ctime()  # todo is this needed?

        # to hold the names of any images created
        image_names = {
                        ORIGINAL_IMAGES_KEY: [ ],
                        COMPARED_IMAGES_KEY: [ ]
                        }

        # create the images for this variable
        if include_images_for_this_variable :

            # we are always going to want to draw a basic histogram of the data values to tell which
            # occur most frequently
            plotFunctionGenerationObjects = [ plotcreate.DataHistogramPlotFunctionFactory() ]

            numberOfDims = len(aData.shape)

            # if it's vector data with longitude and latitude, quiver plot it on the Earth
            if isVectorData and (not do_not_test_with_lon_lat) :
                plotFunctionGenerationObjects.append(plotcreate.InspectMappedQuiverPlotFunctionFactory())

            # if the data is one dimensional we can plot it as lines
            elif numberOfDims == 1 :
                plotFunctionGenerationObjects.append(plotcreate.InspectLinePlotsFunctionFactory())

            # if the data is 2D we have some options based on the type of data
            elif numberOfDims == 2 :

                # if the data is not mapped to a longitude and latitude, just show it as an image
                if do_not_test_with_lon_lat :
                    plotFunctionGenerationObjects.append(plotcreate.InspectIMShowPlotFunctionFactory())

                # if it's 2D and mapped to the Earth, contour plot it on the earth
                else :
                    plotFunctionGenerationObjects.append(plotcreate.InspectMappedContourPlotFunctionFactory())

            # if there's magnitude and direction data, figure out the u and v, otherwise these will be None
            aUData, aVData = get_UV_info_from_magnitude_direction_info (aFile.file_object,
                                                                        varRunInfo.get(MAGNITUDE_VAR_NAME_KEY),
                                                                        varRunInfo.get(DIRECTION_VAR_NAME_KEY),
                                                                        lon_lat_data.get(INVALID_MASK_KEY) )

            # plot our images
            image_names[ORIGINAL_IMAGES_KEY], image_names[COMPARED_IMAGES_KEY] = \
                plot.plot_and_save_comparison_figures \
                        (aData, None, # there is no b data
                         plotFunctionGenerationObjects,
                         varRunInfo[VARIABLE_DIRECTORY_KEY],
                         displayName,
                         None, # there is no epsilon
                         varRunInfo[FILL_VALUE_KEY],
                         lonLatDataDict=lon_lat_data,
                         dataRanges     = varRunInfo.get(DISPLAY_RANGES_KEY),
                         dataRangeNames = varRunInfo.get(DISPLAY_RANGE_NAMES_KEY),
                         dataColors     = varRunInfo.get(DISPLAY_RANGE_COLORS_KEY),
                         doFork=runInfo[DO_MAKE_FORKS_KEY],
                         shouldClearMemoryWithThreads=runInfo[DO_CLEAR_MEM_THREADED_KEY],
                         shouldUseSharedRangeForOriginal=runInfo[USE_SHARED_ORIG_RANGE_KEY],
                         doPlotSettingsDict = varRunInfo,
                         aUData=aUData, aVData=aVData,
                         fullDPI=       runInfo[DETAIL_DPI_KEY],
                         thumbDPI=      runInfo[THUMBNAIL_DPI_KEY],
                         units_a=       varRunInfo.get(VAR_UNITS_A_KEY),
                         useBData=False,
                         histRange=varRunInfo.get(HISTOGRAM_RANGE_KEY))

            LOG.info("\tfinished creating figures for: " + explanationName)

        # create the report page for this variable
        if runInfo[DO_MAKE_REPORT_KEY] :

            # hang on to some info on our variable
            variableInspections[displayName] = {
                                                VARIABLE_RUN_INFO_KEY: varRunInfo
                                                }

            LOG.info ('\tgenerating report for: ' + explanationName)
            reportModule.generate_and_save_inspect_variable_report(files, varRunInfo, runInfo,
                                                             variable_stats, spatialInfo, image_names,
                                                             varRunInfo[VARIABLE_DIRECTORY_KEY], "index.html",
                                                             variableAttrs=attributeInfo,
                                                             definitions=statistics.StatisticalInspectionAnalysis.doc_strings(),)

    # the end of the loop to examine all the variables

    # generate our general report pages once we've analyzed all the variables
    if runInfo[DO_MAKE_REPORT_KEY] :

        # get the current time
        runInfo[TIME_INFO_KEY] = datetime.datetime.now().ctime()

        # TODO, create a new report generation function here
        # make the main summary report
        LOG.info ('generating summary report')
        reportModule.generate_and_save_inspection_summary_report (files,
                                                            outputPath, 'index.html',
                                                            runInfo,
                                                            variableInspections,
                                                            spatialInfo,
                                                            nameStats,
                                                            globalAttrs=globalAttrInfo,)

        # make the glossary
        LOG.info ('generating glossary')
        reportModule.generate_and_save_doc_page(statistics.StatisticalInspectionAnalysis.doc_strings(), outputPath)

    return 0
768

769
770
771
772
773
774
775
def report_two_inputs_library_call (a_path, b_path, var_list=None,
                                    options_set=None,
                                    # todo, this doesn't yet do anything
                                    do_document=False,
                                    # todo, the output channel does nothing at the moment
                                    output_channel=sys.stdout,
                                    do_return_summary_info=False, ) :
776
    """
777
    this method handles the actual work of making a report comparing two input files
778
779
780
    and can also be used as a library routine, pass in the slightly parsed
    command line input, or call it as a library function... be sure to fill
    out the options
781

782
    TODO at the moment the options are very brittle and need to be fully filled
783
784
785
    or this method will fail badly (note: the addition of some glance defaults
    has minimized the problem, but you still need to be careful when dealing with
    optional boolean values. this needs more work.)
786
787
788

    do_return_summary_info tells us if we also need to return the info needed to make
    the concise summary page with our return code
789
    """
790
791
792
793

    # set some values for defaults
    var_list = [ ] if var_list is None else var_list
    options_set = { } if options_set is None else options_set
794
    
(no author)'s avatar
(no author) committed
795
796
797
    # have all the variables passed test criteria set for them?
    # if no criteria were set then this will be true
    didPassAll = True
798
    do_pass_fail = options_set[DO_TEST_PASSFAIL_KEY] # todo, this is a temporary hack, should be loaded with other options
(no author)'s avatar
(no author) committed
799
    
800
    # load the user settings from either the command line or a user defined config file
801
802
803
    pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile = config_organizer.load_config_or_options(a_path, b_path,
                                                                                                                options_set,
                                                                                                                requestedVars = var_list)
804

805
806
807
808
809
810
    # note some of this information for debugging purposes
    LOG.debug('paths: ' +           str(pathsTemp))
    LOG.debug('defaults: ' +        str(defaultValues))
    LOG.debug('run information: ' + str(runInfo))
    
    # if we wouldn't generate anything, just stop now
811
    if (not runInfo[DO_MAKE_IMAGES_KEY]) and (not runInfo[DO_MAKE_REPORT_KEY]) :
812
813
        LOG.warning("User selection of no image generation and no report generation will result in no " +
                    "content being generated. Aborting generation function.")
814
815
816
        if do_pass_fail :
            return 0 # nothing went wrong, we just had nothing to do!
        else :
817
            return 0
818
    
819
    # hang onto info to identify who/what/when/where/etc. the report is being run by/for 
820
    runInfo[MACHINE_INFO_KEY], runInfo[USER_INFO_KEY], runInfo[GLANCE_VERSION_INFO_KEY] = get_run_identification_info()
821
822
    
    # deal with the input and output files
823
    setup_dir_if_needed(pathsTemp[OUT_FILE_KEY], "output")
824
825
826
    # open the files
    files = {}
    LOG.info("Processing File A:")
827
828
    aFile = dataobj.FileInfo(pathsTemp[A_FILE_KEY])
    files[A_FILE_TITLE_KEY] = aFile.get_old_info_dictionary() # FUTURE move to actually using the file object to generate the report
829
    if aFile.file_object is None:
830
        LOG.error("Unable to continue with comparison because file a (" + pathsTemp[A_FILE_KEY] + ") could not be opened.")
831
832
        sys.exit(1)
    LOG.info("Processing File B:")
833
834
    bFile = dataobj.FileInfo(pathsTemp[B_FILE_KEY]) 
    files[B_FILE_TITLE_KEY] = bFile.get_old_info_dictionary() # FUTURE move to actually using the file object to generate the report
835
    if bFile.file_object is None:
836
        LOG.error("Unable to continue with comparison because file b (" + pathsTemp[B_FILE_KEY] + ") could not be opened.")
837
838
839
        sys.exit(1)
    
    # get information about the names the user requested
840
841
842
843
    finalNames, nameStats = config_organizer.resolve_names(aFile.file_object,
                                                           bFile.file_object,
                                                           defaultValues,
                                                           requestedNames,
844
845
846
                                                           usedConfigFile,
                                                           warnIfRequestedVarsUnavailable=
                                                                runInfo[OPTIONS_WARN_MISSING_KEY],)
847
848
849
850
851
852

    # get info on the global attributes
    globalAttrInfo = {}
    globalAttrInfo[A_FILE_TITLE_KEY] = aFile.file_object.get_global_attributes()
    globalAttrInfo[B_FILE_TITLE_KEY] = bFile.file_object.get_global_attributes()

853
    LOG.debug("output dir: " + str(pathsTemp[OUT_FILE_KEY]))
854
    
855
    # return for lon_lat_data variables will be in the form 
856
    #{LON_KEY: longitude_data,      LAT_KEY: latitude_data,      INVALID_MASK_KEY: spaciallyInvalidMaskData}
857
    # or { } if there is no lon/lat info
858
859
860
    lon_lat_data = { }
    spatialInfo  = { }
    try :
861
862
863
        lon_lat_data, spatialInfo = handle_lon_lat_info (runInfo, aFile, bFile, pathsTemp[OUT_FILE_KEY],
                                                         should_make_images = runInfo[DO_MAKE_IMAGES_KEY],
                                                         fullDPI=runInfo[DETAIL_DPI_KEY], thumbDPI=runInfo[THUMBNAIL_DPI_KEY])
864
    except ValueError as vle :
865
866
        LOG.error("Error while loading longitude or latitude: ")
        LOG.error(str(vle))
867
        exit(1)
868
    except VariableComparisonError as vce :
869
870
        LOG.error("Error while comparing longitude or latitude: ")
        LOG.error(str(vce))
871
        exit(1)
Eva Schiffer's avatar
Eva Schiffer committed
872

873
874
    # if there is an approved lon/lat shape, hang on to that for future checks
    good_shape_from_lon_lat = None
Eva Schiffer's avatar
Eva Schiffer committed
875
    if len(lon_lat_data) > 0:
876
        good_shape_from_lon_lat = lon_lat_data[COMMON_KEY][LON_KEY].shape
877
    
878
879
    # this will hold information for the summary report
    # it will be in the form
880
881
882
883
884
885
    # [displayName] =  {
    #                    PASSED_EPSILON_PERCENT_KEY: percent ok with this epsilon,
    #                    FINITE_SIMILAR_PERCENT_KEY: percent with the same finiteness,
    #                    R_SQUARED_COEFF_VALUE_KEY:  the r squared correlation coefficient,
    #                    VARIABLE_RUN_INFO_KEY:      the detailed variable run information
    #                    }
886
887
888
    variableComparisons = { }
    # we will also be hanging on to some variable stats for the concise reports
    variableStatsCollection = { }
889
890
891
    
    # go through each of the possible variables in our files
    # and make a report section with images for whichever ones we can
892
    for displayName in finalNames:
893
894
895
        try:
            # pull out the information for this variable analysis run
            varRunInfo = finalNames[displayName].copy()
896
            
897
898
899
            # get the various names
            technical_name, b_variable_technical_name, \
                    explanationName = _get_name_info_for_variable(displayName, varRunInfo)
900
            
901
902
            # make sure that it's possible to load this variable
            if not(aFile.file_object.is_loadable_type(technical_name)) or not(bFile.file_object.is_loadable_type(b_variable_technical_name)) :
903
904
                LOG.warning(displayName + " is of a type that cannot be loaded using current file handling libraries included with Glance." +
                            " Skipping " + displayName + ".")
905
                continue
906
            
907
            LOG.info('analyzing: ' + explanationName)
908
            
909
            # load the variable data
910
911
912
913
914
915
916
917
918
919
920
921
922
            try:
                aData = load_variable_data(aFile.file_object, technical_name,
                                           dataFilter = varRunInfo[FILTER_FUNCTION_A_KEY] if FILTER_FUNCTION_A_KEY in varRunInfo else None,