Newer
Older
plotFunctionGenerationObjects.append(plotcreate.BasicComparisonPlotsFunctionFactory())
(no author)
committed
# if the bin and tuple are defined, try to analyze the data as complex
# multidimentional information requiring careful sampling
if (BIN_INDEX_KEY in varRunInfo) and (TUPLE_INDEX_KEY in varRunInfo) :
plotFunctionGenerationObjects.append(plotcreate.BinTupleAnalysisFunctionFactory())
(no author)
committed
else : # if it's not bin/tuple, there are lots of other posibilities
# if it's vector data with longitude and latitude, quiver plot it on the Earth
if isVectorData and (not do_not_test_with_lon_lat) :
plotFunctionGenerationObjects.append(plotcreate.MappedQuiverPlotFunctionFactory())
# if the data is one dimensional we can plot it as lines
plotFunctionGenerationObjects.append(plotcreate.LinePlotsFunctionFactory())
# if the data is 2D we have some options based on the type of data
# if the data is not mapped to a longitude and latitude, just show it as an image
if (do_not_test_with_lon_lat) :
plotFunctionGenerationObjects.append(plotcreate.IMShowPlotFunctionFactory())
# if it's 2D and mapped to the Earth, contour plot it on the earth
else :
plotFunctionGenerationObjects.append(plotcreate.MappedContourPlotFunctionFactory())
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
# plot our lon/lat related info
image_names[ORIGINAL_IMAGES_KEY], image_names[COMPARED_IMAGES_KEY] = \
plot.plot_and_save_comparison_figures \
(aData, bData,
plotFunctionGenerationObjects,
varRunInfo[VARIABLE_DIRECTORY_KEY],
displayName,
varRunInfo[EPSILON_KEY],
varRunInfo[FILL_VALUE_KEY],
missingValueAltInB = varRunInfo[FILL_VALUE_ALT_IN_B_KEY] if FILL_VALUE_ALT_IN_B_KEY in varRunInfo else None,
lonLatDataDict=lon_lat_data,
dataRanges = varRunInfo[DISPLAY_RANGES_KEY] if DISPLAY_RANGES_KEY in varRunInfo else None,
dataRangeNames = varRunInfo[DISPLAY_RANGE_NAMES_KEY] if DISPLAY_RANGE_NAMES_KEY in varRunInfo else None,
dataColors = varRunInfo[DISPLAY_RANGE_COLORS_KEY] if DISPLAY_RANGE_COLORS_KEY in varRunInfo else None,
doFork=runInfo[DO_MAKE_FORKS_KEY],
shouldClearMemoryWithThreads=runInfo[DO_CLEAR_MEM_THREADED_KEY],
shouldUseSharedRangeForOriginal=runInfo[USE_SHARED_ORIG_RANGE_KEY],
doPlotSettingsDict = varRunInfo,
aUData=aUData, aVData=aVData,
bUData=bUData, bVData=bVData,
binIndex= varRunInfo[BIN_INDEX_KEY] if BIN_INDEX_KEY in varRunInfo else None,
tupleIndex= varRunInfo[TUPLE_INDEX_KEY] if TUPLE_INDEX_KEY in varRunInfo else None,
binName= varRunInfo[BIN_NAME_KEY] if BIN_NAME_KEY in varRunInfo else 'bin',
tupleName= varRunInfo[TUPLE_NAME_KEY] if TUPLE_NAME_KEY in varRunInfo else 'tuple',
epsilonPercent=varRunInfo[EPSILON_PERCENT_KEY] if EPSILON_PERCENT_KEY in varRunInfo else None,
fullDPI= runInfo[DETAIL_DPI_KEY],
thumbDPI= runInfo[THUMBNAIL_DPI_KEY],
units_a= varRunInfo[VAR_UNITS_A_KEY] if VAR_UNITS_A_KEY in varRunInfo else None,
units_b= varRunInfo[VAR_UNITS_B_KEY] if VAR_UNITS_B_KEY in varRunInfo else None,
)#histRange= varRunInfo[HISTOGRAM_RANGE_KEY] if HISTOGRAM_RANGE_KEY in varRunInfo else None)
LOG.info("\tfinished creating figures for: " + explanationName)
(no author)
committed
# create the report page for this variable
if (runInfo[DO_MAKE_REPORT_KEY]) :
# hang on to our good % and other info to describe our comparison
epsilonPassedPercent = (1.0 - epsilon_failed_fraction) * 100.0
finitePassedPercent = (1.0 - non_finite_fail_fraction) * 100.0
variableComparisons[displayName] = {
PASSED_EPSILON_PERCENT_KEY: epsilonPassedPercent,
FINITE_SIMILAR_PERCENT_KEY: finitePassedPercent,
R_SQUARED_COEFF_VALUE_KEY: r_squared_value,
VARIABLE_RUN_INFO_KEY: varRunInfo
}
variableStatsCollection[displayName] = variable_stats.dictionary_form() # TODO, also image names?
LOG.info ('\tgenerating report for: ' + explanationName)
reportModule.generate_and_save_variable_report(files,
varRunInfo, runInfo,
variable_stats.dictionary_form(),
spatialInfo,
image_names,
varRunInfo[VARIABLE_DIRECTORY_KEY], "index.html",
variableAttrs=attributeInfo,)
# if we can't compare the variable, we should tell the user
(no author)
committed
else :
message = (explanationName + ' ' +
'could not be compared. This may be because the data for this variable does not match in shape ' +
'between the two files (file A data shape: ' + str(aData.shape) + '; file B data shape: '
+ str(bData.shape) + ')')
if do_not_test_with_lon_lat :
message = message + '.'
else :
message = (message + ' or the data may not match the shape of the selected '
+ 'longitude ' + str(good_shape_from_lon_lat) + ' and '
+ 'latitude ' + str(good_shape_from_lon_lat) + ' variables.')
LOG.warn(message)
except ValueErrorStringToFloat as e:
LOG.warn("Unable to compare "+displayName+": "+str(e))
# the end of the loop to examine all the variables
# generate our general report pages once we've analyzed all the variables
if runInfo[DO_MAKE_REPORT_KEY] :
# get the current time
(no author)
committed
runInfo[TIME_INFO_KEY] = datetime.datetime.ctime(datetime.datetime.now())
# make the main summary report
reportModule.generate_and_save_summary_report(files,
(no author)
committed
pathsTemp[OUT_FILE_KEY], 'index.html',
runInfo,
variableComparisons,
spatialInfo,
nameStats,
globalAttrs=globalAttrInfo,)
# TODO save some info we need for the concise summary report
concise_info = {
"files": files,
"runInfo": runInfo,
"var_comparisons": variableComparisons,
"var_stats": variableStatsCollection,
}
# make the glossary
reportModule.generate_and_save_doc_page(statistics.StatisticalAnalysis.doc_strings(), pathsTemp[OUT_FILE_KEY])
returnCode = 0 if didPassAll else 2 # return 2 only if some of the variables failed
(no author)
committed
# if we are reporting the pass / fail, return an appropriate status code
to_return = returnCode if do_pass_fail else 0
(no author)
committed
if do_pass_fail :
LOG.debug("Pass/Fail return code: " + str(returnCode))
if do_return_summary_info and runInfo[DO_MAKE_REPORT_KEY] :
return to_return, concise_info
return to_return
def stats_library_call(afn, bfn, var_list=None,
                       options_set=None,
                       do_document=False,
                       output_channel=sys.stdout):
    """
    Print comparison statistics for the variables shared by two files.

    This does the actual work of the stats command line tool and can also
    be used as a library routine: pass in your own output channel to capture
    the printed statistics.

    TODO, should this move to a different file?

    Parameters:
        afn, bfn        paths of the two files; they are handed to
                        open_and_process_files and used to look the opened
                        file objects back up in its result
        var_list        variable name selections given to
                        config_organizer.parse_varnames; when empty or None,
                        ['.*'] is used
        options_set     dictionary read for EPSILON_KEY,
                        OPTIONS_FILL_VALUE_KEY and DO_TEST_PASSFAIL_KEY;
                        missing entries are treated as None / None / False
        do_document     when true, print the documentation for each
                        statistic (if exactly one variable is analyzed) or
                        the general comparison documentation at the end
        output_channel  file-like object that all statistics are printed to

    Returns:
        0 (all variables passed) or 3 (some variable failed) when pass/fail
        testing was requested in options_set; otherwise None.
    """

    # set some values for defaults
    var_list = [] if var_list is None else var_list
    options_set = {} if options_set is None else options_set

    # unpack some options; .get() is used so that the options_set=None default
    # does not die with a KeyError on the empty dictionary
    # NOTE(review): how downstream code treats a None epsilon is not visible here - confirm
    epsilon_val = options_set.get(EPSILON_KEY)
    missing_val = options_set.get(OPTIONS_FILL_VALUE_KEY)
    do_pass_fail = options_set.get(DO_TEST_PASSFAIL_KEY, False)

    LOG.debug("file a: " + afn)
    LOG.debug("file b: " + bfn)

    # open the files
    filesInfo = open_and_process_files([afn, bfn])
    aFile = filesInfo[afn][FILE_OBJECT_KEY]
    bFile = filesInfo[bfn][FILE_OBJECT_KEY]

    # information for testing pass/fail if needed
    has_failed = False
    epsilon_fail_tolerance = 0.0
    nonfinite_fail_tolerance = 0.0

    # figure out the variable names and their individual settings
    if not var_list:
        var_list = ['.*']
    names = config_organizer.parse_varnames(filesInfo[COMMON_VAR_NAMES_KEY], var_list, epsilon_val, missing_val)
    LOG.debug(str(names))
    doc_each = do_document and len(names) == 1
    doc_atend = do_document and len(names) != 1

    # these do not depend on the variable being examined, so look them up once
    pass_fail_defaults = config_organizer.get_simple_variable_defaults() if do_pass_fail else None
    stat_docs = statistics.StatisticalAnalysis.doc_strings() if doc_each else None

    for name, epsilon, missing in sorted(names, key=lambda X: X[0]):

        # make sure that it's possible to load this variable
        if not (aFile.is_loadable_type(name) and bFile.is_loadable_type(name)):
            LOG.warning(name + " is of a type that cannot be loaded using current file handling libraries included with Glance." +
                        " Skipping " + name + ".")
            continue

        try:
            aData = aFile[name]
            bData = bFile[name]
        except io.IONonnumericalTypeError as bad_data_error:
            LOG.error("Skipping variable %s because it is of a non-numerical type "
                      "(may indicate array of variable-length strings): %s" % (name, repr(bad_data_error)))
            continue

        # an explicit missing value applies to both files, otherwise ask each file
        if missing is None:
            amiss = aFile.missing_value(name)
            bmiss = bFile.missing_value(name)
        else:
            amiss, bmiss = missing, missing
        LOG.debug('comparing %s with epsilon %s and missing %s,%s' % (name, epsilon, amiss, bmiss))
        print('-' * 32, file=output_channel)
        print(name, file=output_channel)
        print('', file=output_channel)

        variable_stats = statistics.StatisticalAnalysis.withSimpleData(aData, bData, amiss, bmiss, epsilon=epsilon)

        # if we're doing pass/fail testing, do that now
        if do_pass_fail:
            didPass, _, _, _ = variable_stats.check_pass_or_fail(
                epsilon_failure_tolerance=epsilon_fail_tolerance,
                epsilon_failure_tolerance_default=pass_fail_defaults[EPSILON_FAIL_TOLERANCE_KEY],
                non_finite_data_tolerance=nonfinite_fail_tolerance,
                non_finite_data_tolerance_default=pass_fail_defaults[NONFINITE_TOLERANCE_KEY],
                total_data_failure_tolerance_default=pass_fail_defaults[TOTAL_FAIL_TOLERANCE_KEY],
                min_acceptable_r_squared_default=pass_fail_defaults[MIN_OK_R_SQUARED_COEFF_KEY],
            )
            has_failed = has_failed or not didPass

        # print each group of statistics in a stable (sorted) order
        for dictionary_title, dict_data in sorted(variable_stats.dictionary_form().items()):
            print('%s' % dictionary_title, file=output_channel)
            for each_stat in sorted(dict_data):
                print(' %s: %s' % (each_stat, dict_data[each_stat]), file=output_channel)
                if doc_each:
                    print((' ' + stat_docs[each_stat]), file=output_channel)
            print('', file=output_channel)

    if doc_atend:
        print(('\n\n' + statistics.get_comparison_doc_string()), file=output_channel)

    # if we are doing pass/fail, we need to return a status code
    if do_pass_fail:
        status_code = 3 if has_failed else 0
        LOG.debug("stats is returning status code: " + str(status_code))
        return status_code

    # note: if we aren't doing pass/fail, stats will not return anything
    return None
def inspect_stats_library_call(afn, var_list=None, options_set=None, do_document=False, output_channel=sys.stdout):
    """
    Print inspection statistics for the variables in a single file.

    This does the actual work of the inspect_stats command line tool and can
    also be used as a library routine: pass in your own output channel to
    capture the printed statistics.

    TODO, should this move to a different file?

    Parameters:
        afn             path of the file; it is handed to
                        open_and_process_files and used to look the opened
                        file object back up in its result
        var_list        variable name selections given to
                        config_organizer.parse_varnames; when empty or None,
                        ['.*'] is used
        options_set     dictionary read for OPTIONS_FILL_VALUE_KEY; a missing
                        entry is treated as None
        do_document     when true, print the documentation for each
                        statistic (if exactly one variable is analyzed) or
                        the general inspection documentation at the end
        output_channel  file-like object that all statistics are printed to

    Returns:
        0, always.
    """

    # set some values for defaults
    var_list = [] if var_list is None else var_list
    options_set = {} if options_set is None else options_set

    # unpack some options; .get() is used so that the options_set=None default
    # does not die with a KeyError on the empty dictionary
    missing_val = options_set.get(OPTIONS_FILL_VALUE_KEY)

    LOG.debug("file a: " + afn)

    # open the file
    filesInfo = open_and_process_files([afn])
    aFile = filesInfo[afn][FILE_OBJECT_KEY]

    # figure out the variable names and their individual settings
    if not var_list:
        var_list = ['.*']
    names = config_organizer.parse_varnames(filesInfo[COMMON_VAR_NAMES_KEY], var_list, epsilon=None, missing=missing_val)
    LOG.debug(str(names))
    doc_each = do_document and len(names) == 1
    doc_atend = do_document and len(names) != 1

    # the documentation table does not depend on the variable, so fetch it once
    stat_docs = statistics.StatisticalInspectionAnalysis.doc_strings() if doc_each else None

    # the epsilon slot of each entry is not used when inspecting a single file
    for name, _epsilon, missing in sorted(names, key=lambda X: X[0]):

        # make sure that it's possible to load this variable
        if not aFile.is_loadable_type(name):
            LOG.warning(name + " is of a type that cannot be loaded using current file handling libraries included with Glance." +
                        " Skipping " + name + ".")
            continue

        aData = aFile[name]

        # an explicit missing value wins, otherwise ask the file
        amiss = aFile.missing_value(name) if missing is None else missing
        LOG.debug('analyzing %s with missing data value %s' % (name, amiss))
        print('-' * 32, file=output_channel)
        print(name, file=output_channel)
        print('', file=output_channel)

        variable_stats = statistics.StatisticalInspectionAnalysis.withSimpleData(aData, amiss)

        # print each group of statistics in a stable (sorted) order
        for dictionary_title, dict_data in sorted(variable_stats.dictionary_form().items()):
            print('%s' % dictionary_title, file=output_channel)
            for each_stat in sorted(dict_data):
                print(' %s: %s' % (each_stat, dict_data[each_stat]), file=output_channel)
                if doc_each:
                    print((' ' + stat_docs[each_stat]), file=output_channel)
            print('', file=output_channel)

    if doc_atend:
        print(('\n\n' + statistics.get_inspection_doc_string()), file=output_channel)

    return 0
# Placeholder binding so the command functions below can refer to `options` by name.
# NOTE(review): presumably rebound to the parsed command line options before any command runs - confirm.
options = None
# registry of available commands; it starts out empty here
commands = {}
# `prior` is bound once before the snapshot is taken so that the name `prior`
# itself is already present in the dict(locals()) copy made on the next line.
# NOTE(review): the snapshot is presumably compared with a later locals() to
# discover the command functions defined below - confirm against the code that fills `commands`.
prior = None
prior = dict(locals())
"""
The following functions represent available menu selections in glance.
"""
def info(*args):
    """list information about a list of files

    Given a file (or list of files), print out the names of the available variables in the file(s).

    Examples:
     glance info A.hdf
     glance info B.h5 C.nc
    """

    # count of the files that could not be opened / processed
    problems = 0

    for fn in args:
        try:
            lal = sorted(io.open(fn)())
            if options.parsable_output:
                # one "file<TAB>variable" entry per line
                print("".join([fn + "\t" + x + "\n" for x in lal]))
            else:
                # human readable listing, continuation lines are padded to line up under the first name
                print(fn + ': ' + ('\n ' + ' ' * len(fn)).join(lal))
        except KeyError:
            LOG.warning('Unable to open / process file selection: ' + fn)
            problems += 1

    # exit code is 8-bits, limit ourselves.
    return min(problems, 255)
def stats(*args):
    """do statistical analysis on your input

    The stats command will perform various types of statistical analysis depending on what input files
    you give it.

    If you give it two input files, it will summarize the statistical differences between the two files.
    If you give it one file it will print some informational statistics about that file.

    Please note that this command does not handle any sort of directory navigation, so you will need
    to give it simple paths to specific files you want to analyze.

    Optionally you may also provide variable names to be analyzed. If no variable names are
    given, this command will attempt to analyze all variables it can. When analyzing two files
    only variables with matching names and shapes will be examined.

    If you provide an output path, statistics will be saved to a stats.txt file in that directory.

    Run with -v to get more detailed information about the statistics being reported.

    Examples:
     glance stats A.hdf B.hdf
     glance stats --epsilon=0.00001 C.nc B.hdf baseline_cmask_seviri_cloud_mask:0.002:
     glance -w stats A.hdf D.h5 imager_prof_retr_abi_.*::-999 'nwp__.__index:0:'
     glance stats A.nc cloud_top_height
     glance stats C.hdf cloud_top_*::-999.0
    """

    # examine the args and see how many valid file paths we have
    files = []
    variables = []
    LOG.debug("Examining arguments to see how many possible file paths we have.")
    for argument_val in args:
        if os.path.exists(argument_val):
            # double check that this is not a directory
            if os.path.isdir(argument_val):
                LOG.warning("Glance statistics cannot currently analyze directories of files. Please provide specific file paths.")
                return 1
            LOG.debug("Argument value is an existing file path: " + argument_val)
            files.append(clean_path(argument_val))
        else:
            LOG.debug("Argument value is not an existing file path, it will be treated as a variable name: " + argument_val)
            variables.append(argument_val)

    # if we have no file paths, just stop now
    if not files:
        LOG.warning("Expected at least one valid file path as input data. " +
                    "Unable to create statistics without a file path.")
        return 1

    # organize our command line options and get some other informational stuff
    tempOptions = config_organizer.convert_options_to_dict(options)
    do_doc = (options.verbose or options.debug)
    to_return = 0

    # if we were given an output path use that to create the stats
    toPrintTo = sys.stdout
    fileForOutput = None  # only set when we open our own file, so we know whether to close it
    outpath = clean_path(options.outputpath)
    if outpath != clean_path('./'):
        # if needed, create the directory
        setup_dir_if_needed(outpath, "output")
        # open the file for writing, get rid of whatever's there
        fileForOutput = open(os.path.join(outpath, "stats.txt"), "w")
        toPrintTo = fileForOutput

    try:
        # if we have one file path, analyze that single file
        if len(files) <= 1:
            a_file_path = files[0]
            LOG.info("Generating statistics for file: " + a_file_path)
            to_return = inspect_stats_library_call(a_file_path,
                                                   var_list=variables,
                                                   options_set=tempOptions,
                                                   do_document=do_doc,
                                                   output_channel=toPrintTo)

        # just in case, let's at least give a warning so the user has some idea why we ignored some paths
        if len(files) > 2:
            LOG.warning("More than two file paths were found in your command line input. "
                        "Only the first two will be used. The rest will be ignored.")

        # if we have two file paths, create comparison statistics
        if len(files) >= 2:
            a_file_path = files[0]
            b_file_path = files[1]
            LOG.info("Generating statistical comparison for files: " + a_file_path + " " + b_file_path)
            to_return = stats_library_call(a_file_path, b_file_path,
                                           var_list=variables,
                                           options_set=tempOptions,
                                           do_document=do_doc,
                                           output_channel=toPrintTo)
    finally:
        # close our text output file if needed, even when the analysis raised
        if fileForOutput is not None:
            fileForOutput.close()

    # return a return code if we have one, otherwise everything must be fine, right?
    return 0 if to_return is None else to_return
(no author)
committed
def plotDiffs(*args):
    """create a set of images comparing two files (Deprecated, please use report in future.)

    This command is DEPRECATED. Please use the report command in future.

    Create and save images comparing variables in two files. Variables to be compared may be specified after
    the names of the two input files. If no variable names are given, this command will create plots for all
    variables that can be matched by name and shape between the two files.

    If latitude and longitude data are present in the file and specified in the call options, the variables
    will be plotted on a map. The longitude and latitude variable names may be specified with --longitude
    and --latitude command line options. If no longitude or latitude names are specified Glance will attempt
    to use pixel_longitude and pixel_latitude.

    If you would rather plot the data without a map, use the --nolonlat option.

    The created images will be saved at the provided output path in subdirectories named for each variable
    analyzed. If no output path is provided, output will be saved in the current directory.

    Examples:
     glance plotDiffs A.hdf B.hdf
     glance plotDiffs C.nc D.h5 variable_name_1:epsilon1: variable_name_2 variable_name_3:epsilon3:missing3
     glance --outputpath=/path/where/output/will/be/placed/ plotDiffs A.hdf B.hdf
     glance plotDiffs --longitude=lon_variable_name --latitude=lat_variable_name A.hdf B.hdf variable_name
    """

    # set the options so that a report will not be generated
    options.imagesOnly = True

    # make the images; hand back reportGen's status so a failure
    # (for example too few file paths) is not silently dropped
    return reportGen(*args)
(no author)
committed
def reportGen(*args):
    """create a report comparing two files (Deprecated, please use report in future.)

    This command is DEPRECATED. Please use the report command in future.

    Generate an html report comparing the variables in two files. Variables to be compared may be specified after
    the names of the two input files. If no variable names are given, this command will create reports for all
    variables that can be matched by name and shape between the two files.

    If latitude and longitude data are present in the file and specified in the call options, the variable
    comparison plots will be drawn on a map. The longitude and latitude variable names may be specified with
    --longitude and --latitude command line options. If no longitude or latitude names are specified Glance
    will attempt to use pixel_longitude and pixel_latitude.

    If you would rather analyze the data without trying to place it on a map, use the --nolonlat option.

    The created reports and images will be saved at the provided output path in subdirectories named for
    each variable analyzed. If no output path is provided, output will be saved in the current directory.
    Created images will be embedded in the report or visible as separate .png files.

    If you would prefer to generate the report without images, use the --reportonly option. This option will
    generate the html report but omit the images. This may be significantly faster, depending on your system,
    but the differences between the files may be more difficult to interpret.

    Examples:
     glance reportGen A.hdf C.nc
     glance reportGen A.hdf B.hdf variable_name_1:epsilon1: variable_name_2 variable_name_3:epsilon3:missing3 variable_name_4::missing4
     glance --outputpath=/path/where/output/will/be/placed/ reportGen A.hdf D.h5
     glance reportGen --longitude=lon_variable_name --latitude=lat_variable_name D.h5 C.nc variable_name
     glance reportGen --imagesonly A.hdf B.hdf
    """

    tempOptions = config_organizer.convert_options_to_dict(options)

    # the first two arguments must be the files to compare
    if len(args) < 2:
        LOG.warning("Expected two paths to files to compare. "
                    "Unable to generate a comparison report or comparison plots without two file paths.")
        return 1

    a_path = clean_path(args[0])
    b_path = clean_path(args[1])

    # anything after the two file paths is passed along as the variable selections
    return reportGen_library_call(a_path, b_path, args[2:], tempOptions)
(no author)
committed
(no author)
committed
def inspectStats(*args):
    """create statistics summary of one file (Deprecated, please use stats in future.)

    This command is DEPRECATED. Please use the stats command in future.

    Print statistical summaries of the variables in a file. If no variable names are given, this
    command will summarize all variables found in the file.

    Run with -v to get more detailed information about the statistics being reported.

    Examples:
     glance inspectStats A.hdf
     glance inspectStats A.hdf baseline_cmask_seviri_cloud_mask
     glance -w inspectStats B.nc imager_prof_retr_abi_total_precipitable_water_low::-999
    """

    if len(args) < 1:
        LOG.warning("Expected a path to a file to inspect. "
                    "Unable to generate inspection statistics without a file path.")
        return 1

    afn = args[0]
    do_doc = (options.verbose or options.debug)

    tempOptions = config_organizer.convert_options_to_dict(options)

    # TODO, clean up how the output is set up
    # if we were given an output path use that to create the stats
    toPrintTo = sys.stdout
    # stays None unless we open our own file; without this binding the check
    # in the finally clause would fail whenever no output path was given
    fileForOutput = None
    outpath = clean_path(options.outputpath)
    if outpath != clean_path('./'):
        # if needed, create the directory
        setup_dir_if_needed(outpath, "output")
        # open the file for writing, get rid of whatever's there
        # (os.path.join keeps the path separator platform independent)
        fileForOutput = open(os.path.join(outpath, "stats.txt"), "w")
        toPrintTo = fileForOutput

    try:
        # any arguments after the file path are passed along as the variable selections
        return inspect_stats_library_call(clean_path(afn), var_list=args[1:],
                                          options_set=tempOptions, do_document=do_doc,
                                          output_channel=toPrintTo)
    finally:
        # close our text output file if needed, even when the analysis raised
        if fileForOutput is not None:
            fileForOutput.close()
(no author)
committed
def inspectReport(*args):
    """create a report to inspect the contents of one file (Deprecated, please use report in future.)

    This command is DEPRECATED. Please use the report command in future.

    Generate an html report examining the variables in a file. Variables to be analyzed may be specified after
    the name of the input file. If no variable names are given, this command will create reports for all
    variables in the file.

    If latitude and longitude data are present in the file and specified in the call options, the variable
    plots will be drawn on a map. The longitude and latitude variable names may be specified with
    --longitude and --latitude command line options. If no longitude or latitude names are specified Glance
    will attempt to use pixel_longitude and pixel_latitude.

    If you would rather analyze the data without trying to place it on a map, use the --nolonlat option.

    The created reports and images will be saved at the provided output path in subdirectories named for
    each variable analyzed. If no output path is provided, output will be saved in the current directory.
    Created images will be embedded in the report or visible as separate .png files.

    If you would prefer to generate the report without images, use the --reportonly option. This option will
    generate the html report but omit the images. This may be significantly faster, depending on your system,
    but the results may be more difficult to interpret.

    Examples:
     glance inspect_report A.hdf variable_name_1:: variable_name_2 variable_name_3::missing3 variable_name_4::missing4
     glance --outputpath=/path/where/output/will/be/placed/ inspect_report C.nc
     glance inspect_report --longitude=lon_variable_name --latitude=lat_variable_name D.h5 variable_name
     glance inspect_report --reportonly A.hdf
    """

    if len(args) < 1:
        # this command inspects a single file, so the message talks about one file and an inspection report
        LOG.warning("Expected a path to a file to inspect. "
                    "Unable to generate an inspection report without a file path.")
        return 1

    tempOptions = config_organizer.convert_options_to_dict(options)

    # args[0] is the path of the file to be analyzed, any other args should be variable names
    return inspect_library_call(clean_path(args[0]), args[1:], tempOptions)
(no author)
committed
(no author)
committed
def colocateData(*args):
    """colocate data from two files

    Colocates data in the two given input files and saves it to separate output files.

    Data will be colocated based on its corresponding longitude and latitude. Multiple matches may be
    made between a data point in file A and those in file B if they are within the longitude/latitude epsilon.
    Points from each file that could not be matched and the number of duplicate matches will also be
    recorded in the output file.

    The user may also use the notation variable_name::missing_value to specify the missing_value which indicates
    missing data. If no missing value is given, glance will attempt to load a missing value from the input file.
    If there is no missing value defined for that variable in the file, no missing value will be analyzed.
    Missing value data points will not be considered for colocation.

    Data which corresponds to longitude or latitude values which fall outside the earth (outside the normally
    accepted valid ranges) will also be considered invalid and will not be considered for colocation.

    The longitude and latitude variables may be specified with --longitude and --latitude
    If no longitude or latitude are specified the pixel_latitude and pixel_longitude variables will be used.
    The longitude and latitude epsilon may be specified with --llepsilon
    If no longitude/latitude epsilon is given the value of 0.0 (degrees) will be used

    The output data files generated by this option will appear in the selected output directory, or the current
    directory if no output directory is selected. The output files will be named originalFileName-colocation.nc
    (replacing "originalFileName" with the names of your input files).

    Examples:
     glance colocateData A.hdf B.hdf variable_name_1 variable_name_2 variable_name_3::missing3
     glance colocateData --outputpath=/path/where/output/will/be/placed/ A.nc B.nc
     glance colocateData --longitude=lon_variable_name --latitude=lat_variable_name A.hdf B.hdf variable_name
     glance colocateData --llepsilon=0.0001 A.nc B.hdf
    """

    if len(args) < 2:
        LOG.warning("Expected two paths to files to colocate. "
                    "Unable to generate colocation information without two file paths.")
        return 1

    # TODO, is this really needed?
    # these are switched off on the shared options object before it is converted to a dictionary
    options.imagesOnly = False
    options.htmlOnly = False
    options.doFork = False

    tempOptions = config_organizer.convert_options_to_dict(options)

    # TODO, remove this eventually
    tempOptions[DO_COLOCATION_KEY] = True

    a_path = clean_path(args[0])
    b_path = clean_path(args[1])

    # anything after the two file paths is passed along as the variable selections
    colocateToFile_library_call(a_path, b_path, args[2:], tempOptions)
(no author)
committed
(no author)
committed
# Note: the figure plotting in the GUI is dependent on having selected an interactive renderer in the first "use"
# statement at the beginning of this module. (It had to be moved into this module to pre-empt other use statements
# from imports of other glance modules.)
(no author)
committed
def gui(*args):
    """start the Glance graphical user interface

    This option launches the graphical user interface for Glance. This interface includes only some of the
    functionality of the command line version of Glance and may be expanded in the future.

    Files to be loaded may be specified on the command line or loaded inside the GUI.
    The various logging related arguments (quiet, verbose, debug, etc.) may be used if desired.

    Examples:
     glance gui -w
     glance gui A.hdf
     glance gui A.hdf C.nc
    """

    LOG.debug("Launching Glance GUI")

    controller = gui_control.GlanceGUIController(get_glance_version_string())

    # pre-select up to two files: the first argument goes in the A slot and the
    # second in the B slot; zip stops at the shorter sequence, so any further
    # arguments are ignored and missing ones are simply not selected
    for file_slot, file_path in zip((A_CONST, B_CONST), args):
        controller.newFileSelected(file_slot, file_path)

    controller.launch_gui()
Eva Schiffer
committed
def report(*args):
    """generate reports of various types depending on your input

    Depending on how many input file paths you give this command it will either generate comparison or
    inspection reports. If you give it a directory instead of a file path it will search the underlying file
    structure to find any appropriately typed files and attempt to create reports for each of them in the
    output directory. If you include two directory paths it will attempt to automatically match files and
    create comparison reports for them.

    Note: If you wish to automatically match files in two directories and they do not have the same names,
    you can strip out sections that may not match using '--stripfromname'. If your names have nothing in
    common Glance will not be able to figure out how to match them and you will have to run separate reports
    for each file pair or provide input directories of files that have been soft linked or renamed to more
    compatible file names.

    If latitude and longitude data are present in the file(s) and specified in the call options, the
    plots will be drawn on a map. The longitude and latitude variable names may be specified with
    --longitude and --latitude command line options. If no longitude or latitude names are specified Glance
    will attempt to use pixel_longitude and pixel_latitude.
    If you would rather plot the data without trying to place it on a map, use the --nolonlat option.

    The created reports and images will be saved at the provided output path in subdirectories named for
    each variable analyzed. If no output path is provided, output will be saved in the current directory.
    Created images will be embedded in the report or visible as separate .png files.

    Note: If you provided one or two directory paths and those paths included more than one set of files
    that Glance is able to generate reports for, those reports will be placed in the output path in
    separate temporarily directories. These directories are labeled numerically and with the name of
    the A file. A "summary.html" summary report for the run will be created in the top level output
    directory when comparing files (when simply inspecting files no summary report will be created).

    If you would prefer to generate reports without images, use the --reportonly option. This option will
    generate the html report but omit the images. This may be significantly faster, depending on your system,
    but the differences between the files may be more difficult to interpret.
    If you need to generate plots but do not want the reports, you can use the --imagesonly option. This
    option will generate only the plots and omit the html reports. Unfortunately there will probably not
    be much time savings, as the images are usually the bulk of the run time.

    Examples:
     glance report A.hdf variable_name_1:: variable_name_2 variable_name_3::missing3 variable_name_4::missing4
     glance report A.nc B.hdf variable_name::missing_value
     glance report C.nc --outputpath=/path/where/output/will/be/placed/
     glance report --longitude=lon_variable_name --latitude=lat_variable_name D.h5 variable_name
     glance report --nolonlat ./A_dir/ ./B_dir/
     glance report --reportonly ./A_dir/

    """
    # sort the arguments: anything that exists on disk is an input path,
    # everything else is treated as a variable name
    files = []
    variables = []
    LOG.debug("Examining arguments to see how many possible file paths we have.")
    for argument_val in args:
        if os.path.exists(argument_val):
            LOG.debug("Argument value is an existing file path: " + argument_val)
            files.append(clean_path(argument_val))
        else:
            LOG.debug("Argument value is not an existing file path, it will be treated as a variable name: " + argument_val)
            variables.append(argument_val)

    # if we have no file paths, just stop now
    if not files:
        LOG.warning("Expected at least one valid file path as input data. "
                    "Unable to generate a report without a file path.")
        return 1

    # organize our command line options
    tempOptions = config_organizer.convert_options_to_dict(options)

    # if there are requested variables and a config file, warn the user that we're going to ignore their request
    if len(variables) > 0 and tempOptions[OPTIONS_CONFIG_FILE_KEY] is not None:
        LOG.warning("User requested specific variables (" + str(variables) + ") on the command line that will be "
                    "overridden by the requested configuration file. Requested variables from the command line will be ignored. "
                    "Please list your desired variables in your configuration file.")

    # if we have one file path, do either one or many inspect reports
    if len(files) == 1:
        a_file_path = files[0]

        # a regular file gets exactly one inspection report
        if not os.path.isdir(a_file_path):
            LOG.info("Generating inspection report for file: " + a_file_path)
            return inspect_library_call(a_file_path, variables, tempOptions)

        # a directory gets one inspection report per file found inside it
        a_files_list = _get_possible_files_from_dir(a_file_path)
        if len(a_files_list) == 0:
            LOG.warning("Unable to find any files to analyze in the given directory path.")
            return 1

        # only split the output into per-file sub directories when there is more than one report
        use_sub_dirs = len(a_files_list) > 1
        to_return = 0
        for report_index, file_path in enumerate(sorted(a_files_list)):
            ops_copy = tempOptions.copy()
            if use_sub_dirs:
                file_name_temp = os.path.splitext(os.path.basename(file_path))[0]
                ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY],
                                                                 "report-" + str(report_index) + "-" + file_name_temp)
            LOG.info("Generating inspection report for file: " + file_path)
            # the return codes of the individual reports are summed
            to_return += inspect_library_call(file_path, variables, ops_copy)
        return to_return

    # from here on there are at least two paths; only the first two are used
    # just in case, let's at least give a warning so the user has some idea why we ignored some paths
    if len(files) > 2:
        LOG.warning("More than two file paths were found in your command line input. "
                    "Only the first two will be used. The rest will be ignored.")

    a_file_path = files[0]
    b_file_path = files[1]

    # if both the paths are regular files, just run one report
    if os.path.isfile(a_file_path) and os.path.isfile(b_file_path):
        LOG.info("Generating comparison report for files: " + a_file_path + " " + b_file_path)
        return reportGen_library_call(a_file_path, b_file_path, variables, tempOptions)

    # anything other than two directories at this point is a combination we can't handle
    if not (os.path.isdir(a_file_path) and os.path.isdir(b_file_path)):
        LOG.error("You have provided one directory path and one file path. "
                  "Please input paths to two directories or two files, not a mixture.")
        return 1

    # both paths are directories, so pair up their files and compare each pair
    file_pairs = _match_files_from_dirs(a_file_path, b_file_path, tempOptions[OPTIONS_RE_TO_STRIP_KEY])

    # if we didn't find anything, warn the user and stop
    if len(file_pairs) == 0:
        LOG.warning("Unable to match any files between the given directories. "
                    "Please check that the files are named the same in both directories.")
        return 1

    # how many potential reports are we making? zero pad names to the same width
    pad_to_digits = len(str(len(file_pairs)))
    use_sub_dirs = len(file_pairs) > 1
    to_return = 0
    summary_report_info = {}

    # run each of the reports, putting them in inner temp dirs when there is more than one
    for report_index, (single_a_file, single_b_file) in enumerate(sorted(file_pairs)):
        ops_copy = tempOptions.copy()
        LOG.info("Generating comparison report for files: " + single_a_file + " " + single_b_file)

        if use_sub_dirs:
            file_name_temp = os.path.splitext(os.path.basename(single_a_file))[0]
            out_dir_name = "report-" + str(report_index).zfill(pad_to_digits) + "-" + file_name_temp
            ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], out_dir_name)
            # when summary info is requested the call returns the code and the summary together
            temp_rc, summary_report_info[out_dir_name] = reportGen_library_call(single_a_file, single_b_file,
                                                                                variables, ops_copy,
                                                                                do_return_summary_info=True)
            to_return += temp_rc
        else:
            to_return += reportGen_library_call(single_a_file, single_b_file, variables, ops_copy)

    # if we did a bunch of reports, make a top level summary
    if len(summary_report_info) > 0:
        LOG.info('Generating summary of all reports created in this run')
        reportModule.generate_and_save_concise_report(tempOptions[OPTIONS_OUTPUT_PATH_KEY], 'summary.html',
                                                      summary_report_info)

    return to_return
def help(command=None):
    """print help for a specific command or list of commands

    print help for a specific command or list of commands
    Examples:
     glance help stats
     glance help report
    """
    print_all_summary = False

    # check to see if the user specified a command to get info on
    if command is None:
        print_all_summary = True
    # if the user did specify a command, just print that one
    else:
        if command.lower() in lower_locals:
            # a command defined without a docstring has __doc__ set to None;
            # fall back to an empty text rather than failing on None.split
            doc_text = lower_locals[command.lower()].__doc__ or ""
            split_doc = doc_text.split('\n')
            # the first docstring line is skipped; only the remaining lines are shown
            # NOTE(review): the loop body was not visible in the reviewed text; printing each
            # line as-is is assumed here -- confirm against the repository version
            for line in split_doc[1:]:
                print(line)
        else:
            # unknown command name, so show the list of everything that is available
            print_all_summary = True

    # print out a list of summaries for each command
    if print_all_summary:
        print("\n" + _get_all_commands_help_string(commands,))
Eva Schiffer
committed
# all the local public functions are considered part of glance, collect them up
commands.update(dict(x for x in list(locals().items()) if x[0] not in prior))
Eva Schiffer
committed
# lowercase locals
# Future: this is an awkward use and could be made more elegant
lower_locals = {}
for command_key in commands:
Eva Schiffer
committed
lower_locals[command_key.lower()] = locals()[command_key]
# get our command line argument handling set up
options = config_organizer.parse_arguments(get_glance_version_string(),
list(lower_locals.keys()),
_get_all_commands_help_string(commands,), )
args = options.misc
if options.self_test:
import doctest
doctest.testmod()
sys.exit(2)
# set up the logging level based on the options the user selected on the command line
lvl = logging.WARNING
if options.debug:
lvl = logging.DEBUG
elif options.verbose:
lvl = logging.INFO
elif options.quiet:
lvl = logging.ERROR
logging.basicConfig(level=lvl)
# if what the user asked for is not one of our existing functions, print the help
to_return = 0
if (options.command == "") or (options.command not in lower_locals):
to_return = 0
else :
options.print_help()
help()
to_return = 9
Eva Schiffer
committed
# call the function the user named, given the arguments from the command line, lowercase the request to ignore case
return_code = lower_locals[options.command](*args)
to_return = 0 if return_code is None else return_code
LOG.debug("Glance run complete, returning code: " + str(to_return))
return to_return