Commit 7497b180 authored by Eva Schiffer

part 1/2 of getting two file stats to respect config files

parent e539b47b
@@ -1175,15 +1175,12 @@ def stats_two_inputs_library_call(afn, bfn, var_list=None,
"""
this method handles the actual work of the stats comparing two files and
can also be used as a library routine, simply pass in an output channel
and/or use the returned dictionary of statistics for your own form of
display.
"""
# set some values for defaults
var_list = ['.*', ] if var_list is None else var_list
options_set = {} if options_set is None else options_set
"""
# load the user settings from either the command line or a user defined config file
if len(var_list) <= 0:
var_list = ['.*', ]
@@ -1193,6 +1190,7 @@ def stats_two_inputs_library_call(afn, bfn, var_list=None,
options_set,
requestedVars=var_list)
"""
print("pathsTemp: " + str(pathsTemp))
print("runInfo: " + str(runInfo))
print("defaultValues: " + str(defaultValues))
@@ -1201,63 +1199,133 @@ def stats_two_inputs_library_call(afn, bfn, var_list=None,
print("options_set: " + str(options_set))
"""
# unpack some options
epsilon_val = options_set[EPSILON_KEY]
missing_val = options_set[OPTIONS_FILL_VALUE_KEY]
# extract some values to use later
default_epsilon = defaultValues[EPSILON_KEY] if EPSILON_KEY in defaultValues else None
# TODO, epsilon percent?
default_fillval = defaultValues[FILL_VALUE_KEY] if FILL_VALUE_KEY in defaultValues else None
# TODO, failure tolerances?
do_pass_fail = options_set[DO_TEST_PASSFAIL_KEY]
LOG.debug ("file a: " + afn)
LOG.debug ("file b: " + bfn)
warn_missing = options_set[OPTIONS_WARN_MISSING_KEY] if OPTIONS_WARN_MISSING_KEY in options_set else False
# open the files
filesInfo = open_and_process_files([afn, bfn])
aFile = filesInfo[afn][FILE_OBJECT_KEY]
bFile = filesInfo[bfn][FILE_OBJECT_KEY]
a_file_path = pathsTemp[A_FILE_KEY]
b_file_path = pathsTemp[B_FILE_KEY]
LOG.debug("Opening files: " + a_file_path + "\n " + b_file_path)
filesInfo = open_and_process_files([a_file_path, b_file_path,])
aFile = filesInfo[a_file_path][FILE_OBJECT_KEY]
bFile = filesInfo[b_file_path][FILE_OBJECT_KEY]
a_var_names = filesInfo[a_file_path][FILE_VARIABLE_NAMES_KEY]
b_var_names = filesInfo[b_file_path][FILE_VARIABLE_NAMES_KEY]
common_variable_names = filesInfo[COMMON_VAR_NAMES_KEY]
# give info on the files and if we used a config file
print('-' * 32, file=output_channel, )
print("File A: " + a_file_path, file=output_channel, )
print("File B: " + b_file_path, file=output_channel, )
if usedConfigFile :
print("Configuration file: " + options_set[OPTIONS_CONFIG_FILE_KEY], file=output_channel, )
# information for testing pass/fail if needed
has_failed = False
# TODO, remove after we load these elsewhere
epsilon_fail_tolerance = 0.0
nonfinite_fail_tolerance = 0.0
# figure out the variable names and their individual settings
if len(var_list) <= 0 :
var_list = ['.*']
warn_missing = options_set[OPTIONS_WARN_MISSING_KEY] if OPTIONS_WARN_MISSING_KEY in options_set else False
names = config_organizer.parse_varnames( filesInfo[COMMON_VAR_NAMES_KEY], var_list,
epsilon_val, missing_val,
warn_unfound=warn_missing,)
LOG.debug(str(names))
doc_each = do_document and len(names)==1
doc_atend = do_document and len(names)!=1
for name, epsilon, missing in sorted(names, key=lambda X:X[0]):
# if we have command line input, we still need to parse the variable names
if not usedConfigFile:
temp_names = config_organizer.parse_varnames(common_variable_names, requestedNames,
epsilon=default_epsilon, missing=default_fillval,
warn_unfound=warn_missing, )
requestedNames = { }
for name, epsilon, missing in sorted(temp_names, key=lambda X: X[0]) :
requestedNames[name] = {
VARIABLE_TECH_NAME_KEY: name,
EPSILON_KEY: epsilon,
FILL_VALUE_KEY: missing,
}
num_requested_vars = len(requestedNames)
LOG.debug("Trying to analyze variable names: " + str(requestedNames))
# are we going to put documentation of what the stats mean after each stat or only at the end?
doc_each = do_document and num_requested_vars == 1
doc_atend = do_document and num_requested_vars != 1
# if we have no variables, something has gone wrong
if len(requestedNames) <= 0 and warn_missing:
LOG.warning("Unable to find any selected variables to analyze.")
for display_name in sorted(requestedNames) :
if VARIABLE_TECH_NAME_KEY not in requestedNames[display_name] :
LOG.warning("Variable with display name of \"" + display_name + "\" has no technical name given. "
"This variable cannot be processed without a techincal name.")
continue
tech_name = requestedNames[display_name][VARIABLE_TECH_NAME_KEY]
tech_b_name = tech_name if VARIABLE_B_TECH_NAME_KEY not in requestedNames[display_name] else requestedNames[display_name][VARIABLE_B_TECH_NAME_KEY]
explain_name = display_name
if tech_name != display_name or tech_b_name != display_name :
if tech_name == tech_b_name :
explain_name += "(" + tech_name + ")"
else :
explain_name += "(" + tech_name + "/" + tech_b_name + ")"
if tech_name not in a_var_names :
if warn_missing :
LOG.warning("Requested variable \"" + explain_name + "\" is not available in the A file. "
"Unable to process this variable.")
continue
if tech_b_name not in b_var_names :
if warn_missing :
LOG.warning("Requested variable \"" + explain_name + "\" is not available in the B file. "
"Unable to process this variable.")
continue
# make sure that it's possible to load this variable
if not(aFile.is_loadable_type(name)) or not(bFile.is_loadable_type(name)) :
LOG.warning(name + " is of a type that cannot be loaded using current file handling libraries included with Glance." +
" Skipping " + name + ".")
if not(aFile.is_loadable_type(tech_name)) or not(bFile.is_loadable_type(tech_b_name)) :
LOG.warning(explain_name + " is of a type that cannot be loaded using current file handling "
"libraries included with Glance. Skipping " + explain_name + ".")
continue
# load the variable data, filtering as needed
try :
aData = aFile[name]
bData = bFile[name]
aData = load_variable_data(aFile, tech_name,
dataFilter=requestedNames[display_name][FILTER_FUNCTION_A_KEY] if FILTER_FUNCTION_A_KEY in requestedNames[display_name] else None,
variableToFilterOn=requestedNames[display_name][VAR_FILTER_NAME_A_KEY] if VAR_FILTER_NAME_A_KEY in requestedNames[display_name] else None,
variableBasedFilter=requestedNames[display_name][VAR_FILTER_FUNCTION_A_KEY] if VAR_FILTER_FUNCTION_A_KEY in requestedNames[display_name] else None,
altVariableFileObject=dataobj.FileInfo(requestedNames[display_name][VAR_FILTER_ALT_FILE_A_KEY]).file_object if VAR_FILTER_ALT_FILE_A_KEY in requestedNames[display_name] else None,
fileDescriptionForDisplay="file A")
bData = load_variable_data(bFile, tech_b_name,
dataFilter=requestedNames[display_name][FILTER_FUNCTION_B_KEY] if FILTER_FUNCTION_B_KEY in requestedNames[display_name] else None,
variableToFilterOn=requestedNames[display_name][VAR_FILTER_NAME_B_KEY] if VAR_FILTER_NAME_B_KEY in requestedNames[display_name] else None,
variableBasedFilter=requestedNames[display_name][VAR_FILTER_FUNCTION_B_KEY] if VAR_FILTER_FUNCTION_B_KEY in requestedNames[display_name] else None,
altVariableFileObject=dataobj.FileInfo(requestedNames[display_name][VAR_FILTER_ALT_FILE_B_KEY]).file_object if VAR_FILTER_ALT_FILE_B_KEY in requestedNames[display_name] else None,
fileDescriptionForDisplay="file B")
except io.IONonnumericalTypeError as bad_data_error :
LOG.error("Skipping variable %s because it is of a non-numerical type "
"(may indicate array of variable-length strings): %s" % (name, repr(bad_data_error)))
"(may indicate array of variable-length strings): %s" % (explain_name, repr(bad_data_error)))
continue
if missing is None:
amiss = aFile.missing_value(name)
bmiss = bFile.missing_value(name)
else:
amiss,bmiss = missing,missing
LOG.debug('comparing %s with epsilon %s and missing %s,%s' % (name,epsilon,amiss,bmiss))
print('-'*32, file=output_channel,)
print(name, file=output_channel,)
print('', file=output_channel)
variable_stats = statistics.StatisticalAnalysis.withSimpleData(aData, bData, amiss, bmiss, epsilon=epsilon)
# figure out what fill values we're using to detect missing data
amiss = requestedNames[display_name][FILL_VALUE_KEY] if FILL_VALUE_KEY in requestedNames[display_name] \
else default_fillval
bmiss = requestedNames[display_name][FILL_VALUE_ALT_IN_B_KEY] if FILL_VALUE_ALT_IN_B_KEY in requestedNames[display_name] \
else amiss
# if we still don't have fill values, try to load them from the files
if amiss is None :
amiss = aFile.missing_value(tech_name)
if bmiss is None :
bmiss = bFile.missing_value(tech_b_name)
temp_epsilon = requestedNames[display_name][EPSILON_KEY] if EPSILON_KEY in requestedNames[display_name] \
else default_epsilon
LOG.debug('comparing %s with epsilon %s and missing %s,%s' % (explain_name,temp_epsilon,amiss,bmiss))
print('-'*32, file=output_channel,)
print(explain_name, file=output_channel,)
print('', file=output_channel,)
variable_stats = statistics.StatisticalAnalysis.withSimpleData(aData, bData, amiss, bmiss, epsilon=temp_epsilon)
# if we're doing pass/fail testing, do that now
if do_pass_fail :
if do_pass_fail : # TODO, this is probably not right now that we may have better defaults
tempDefaults = config_organizer.get_simple_variable_defaults()
didPass, _, _, _ = variable_stats.check_pass_or_fail(epsilon_failure_tolerance=epsilon_fail_tolerance,
@@ -1288,15 +1356,15 @@ def stats_two_inputs_library_call(afn, bfn, var_list=None,
status_code = 3
LOG.debug("stats is returning status code: " + str(status_code))
return status_code
# note: if we aren't doing pass/fail, stats will not return anything
# note: if we aren't doing pass/fail, stats will return zero
return 0
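
The docstring for stats_two_inputs_library_call notes that it can be driven as a library routine by passing in an output channel. A minimal sketch of that usage follows; the import locations and the exact option keys the routine requires are assumptions (the key strings sit behind constants such as EPSILON_KEY in glance's code), so treat this as illustrative rather than as the commit's own API:

from io import StringIO
# assumed import locations -- adjust to wherever your glance install exposes these
from glance.compare import stats_two_inputs_library_call
from glance.constants import EPSILON_KEY, OPTIONS_FILL_VALUE_KEY, DO_TEST_PASSFAIL_KEY

capture = StringIO()
status = stats_two_inputs_library_call(
    "A.hdf", "B.hdf",
    var_list=["cloud_top_height"],
    options_set={
        EPSILON_KEY: 0.0001,             # comparison epsilon
        OPTIONS_FILL_VALUE_KEY: -999.0,  # fill value used to detect missing data
        DO_TEST_PASSFAIL_KEY: False,     # skip pass/fail testing
    },
    output_channel=capture,              # collect the report instead of writing to stdout
)
print(capture.getvalue())  # the formatted statistics text
print(status)              # 0, or a non-zero status code when pass/fail testing fails

stats_one_input_library_call below follows the same pattern with a single input file.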
def stats_one_input_library_call (afn, var_list=None, options_set=None, do_document=False,
output_channel=sys.stdout):
"""
this method handles the actual work of generating stats for a single input file and
can also be used as a library routine, simply pass in an output channel
and/or use the returned dictionary of statistics for your own form of
display.
"""
# set some values for defaults
@@ -1707,6 +1775,11 @@ def main():
If you provide an output path, statistics will be saved to a stats.txt file in that directory.
If you provide a configuration file stats will attempt to use that file to configure the
variables and settings to use for your analysis. stats will ignore many configuration file
settings for things that are not relevant to it, like navigation information and plot
controlling settings.
Run with -v to get more detailed information about the statistics being reported.
Examples:
@@ -1714,7 +1787,7 @@ def main():
glance stats A.hdf B.hdf
glance stats --epsilon=0.00001 C.nc B.hdf baseline_cmask_seviri_cloud_mask:0.002:
glance -w stats A.hdf D.h5 imager_prof_retr_abi_.*::-999 'nwp__.__index:0:'
glance stats A.nc cloud_top_height
glance stats -c config.py A.nc cloud_top_height
glance stats C.hdf cloud_top_*::-999.0
"""