Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Eva Schiffer
UW-Glance
Commits
e539b47b
Commit
e539b47b
authored
Dec 29, 2021
by
Eva Schiffer
Browse files
stats calls with one input file will now respect config files
parent
bd4120bf
Changes
1
Hide whitespace changes
Inline
Side-by-side
pyglance/glance/compare.py
View file @
e539b47b
...
...
@@ -1177,13 +1177,30 @@ def stats_two_inputs_library_call(afn, bfn, var_list=None,
can also be used as a library routine, simply pass in an output channel
and/or use the returned dictionary of statistics for your own form of
display.
TODO, should this move to a different file?
"""
# set some values for defaults
var_list
=
[]
if
var_list
is
None
else
var_list
var_list
=
[
'.*'
,
]
if
var_list
is
None
else
var_list
options_set
=
{}
if
options_set
is
None
else
options_set
"""
# load the user settings from either the command line or a user defined config file
if len(var_list) <= 0:
var_list = ['.*', ]
pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile =
\
config_organizer.load_config_or_options(afn,
bfn,
options_set,
requestedVars=var_list)
print("pathsTemp: " + str(pathsTemp))
print("runInfo: " + str(runInfo))
print("defaultValues: " + str(defaultValues))
print("requestedNames: " + str(requestedNames))
print("usedConfigFile: " + str(usedConfigFile))
print("options_set: " + str(options_set))
"""
# unpack some options
epsilon_val
=
options_set
[
EPSILON_KEY
]
missing_val
=
options_set
[
OPTIONS_FILL_VALUE_KEY
]
...
...
@@ -1273,55 +1290,105 @@ def stats_two_inputs_library_call(afn, bfn, var_list=None,
return
status_code
# note: if we aren't doing pass/fail, stats will not return anything
def
stats_one_input_library_call
(
afn
,
var_list
=
None
,
options_set
=
None
,
do_document
=
False
,
output_channel
=
sys
.
stdout
):
def
stats_one_input_library_call
(
afn
,
var_list
=
None
,
options_set
=
None
,
do_document
=
False
,
output_channel
=
sys
.
stdout
):
"""
this method handles the actual work of generating stats for a single input file and
can also be used as a library routine, simply pass in an output channel
and/or use the returned dictionary of statistics for your own form of
display.
TODO, should this move to a different file?
"""
# set some values for defaults
var_list
=
[
]
if
var_list
is
None
else
var_list
var_list
=
[
'.*'
,
]
if
var_list
is
None
else
var_list
options_set
=
{
}
if
options_set
is
None
else
options_set
# unpack some options
missing_val
=
options_set
[
OPTIONS_FILL_VALUE_KEY
]
LOG
.
debug
(
"file a: "
+
afn
)
# load the user settings from either the command line or a user defined config file
if
len
(
var_list
)
<=
0
:
var_list
=
[
'.*'
,]
pathsTemp
,
runInfo
,
defaultValues
,
requestedNames
,
usedConfigFile
=
\
config_organizer
.
load_config_or_options
(
afn
,
None
,
# there is no B path
options_set
,
requestedVars
=
var_list
)
# open the file
filesInfo
=
open_and_process_files
([
afn
])
aFile
=
filesInfo
[
afn
][
FILE_OBJECT_KEY
]
aFilePath
=
pathsTemp
[
A_FILE_KEY
]
LOG
.
debug
(
"Opening file A: "
+
aFilePath
)
filesInfo
=
open_and_process_files
([
aFilePath
])
aFile
=
filesInfo
[
aFilePath
][
FILE_OBJECT_KEY
]
# give info on the file and if we used a config file
print
(
'-'
*
32
,
file
=
output_channel
,
)
print
(
"File: "
+
aFilePath
,
file
=
output_channel
,
)
if
usedConfigFile
:
print
(
"Configuration file: "
+
options_set
[
OPTIONS_CONFIG_FILE_KEY
],
file
=
output_channel
,
)
# figure out the variable names and their individual settings
if
len
(
var_list
)
<=
0
:
var_list
=
[
'.*'
]
warn_missing
=
options_set
[
OPTIONS_WARN_MISSING_KEY
]
if
OPTIONS_WARN_MISSING_KEY
in
options_set
else
False
names
=
config_organizer
.
parse_varnames
(
filesInfo
[
COMMON_VAR_NAMES_KEY
],
var_list
,
epsilon
=
None
,
missing
=
missing_val
,
warn_unfound
=
warn_missing
,
)
LOG
.
debug
(
str
(
names
))
doc_each
=
do_document
and
len
(
names
)
==
1
doc_atend
=
do_document
and
len
(
names
)
!=
1
available_variable_names
=
filesInfo
[
COMMON_VAR_NAMES_KEY
]
default_missing_val
=
defaultValues
[
FILL_VALUE_KEY
]
if
FILL_VALUE_KEY
in
defaultValues
else
None
# if we have command line input, we still need to parse the variable names
if
not
usedConfigFile
:
temp_names
=
config_organizer
.
parse_varnames
(
available_variable_names
,
requestedNames
,
epsilon
=
None
,
missing
=
default_missing_val
,
warn_unfound
=
warn_missing
,
)
requestedNames
=
{
}
for
name
,
epsilon
,
missing
in
sorted
(
temp_names
,
key
=
lambda
X
:
X
[
0
])
:
requestedNames
[
name
]
=
{
VARIABLE_TECH_NAME_KEY
:
name
,
# note, since this is an inspection report, the epsilon is not needed
FILL_VALUE_KEY
:
missing
,
}
num_requested_vars
=
len
(
requestedNames
)
LOG
.
debug
(
"Trying to analyze variable names: "
+
str
(
requestedNames
))
for
name
,
epsilon
,
missing
in
sorted
(
names
,
key
=
lambda
X
:
X
[
0
]):
# are we going to put documentation of what the stats mean after each stat or only at the end?
doc_each
=
do_document
and
num_requested_vars
==
1
doc_atend
=
do_document
and
num_requested_vars
!=
1
# if we have no variables, something has gone wrong
if
len
(
requestedNames
)
<=
0
and
warn_missing
:
LOG
.
warning
(
"Unable to find any selected variables to analyze."
)
for
display_name
in
sorted
(
requestedNames
)
:
if
VARIABLE_TECH_NAME_KEY
not
in
requestedNames
[
display_name
]
:
LOG
.
warning
(
"Variable with display name of
\"
"
+
display_name
+
"
\"
has no technical name given. "
"This variable cannot be processed without a techincal name."
)
continue
tech_name
=
requestedNames
[
display_name
][
VARIABLE_TECH_NAME_KEY
]
explain_name
=
display_name
+
"("
+
tech_name
+
")"
if
display_name
!=
tech_name
else
display_name
if
tech_name
not
in
available_variable_names
:
if
warn_missing
:
LOG
.
warning
(
"Requested variable
\"
"
+
explain_name
+
"
\"
is not available in the file. "
"Unable to process this variable."
)
continue
# make sure that it's possible to load this variable
if
not
(
aFile
.
is_loadable_type
(
name
))
:
LOG
.
warning
(
name
+
" is of a type that cannot be loaded using current file handling libraries included with Glance."
+
" Skipping "
+
name
+
"."
)
if
not
(
aFile
.
is_loadable_type
(
tech_name
))
:
LOG
.
warning
(
"
\"
"
+
explain_name
+
"
\"
is of a type that cannot be loaded using current file handling "
"libraries included with Glance."
+
" Skipping "
+
explain_name
+
"."
)
continue
aData
=
aFile
[
name
]
amiss
=
missing
if
missing
is
None
:
amiss
=
aFile
.
missing_value
(
name
)
LOG
.
debug
(
'analyzing %s with missing data value %s'
%
(
name
,
amiss
))
# load the variable data, filtering as needed
aData
=
load_variable_data
(
aFile
,
tech_name
,
dataFilter
=
requestedNames
[
display_name
][
FILTER_FUNCTION_A_KEY
]
if
FILTER_FUNCTION_A_KEY
in
requestedNames
[
display_name
]
else
None
,
variableToFilterOn
=
requestedNames
[
display_name
][
VAR_FILTER_NAME_A_KEY
]
if
VAR_FILTER_NAME_A_KEY
in
requestedNames
[
display_name
]
else
None
,
variableBasedFilter
=
requestedNames
[
display_name
][
VAR_FILTER_FUNCTION_A_KEY
]
if
VAR_FILTER_FUNCTION_A_KEY
in
requestedNames
[
display_name
]
else
None
,
altVariableFileObject
=
dataobj
.
FileInfo
(
requestedNames
[
display_name
][
VAR_FILTER_ALT_FILE_A_KEY
]).
file_object
if
VAR_FILTER_ALT_FILE_A_KEY
in
requestedNames
[
display_name
]
else
None
,
fileDescriptionForDisplay
=
"file A"
)
amiss
=
requestedNames
[
display_name
][
FILL_VALUE_KEY
]
if
FILL_VALUE_KEY
in
requestedNames
[
display_name
]
else
default_missing_val
# if we still don't have a fill value, try to get it from the file
if
amiss
is
None
:
amiss
=
aFile
.
missing_value
(
tech_name
)
LOG
.
debug
(
'analyzing %s with missing data value %s'
%
(
explain_name
,
amiss
))
print
(
'-'
*
32
,
file
=
output_channel
,)
print
(
name
,
file
=
output_channel
,)
print
(
explain_name
,
file
=
output_channel
,)
print
(
''
,
file
=
output_channel
,)
variable_stats
=
statistics
.
StatisticalInspectionAnalysis
.
withSimpleData
(
aData
,
amiss
)
lal
=
list
(
variable_stats
.
dictionary_form
().
items
())
...
...
@@ -1678,6 +1745,12 @@ def main():
do_doc
=
(
options
.
verbose
or
options
.
debug
)
to_return
=
0
# if there are requested variables and a config file, warn the user that we're going to ignore their request
if
len
(
variables
)
>
0
and
tempOptions
[
OPTIONS_CONFIG_FILE_KEY
]
is
not
None
:
LOG
.
warning
(
"User requested specific variables ("
+
str
(
variables
)
+
") on the command line that will be "
"overridden by the requested configuration file. Requested variables from the command line will be ignored. "
"Please list your desired variables in your configuration file."
)
# if we were given an output path use that to create the stats
toPrintTo
=
sys
.
stdout
outpath
=
clean_path
(
options
.
outputpath
)
...
...
@@ -1714,6 +1787,7 @@ def main():
options_set
=
tempOptions
,
do_document
=
do_doc
,
output_channel
=
toPrintTo
)
# TODO handle the return code differently for pass/fail testing
# close our text output file if needed
if
fileForOutput
is
not
None
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment