diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index 533e2d0e2bfa60f2077e38e6f534d30277e2f1dc..59a9d3ffceca82eb1cf5e543a1c1d7da0dc4debd 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -11,7 +11,7 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved. #from pprint import pprint, pformat -import os, sys, logging, re, datetime +import os, sys, logging, re, datetime, glob from numpy import * import numpy from urllib.parse import quote @@ -48,6 +48,56 @@ from glance.gui_constants import A_CONST, B_CONST LOG = logging.getLogger(__name__) +def _get_possible_files_from_dir (dir_path) : + """given a path to a directory, return all the paths to files we think we can open in that directory + + """ + + # find all the appropriate files in a_path + possible_extensions = io.get_acceptable_file_extensions() + found_files = set() + for filepath in glob.iglob(os.path.join(dir_path, "**"), recursive=True, ): + ext_txt = filepath.split(".")[-1] + + if ext_txt in possible_extensions: + found_files.add(filepath) + + return found_files + +def _match_files_from_dirs (a_path, b_path, ) : + """given two paths to directories, try to match up the files we can analyze in them + + """ + + # find all the files in the a path we might be able to open + found_a_files = _get_possible_files_from_dir(a_path) + + LOG.debug("Found " + str(len(found_a_files)) + " possible files in the A directory: ") + for filepath in found_a_files : + LOG.debug(filepath) + + # TODO, when we get to python 3.9, we can use str.removeprefix but until then + def _remove_prefix(text, prefix): + if text.startswith(prefix): + return text[len(prefix):] + return None + + # test to see if there is a matching file in the b_path for each a_path file + file_pairs = set() + for a_filepath in found_a_files : + inner_path = _remove_prefix(a_filepath, a_path) + b_filepath = os.path.join(b_path, inner_path) + if os.path.exists(b_filepath) : + file_pairs.add((a_filepath, 
b_filepath,)) + + # print out info on each resulting pair + #print ("Found " + str(len(file_pairs)) + " file pair(s): ") + #for a, b in file_pairs : + # print ("A: " + a) + # print ("B: " + b) + + return file_pairs + # TODO, I'd like to move this into a different file at some point def _get_name_info_for_variable(original_display_name, variable_run_info) : """ @@ -1520,7 +1570,117 @@ def main(): if len(args) >= 2: temp_controller.newFileSelected(B_CONST, args[1]) temp_controller.launch_gui() - + + def report (*args) : + """generate reports of various types depending on your input + + Depending on how many input file paths you give this command it will either generate comparison or + inspection reports. If you give it a directory instead of a file path it will search the underlying file + structure to find any appropriately typed files and attempt to create reports for each of them in the + output directory. If you include two directory paths it will attempt to automatically match files in the + same part of the sub-hierarchy with the same names and create comparison reports for them. + + If latitude and longitude data are present in the file(s) and specified in the call options, the + plots will be drawn on a map. The longitude and latitude variable names may be specified with + --longitude and --latitude command line options. If no longitude or latitude names are specified Glance + will attempt to use pixel_longitude and pixel_latitude. + + If you would rather plot the data without trying to place it on a map, use the --nolonlat option. + + The created reports and images will be saved at the provided output path in subdirectories named for + each variable analyzed. If no output path is provided, output will be saved in the current directory. + Created images will be embedded in the report or visible as separate .png files. 
+ + Note: if you provided one or two directory paths and those paths included more than one set of files + that Glance is able to generate reports for, those reports will be placed in the output path in + separate temporary directories. These directories are only labeled numerically at the current + time. In the future we hope to have a summary report available for the run, but this does not currently + exist. + + If you would prefer to generate reports without images, use the --reportonly option. This option will + generate the html report but omit the images. This may be significantly faster, depending on your system, + but the differences between the files may be more difficult to interpret. + + Examples: + + glance inspect_report A.hdf variable_name_1:: variable_name_2 variable_name_3::missing3 variable_name_4::missing4 + glance --outputpath=/path/where/output/will/be/placed/ inspect_report C.nc + glance inspect_report --longitude=lon_variable_name --latitude=lat_variable_name D.h5 variable_name + glance inspect_report --reportonly A.hdf + """ + + # examine the args and see how many valid file paths we have + files = [ ] + variables = [ ] + for argument_val in args : + if os.path.exists(argument_val) : + LOG.debug("argument value is a file path: " + argument_val) + files.append(clean_path(argument_val)) + else : + LOG.debug("argument value is not an existing file path, it will be treated as a variable name: " + argument_val) + variables.append(argument_val) + + # if we have no file paths, just stop now + if len(files) < 1 : + LOG.warn("Expected at least one file path to input data. 
" + + "Unable to generate a report without a file path.") + return 1 + + # organize our command line options + tempOptions = config_organizer.convert_options_to_dict(options) + + # if we have one file path, do either one or many inspect reports + if len(files) <= 1 : + a_file_path = files[0] + # check to see if the file is a dir + if os.path.isdir(a_file_path) : + a_files_list = _get_possible_files_from_dir(a_file_path) + temp_offset = 0 + to_return = 0 + # run each of the reports, putting them in inner temp dirs + for file_path in a_files_list : + ops_copy = tempOptions.copy() + if len(a_files_list) > 1 : + ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], "tmp" + str(temp_offset)) + temp_offset += 1 + to_return += inspect_library_call(file_path, variables, ops_copy, ) + return to_return + else : # in this case we just have a regular file, so run one inspect report + return inspect_library_call(a_file_path, variables, tempOptions, ) + + # this doesn't seem super likely, but just in case, let's at least give a warning so the user has some idea why we ignored some paths + if len(files) > 2: + LOG.warn("More than two file paths were found in your command line input. " + "Only the first two will be used. 
The rest will be discarded.") + + # if we have two file paths, do either one or many comparison reports + if len(files) >= 2 : + a_file_path = files[0] + b_file_path = files[1] + # check to see if the paths are dirs + if os.path.isdir(a_file_path) and os.path.isdir(b_file_path) : + file_pairs = _match_files_from_dirs(a_file_path, b_file_path) + temp_offset = 0 + to_return = 0 + # run each of the reports, putting them in inner temp dirs + for single_a_file, single_b_file in file_pairs : + ops_copy = tempOptions.copy() + if len(file_pairs) > 1 : + ops_copy[OPTIONS_OUTPUT_PATH_KEY] = os.path.join(ops_copy[OPTIONS_OUTPUT_PATH_KEY], "tmp" + str(temp_offset)) + temp_offset += 1 + to_return += reportGen_library_call(single_a_file, single_b_file, variables, ops_copy, ) + return to_return + # if both the paths are regular files, just run one report + elif os.path.isfile(a_file_path) and os.path.isfile(b_file_path) : + return reportGen_library_call(a_file_path, b_file_path, variables, tempOptions, ) + else : + LOG.error("You have provided one directory path and one file path. " + "Please input two directories or two files, not a mixture.") + return 1 + + # if we got to here, something has gone terribly wrong + return 1 + def help(command=None): """print help for a specific command or list of commands e.g. 
help stats diff --git a/pyglance/glance/io.py b/pyglance/glance/io.py index cbdbeed6891a90196fa107b6b111df7961ef0b21..eed4437ca727495a6f51ea264bfb12275b9caa9d 100644 --- a/pyglance/glance/io.py +++ b/pyglance/glance/io.py @@ -13,9 +13,12 @@ from functools import reduce LOG = logging.getLogger(__name__) +Loadable_Types = set() + try: import pyhdf from pyhdf.SD import SD,SDC, SDS, HDF4Error + Loadable_Types.add("hdf") except: LOG.info('no pyhdf module available for HDF4') pyhdf = None @@ -25,6 +28,7 @@ except: try: import h5py from h5py import h5d + Loadable_Types.add("h5") except ImportError: LOG.info('no h5py module available for reading HDF5') h5py = None @@ -32,6 +36,7 @@ except ImportError: # the newer netCDF library that replaced pycdf try: import netCDF4 + Loadable_Types.update(["nc", "nc4", "cdf", ]) except: LOG.info("unable to import netcdf4 library") netCDF4 = None @@ -39,6 +44,7 @@ except: try: import dmv as dmvlib LOG.info('loaded dmv module for AERI data file access') + Loadable_Types.update(["cxs", "rnc", "cxv", "csv", "spc", "sum", "uvs", "aeri", ]) except ImportError: LOG.info('no AERI dmv data file format module') dmvlib = None @@ -46,6 +52,7 @@ except ImportError: try: import adl_blob LOG.info('adl_blob module found for JPSS ADL data file access') + # TODO, what is the loadable file extension? 
except ImportError: LOG.info('no adl_blob format handler available') adl_blob = None @@ -53,6 +60,7 @@ except ImportError: try : from osgeo import gdal LOG.info('loading osgeo module for GeoTIFF data file access') + Loadable_Types.update(["tiff", "tif", "tifa", ]) except : LOG.info('no osgeo available for reading GeoTIFF data files') gdal = None @@ -1429,6 +1437,12 @@ def open(pathname, allowWrite=False): cls = globals()[suffix] return cls(pathname, allowWrite=allowWrite) +def get_acceptable_file_extensions ( ) : + """ + Get a set of the file extensions this module thinks it can open with the currently installed libraries + """ + return Loadable_Types.copy() + if __name__=='__main__': import doctest doctest.testmod()