Skip to content
Snippets Groups Projects
compare.py 107 KiB
Newer Older
(no author)'s avatar
(no author) committed
#!/usr/bin/env python
# encoding: utf-8
"""

Top-level routines to compare two files.


Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""

import os, sys, logging, datetime, glob, re
from urllib.parse import quote
(no author)'s avatar
(no author) committed

import locale
locale.setlocale(locale.LC_ALL,'') # Initialize our locale

# this is a hack to keep glance from needing pyqt unless you run the gui
        matplotlib.use('Qt5Agg')
        import glance.gui_controller as gui_control
    except ImportError :
        print ("*** Unable to import PyQt5. Please install PyQt5 and add it to your PYTHONPATH in order to use the Glance GUI. ***")
import glance.io     as io
import glance.data   as dataobj
import glance.report as reportModule
import glance.stats  as statistics
import glance.plotcreatefns as plotcreate
import glance.collocation   as collocation
import glance.config_organizer as config_organizer
from glance.util        import clean_path, rsync_or_copy_files, get_glance_version_string, get_run_identification_info, setup_dir_if_needed
from glance.load        import get_UV_info_from_magnitude_direction_info, load_variable_data, open_and_process_files, handle_lon_lat_info, handle_lon_lat_info_for_one_file, ValueErrorStringToFloat
from glance.lonlat_util import VariableComparisonError
from glance.constants   import *
from glance.gui_constants import A_CONST, B_CONST
def _get_all_commands_help_string (commands_dict, ) :
    """
    given the dictonary of commands, compose the string with brief information about all of them
    """

    to_return = "Available commands in Glance:\n"

    for command_name in commands_dict :
        short_desc = commands_dict[command_name].__doc__.split('\n')[0]
        to_return += "\t%-16s %s\n" % (command_name, short_desc)

    return to_return

def _get_possible_files_from_dir (dir_path) :
    """collect the paths under dir_path whose extension is one the io module accepts

    The directory is searched recursively. A path is kept when the text after
    its last "." is in io.get_acceptable_file_extensions(). The result is a
    set of path strings.
    """

    acceptable = io.get_acceptable_file_extensions()
    search_pattern = os.path.join(dir_path, "**")

    # the text after the final "." is treated as the extension
    return {
        candidate
        for candidate in glob.iglob(search_pattern, recursive=True)
        if candidate.rsplit(".", 1)[-1] in acceptable
    }

def _match_files_from_dirs (a_path, b_path, strip_expressions=None, ) :
    """given two paths to directories, try to match up the files we can analyze in them

    a_path and b_path are the two directories to search.
    strip_expressions is optional and is replaced by an empty list when None.
    """

    if strip_expressions is None :
        strip_expressions = [ ]

    # find all the files in the a path we might be able to open
    found_a_files = _get_possible_files_from_dir(a_path)

    LOG.debug("Found " + str(len(found_a_files)) + " possible file(s) in the A directory: ")
    for filepath in found_a_files :
        LOG.debug(filepath)

    # TODO, when we get to python 3.9, we can use str.removeprefix but until then
    # NOTE(review): unlike str.removeprefix, this helper returns None (not the
    # unchanged text) when the prefix is absent; the caller below slices the
    # result, so that case would raise TypeError -- confirm every a_filepath
    # really starts with a_path
    def _remove_prefix(text, prefix):
        if text.startswith(prefix):
            return text[len(prefix):]
        return None

    # test to see if there is a matching file in the b_path for each a_path file
    # (a pair is recorded when the same inner path exists under b_path)
    file_pairs = set()
    for a_filepath in found_a_files :
        inner_path = _remove_prefix(a_filepath, a_path)[1:] # for some reason this leaves a prefix / on the inner_path, so we need to remove that
        b_filepath = os.path.join(b_path, inner_path)
        if os.path.exists(b_filepath) :
            file_pairs.add((a_filepath, b_filepath,))
    # NOTE(review): the triple-quote below has no visible closing quote inside
    # this function, and no return statement is visible either -- lines appear
    # to be missing here; confirm against the complete file before relying on
    # anything that follows
    """

    # find all the files in the b path we might be able to open
    found_b_files = _get_possible_files_from_dir(b_path)

    LOG.debug("Found " + str(len(found_b_files)) + " possible file(s) in the B directory: ")
    for filepath in found_a_files:
        LOG.debug(filepath)

    def strip_expressions_from_base (file_path, expressions,) :
        clean_name = os.path.basename(file_path)
        for expr in expressions :
            clean_name = re.sub(expr, '', clean_name)
        return clean_name

    # try to pair up our files if possible
    file_pairs = set()
    for a_filepath in found_a_files :
        clean_a = strip_expressions_from_base(a_filepath, strip_expressions,)
        for b_filepath in found_b_files :
            clean_b = strip_expressions_from_base(b_filepath, strip_expressions,)
            if clean_a == clean_b :
                file_pairs.add((a_filepath, b_filepath,))
# TODO, I'd like to move this into a different file at some point
def _get_name_info_for_variable (original_display_name, variable_run_info) :
    """
    work out the set of names used for a variable from its run info

    returns a tuple of (technical_name, b_variable_technical_name, explanation_name):

    technical_name -            the value stored under VARIABLE_TECH_NAME_KEY in variable_run_info
    b_variable_technical_name - the value stored under VARIABLE_B_TECH_NAME_KEY if that key is
                                present, otherwise the same as technical_name
    explanation_name -          the technical name (or "technical / b technical" when a B name
                                is given); if that differs from original_display_name it is
                                wrapped as "original_display_name (explanation)"
    """

    technical_name = variable_run_info[VARIABLE_TECH_NAME_KEY]

    # B may be listed under a different name; if so the explanation shows both
    if VARIABLE_B_TECH_NAME_KEY in variable_run_info :
        b_variable_technical_name = variable_run_info[VARIABLE_B_TECH_NAME_KEY]
        explanation_name = technical_name + " / " + b_variable_technical_name
    else :
        b_variable_technical_name = technical_name
        explanation_name = technical_name

    # nothing more to add when the display name already matches the explanation
    if original_display_name == explanation_name :
        return technical_name, b_variable_technical_name, explanation_name

    # otherwise show the display name with the explanation in parentheses
    combined_name = original_display_name + ' (' + explanation_name + ')'
    return technical_name, b_variable_technical_name, combined_name

def colocateToFile_library_call(a_path, b_path, var_list=None,
                                options_set=None,
                                # todo, this doesn't yet do anything
                                do_document=False,
                                # todo, the output channel does nothing at the moment
                                output_channel=sys.stdout) :
    """
    this method handles the actual work of the colocateData command line tool
    and can be used as a library routine.
    
    TODO, properly document the options
    """

    # set some values for defaults
    var_list = [ ] if var_list is None else var_list
    options_set = { } if options_set is None else options_set

    # load the user settings from either the command line or a user defined config file
    pathsTemp, runInfo, defaultValues, requestedNames, usedConfigFile = config_organizer.load_config_or_options(a_path, b_path,
                                                                                                                options_set,
                                                                                                                requestedVars = var_list)
    setup_dir_if_needed(pathsTemp[OUT_FILE_KEY], "output")
    # make copies of the input files for colocation TODO, fix paths
    [pathsTemp[A_FILE_KEY], pathsTemp[B_FILE_KEY]] = rsync_or_copy_files ([pathsTemp[A_FILE_KEY], pathsTemp[B_FILE_KEY]],
                                                                          target_directory=pathsTemp[OUT_FILE_KEY],
                                                                          additionalFileNameSuffix='-collocated')
    # open the files
    LOG.info("Processing File A:")
    aFile = dataobj.FileInfo(pathsTemp[A_FILE_KEY], allowWrite=True)
Loading
Loading full blame...