aitf_ql.py

#!/usr/bin/env python
# encoding: utf-8
"""
aitf_ql.py

Purpose: Plot a dataset from an AIT Framework output file.

Minimum commandline:

    python aitf_ql.py  INPUTFILE

where...

    INPUTFILE: The path(s) to one or more AIT Framework PUG formatted output files, or to directories containing them.

Created by Eva Schiffer <eva.schiffer@ssec.wisc.edu> on 2017-01-31.
Copyright (c) 2017 University of Wisconsin Regents. All rights reserved.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import sys, logging, os, glob
import numpy

import matplotlib
matplotlib.use('Agg')

import plotting_util, data_util

import warnings

# every module should have a LOG object
LOG = logging.getLogger(__file__)

# the size, in inches, that main() divides the larger pixel dimension of the
# requested image by in order to pick the dpi used when saving figures
IMG_SIZE_MAX_INCHES = 10.0

def _argparse(argv=None):
    """
    Method to encapsulate the option parsing and various setup tasks.

    Builds the command line parser, parses the arguments, and configures the
    logging module (level, format and output stream) based on the requested
    verbosity.

    :param argv: An optional list of argument strings to parse. When None (the
                 default) the arguments are taken from sys.argv, which is what
                 the command line entry point wants.
    :return: A tuple of (the parsed argparse namespace, the version string).
    """

    import argparse

    defaults = {
                 'stride'              : -1,
                 'image_size'          : [2000., 1600.],
                 'use_raw_data_range'  : False,
                 'custom_data_range'   : None,
                 'use_file_data_range' : False,
                 'do_no_conversions'   : False,
                 'only_var_name'       : None,
                 'unnavigated'         : False,
                 'dataonly'            : False,
                 'output_dir'          : './',
                 'output_file_prefix'  : '',
                }

    description = '''Create quicklooks plots from CSPP Geo NetCDF product file.'''

    version = 'CSPP ABI Quicklooks v0.7'

    parser = argparse.ArgumentParser(
                                     description=description,
                                     )

    # Mandatory/positional arguments

    # the path to the file to plot from
    parser.add_argument(
                      action='store',
                      nargs='+',
                      dest='input_files',
                      type=str,
                      help='''Paths to one or more CSPP Geo NetCDF product files.
                       If a directory is given, the program will examine all files
                       in that directory to see if they are appropriate.
                       This argument will also expand meta-characters understood by python's
                       glob module (including *, ?, and +, among others). Please see
                       the documentation for glob for specific syntax.
                       '''
                      )

    # Optional arguments

    parser.add_argument('--image_size',
                      action="store",
                      dest="image_size",
                      default=defaults["image_size"],
                      type=float,
                      nargs=2,
                      metavar=('WIDTH', 'HEIGHT'),
                      help="""The size of the output image [*width*, *height*]
                      in pixels. User input will be rounded to the nearest 10 pixels.
                      [default: '{}']""".format(defaults["image_size"])
                      )

    parser.add_argument('-o','--output_dir',
                      action="store",
                      dest="output_dir",
                      default=defaults["output_dir"],
                      type=str,
                      help='''The directory where the generated quicklooks images will be put.
                      '''
                      )

    parser.add_argument('-O','--output_file_prefix',
                      action="store",
                      dest="output_file_prefix",
                      default=defaults["output_file_prefix"],
                      type=str,
                      help="""String to prepend to the automatically generated
                      png names. [default: {}]""".format(defaults["output_file_prefix"])
                      )

    parser.add_argument('-S','--stride',
                      action="store",
                      dest="stride",
                      default=defaults["stride"],
                      type=int,
                      help='''Sample every STRIDE rows and columns in the data,
                      where stride is specified as a positive integer.

                      By default or if you pass in a negative number the software will
                      generate a stride automatically to decrease your data
                      size to near the output image size (in pixels).

                      If you want to plot all the data, pass in 1 for the stride.
                      Warning: Plotting all the data can cause slow plotting and
                      high memory usage for large data sets.'''
                      )

    parser.add_argument('--customrange',
                        action="store",
                        dest="custom_data_range",
                        default=defaults["custom_data_range"],
                        type=str,
                        help='''Use a custom range for plotting quicklooks. The range should be
                                defined using the syntax min:max and will be applied to all
                                quicklooks generated during this run.

                                By default the quicklooks will be plotted with a colorbar using
                                custom range limits for some variables and the valid_range
                                attributes for other variables.

                                If --customrange and --rawrange are both present, the custom
                                range will be used.
                                If --customrange and --fileloadedrange are both present, the
                                custom range will be used.'''
                        )

    parser.add_argument('--rawrange',
                        action="store_true",
                        dest="use_raw_data_range",
                        default=defaults["use_raw_data_range"],
                        help='''Do not pay attention to the valid range or any flag information,
                                just display the colormap using the raw data range.

                                By default the quicklooks will be plotted with a colorbar using
                                custom range limits for some variables and the valid_range
                                attributes for other variables.

                                If --rawrange and --customrange are both present, the custom
                                range will be used.
                                If --rawrange and --fileloadedrange are both present, the raw
                                range will be used.'''
                        )

    parser.add_argument('--fileloadedrange',
                        action="store_true",
                        dest="use_file_data_range",
                        default=defaults["use_file_data_range"],
                        help='''Use the range data in the valid_range attribute given in the attributes
                                in the data file for plotting quicklooks.

                                By default the quicklooks will be plotted with a colorbar using
                                custom range limits for some variables and the valid_range
                                attributes for other variables.

                                If --customrange and --fileloadedrange are both present, the
                                custom range will be used.
                                if --rawrange and --fileloadedrange are both included in the command
                                line call, the raw range will be used. '''
                        )

    parser.add_argument('--no-convert',
                        action="store_true",
                        dest="do_no_conversions",
                        default=defaults["do_no_conversions"],
                        help='''Do not convert level one radiance data to reflectances or
                                brightness temperatures.'''
                        )

    parser.add_argument('--onlyvar',
                        action="store",
                        dest="only_var_name",
                        default=defaults["only_var_name"],
                        type=str,
                        help='''Process only the variable name given.
                                No other variables from the input files will be used
                                to make quicklooks. If your variable name has spaces
                                use single quotes around it.'''
                        )

    parser.add_argument('--unnavigated',
                      action="store_true",
                      dest="unnavigated",
                      default=defaults["unnavigated"],
                      help="Do not navigate the data on a map, just display it as an image."
                      )

    parser.add_argument('--dataonly',
                        action="store_true",
                        dest="dataonly",
                        default=defaults["dataonly"],
                        help="Plot the data at full resolution and nothing else. " +
                             "No labeling or navigation will appear in the output images."
                        )

    # the default of -1 lets us tell "no -v at all" (use INFO) apart from a single -v (ERROR)
    parser.add_argument("-v", "--verbosity",
                      dest='verbosity',
                      action="count",
                      default=-1,
                      help='''each occurrence increases verbosity 1 level.
                      If you do not include this argument the verbosity will default to INFO.
                      -v=ERROR -vv=WARNING -vvv=INFO -vvvv=DEBUG'''
                      )

    parser.add_argument('-V', '--version',
                      action='version',
                      version=version,
                      help='''Print the CSPP FW Quicklooks package version'''
                      )

    # parse_args falls back to sys.argv[1:] when argv is None
    args = parser.parse_args(argv)

    # Set up the logging
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    lvl_desc = ["ERROR", "WARN", "INFO", "DEBUG",]
    # if they entered nothing, default to info; if they had a zillion v's, just do debug
    which_idx_temp = 2 if args.verbosity < 0 else min(args.verbosity, len(levels) - 1)
    level = levels[which_idx_temp]

    if level == logging.DEBUG :
        # even if we are turned up to debug, I do not want to see matplotlib's debug messages,
        # because the font_manager writes novels
        logging.getLogger('matplotlib').setLevel(logging.WARN)
        console_logFormat = '%(asctime)s.%(msecs)03d (%(levelname)s) : %(filename)s : %(funcName)s : %(lineno)d: %(message)s'
    else :
        # unless we are turned up to debug, suppress warnings on underlying libraries like matplotlib and numpy
        warnings.filterwarnings("ignore")
        console_logFormat = '%(asctime)s.%(msecs)03d (%(levelname)s) : %(message)s'
    date_format = '%Y-%m-%d %H:%M:%S'

    logging.basicConfig(stream  = sys.stdout,
                        level   = level,
                        format  = console_logFormat,
                        datefmt = date_format)

    LOG.info("Logging level set to %s", lvl_desc[which_idx_temp])

    return args,version

def _clean_path(string_path):
    """
    Build an absolute version of a path, with any leading '~' expanded and
    any '.' or '..' components resolved.

    :param string_path: The path to clean up, or None.
    :return: The cleaned path, or None if no path was given.
    """
    # nothing to clean, so hand the None straight back
    if string_path is None:
        return None

    home_expanded = os.path.expanduser(string_path)
    return os.path.abspath(home_expanded)

def _setup_dir_if_needed(dirPath, descriptionName) :
    """
    Make sure a directory exists, creating it (and any missing parents) if it does not.

    :param dirPath: The path of the directory that should exist.
    :param descriptionName: A short human readable name for the directory, used in log messages.
    """
    # an existing directory needs no work
    if os.path.isdir(dirPath) :
        return

    missing_msg  = "Specified " + descriptionName + " directory (" + dirPath + ") does not exist."
    creating_msg = "Creating " + descriptionName + " directory."
    LOG.info(missing_msg)
    LOG.info(creating_msg)
    os.makedirs(dirPath)

def _find_acceptable_paths_to_plot (input_paths) :
    """
    Given input paths to files or directories, determine which files (including the files directly inside
    any directories given) are minimally acceptable to plot quicklooks for.

    Each input is expanded with glob first. A path that is reached more than once (for example through
    overlapping patterns) is only considered once, and the contents of a directory are considered in
    sorted order so the result does not depend on the order the file system lists them in.

    :param input_paths: A list of paths to either a file to plot quicklooks for or a directory
    to find files to plot quicklooks for.
    :return: A list of (path, type info, module to use) tuples, one for each recognized file.
    The list is empty if no acceptable paths were found.
    """

    # the paths we think should be considered for whether they are the right file type
    paths_to_examine = []
    already_queued   = set()

    def _queue_path (path_to_queue) :
        # remember what we have queued so the same file is never plotted twice
        if path_to_queue not in already_queued :
            already_queued.add(path_to_queue)
            paths_to_examine.append(path_to_queue)

    # For each input they gave us. Make sure it's not something that needs expansion.
    for temp_path in input_paths :

        glob_list = glob.glob(temp_path)

        # make sure to give a warning if glob thinks this path is garbage
        if not glob_list :
            LOG.warning("No files exist to match (" + temp_path + "). This path will not be processed.")
            continue

        for temp_glob_path in glob_list :

            # check that the input path is minimally what we expect (exists and is fully expanded)
            cleaned_path = _clean_path(temp_glob_path)
            if not os.path.exists(cleaned_path):
                LOG.warning("The input path (" + cleaned_path + ") does not exist. " +
                            "No quicklooks will be created for this path.")

            # if the path is a directory, get a list of all the files we may want to process in that directory
            elif os.path.isdir(cleaned_path):
                for inner_path in sorted(os.listdir(cleaned_path)):
                    _queue_path(os.path.join(cleaned_path, inner_path))

            else:
                _queue_path(cleaned_path)

    # go through the files we found and sort out which ones we recognize as files we can process
    paths_to_plot = []
    for curr_path in paths_to_examine:

        type_info, module_to_use = data_util.determine_file_type(os.path.basename(curr_path))
        if type_info is None or module_to_use is None :
            LOG.info("Input file (" + curr_path + ") does not match expected file types. " +
                     "No quicklooks will be generated for this file.")
        else:
            paths_to_plot.append((curr_path, type_info, module_to_use))

    return paths_to_plot

def _sort_input_files (list_of_input_paths) :
    """
    Group the input files by their start time and then by their file type.

    Files whose start time cannot be read are logged and left out of the result.

    :param list_of_input_paths: A list of (file path, type info, module to use) tuples.
    :return: A nested dictionary keyed first by the display string of the start time and then
    by type info; each innermost value is a list of (file path, module to use) tuples.
    """

    grouped_paths = {}

    for file_path, file_type, handler_module in list_of_input_paths :

        start_time = data_util.get_start_datetime_from_file (file_path)

        # without a time we have nowhere to file this path
        if start_time is None :
            LOG.info("Unable to read time information for file (" + file_path + "). " +
                     "Unable to generate quicklooks for this file without time information.")
            continue

        time_key = data_util.get_disp_datetime_str_from_datetime(start_time)

        # make the nested containers on first use, then add this file to them
        types_at_time = grouped_paths.setdefault(time_key, {})
        types_at_time.setdefault(file_type, []).append((file_path, handler_module))

    return grouped_paths

def _save_pillow_image(var_data, invalid_mask, min_range, max_range, colormap_obj, output_path,) :
    """
    Given a numpy array and information about how it should look, use pillow to save an image of it.

    The data is clamped to [min_range, max_range], scaled to 0.0 - 1.0, run through the colormap and
    written out as 8 bit color. Nothing is written if there is no valid data at all.

    :param var_data: A numpy array of the variable data
    :param invalid_mask: A boolean mask that is True wherever var_data should not be shown.
                         Those pixels are written as zeros in every channel.
    :param min_range: The minimum value we should be including in the shown data.
    :param max_range: The maximum value we should be including in the shown data.
    :param colormap_obj: The colormap to use; called with the 0.0 - 1.0 scaled data.
    :param output_path: Where we should save the output image.
    :return: None
    """

    LOG.debug("Attempting to save data-only version of this quicklook.")

    # if we have no data, just stop (before doing any of the copying or scaling work)
    has_valid_data = (~numpy.asarray(invalid_mask, dtype=bool)).any()
    if not has_valid_data :
        LOG.debug("Could not make a data-only image with no data. No data-only image will be saved for this variable.")
        return

    # make a copy of the data so we don't trash it
    copy_data = var_data.copy().astype(float)

    # deal with overflow values
    copy_data[copy_data < min_range] = min_range
    copy_data[copy_data > max_range] = max_range

    # normalize our array data to be 0.0 to 1.0
    copy_data -= min_range
    range_width = float(max_range - min_range)
    # with a zero width range everything was just clamped to min_range, so the data is
    # already all zeros; dividing would only turn it into NaNs
    if range_width != 0.0 :
        copy_data /= range_width

    # use the colormap to make our data into colors
    copy_data = colormap_obj(copy_data)

    # rescale into the 0 to 255 range and integer data
    copy_data = numpy.uint8(copy_data * 255)

    # fill in our background
    copy_data[invalid_mask] = 0 # this should make the background transparent

    # make and save the image with pillow
    # Note: a bare "import PIL" does not load the Image submodule, so import it explicitly
    from PIL import Image
    image_obj = Image.fromarray(copy_data)
    image_obj.save(output_path)

def _figure_geometry(px_dims):
    """
    Work out how big a figure must be, and at what dpi, to come out at the requested pixel size.

    Each requested dimension is rounded to the nearest 10 pixels (anything under 100 pixels
    becomes 100) and the longer side of the figure is set to IMG_SIZE_MAX_INCHES.

    :param px_dims: The requested [width, height] of the output image in pixels.
    :return: A tuple of ([width, height] in inches, the dpi to save with,
             (width, height) in pixels after rounding).
    """
    px_user_width, px_user_height = px_dims
    height_in_px = int(round(px_user_height / 10.0) * 10.0) if px_user_height >= 100 else 100
    width_in_px  = int(round(px_user_width  / 10.0) * 10.0) if px_user_width  >= 100 else 100

    # IMG_SIZE_MAX_INCHES is a float, so the dpi and the inch sizes are never truncated
    dpi_to_use = max(width_in_px, height_in_px) / IMG_SIZE_MAX_INCHES
    inch_dims  = [(width_in_px / dpi_to_use), (height_in_px / dpi_to_use)]

    return inch_dims, dpi_to_use, (width_in_px, height_in_px)

def _auto_stride(data_shape, px_size):
    """
    Pick a stride that brings data of the given shape down to near the output image size.

    :param data_shape: The (rows, columns) shape of the data.
    :param px_size: The (width, height) of the output image in pixels.
    :return: The stride to use, always at least 1.
    """
    width_in_px, height_in_px = px_size
    # rows run down the image and columns run across it
    # Note: if we process arbitrary data in the future we might want to use max instead of min
    stride = int(min(data_shape[0] / float(height_in_px),
                     data_shape[1] / float(width_in_px)))
    return max(stride, 1)

def _parse_custom_range(range_str):
    """
    Turn a user supplied "min:max" string into a list of two floats.

    :param range_str: The range string from the command line, or None if none was given.
    :return: [min, max] as floats, or None if range_str is None.
    :raises ValueError: If the string is not two numbers separated by a colon.
    """
    if range_str is None:
        return None

    limits = [float(x) for x in range_str.split(":")]
    if len(limits) != 2:
        raise ValueError("expected exactly two values in the form min:max")

    return limits

def main():
    """
    The main method.

    Parses the command line, finds the input files that can be plotted, and saves one quicklook
    png per variable per file into the output directory.

    :return: 0 if processing completed, 1 if the custom range could not be understood or
             no usable input files were found.
    """

    # Read in the options
    options, cspp_fw_version = _argparse()

    # the custom range is the same for every variable, so parse (and check) it once up front
    try:
        custom_range = _parse_custom_range(options.custom_data_range)
    except ValueError:
        LOG.error("Unable to use the custom range (" + str(options.custom_data_range) + "). " +
                  "The range must use the syntax min:max with numeric limits.")
        return 1

    # create the output path if it doesn't exist
    out_path = _clean_path(options.output_dir)
    _setup_dir_if_needed(out_path, "output")

    # get a list of the paths we need to try to plot from
    paths_to_plot = _find_acceptable_paths_to_plot(options.input_files)
    if not paths_to_plot :
        LOG.warning("No valid input file paths found to plot. No quicklooks will be plotted.")
        return 1

    # Figure out the image size in inches and the dpi we will use
    inch_dims, dpi_to_use, px_size = _figure_geometry(options.image_size)

    # sort our paths by datetime and type
    paths_by_time = _sort_input_files(paths_to_plot)

    # plot for each datetime
    for datetime_str in paths_by_time :

        # plot the quicklooks for each type of file at this datetime
        for file_types_temp in paths_by_time[datetime_str] :

            # loop for all paths of a given type for a datetime
            for temp_path, module_to_use in paths_by_time[datetime_str][file_types_temp] :

                # if we need to add some additional text info for our plots, get that
                title_additional_str = module_to_use.get_additional_name_suffix(temp_path)
                sat_name = data_util.get_satellite_name(temp_path)
                version_msg = module_to_use.get_version_info(temp_path)
                version_msg = "Data created by " + version_msg + ". " if version_msg is not None else ""
                version_msg = version_msg + "Plotted using " + cspp_fw_version + "."

                LOG.info("-------------------------------------")
                LOG.info("Plotting quicklooks for file: " + temp_path)

                # get navigation information
                bounds, sat_ht, sat_lon0 = data_util.get_navigation_info_from_file(temp_path)

                vars_to_plot = module_to_use.get_expected_variables_to_plot_for_file(temp_path)

                # plot each variable in this file that needs a quicklook
                for variable_name in vars_to_plot :

                    # if the user requested only one variable name, make sure this variable matches that name
                    if options.only_var_name is not None and (options.only_var_name.lower() != variable_name.lower()) :
                        LOG.info("Variable " + variable_name + " does not match user requested variable ("
                                 + options.only_var_name + "). " + variable_name + " will not be plotted.")
                        continue

                    LOG.info ("Plotting variable: " + str(variable_name))

                    # get the variable data and information about it
                    variable_data, long_name_str, units_str, valid_range_info_from_file, flag_values, flag_names, type_change = \
                                    module_to_use.load_variable(temp_path,
                                                                variable_name,
                                                                no_conversions=options.do_no_conversions)
                    temp_variable_name = variable_name if type_change is None else type_change
                    colormap_obj, bk_color, scale_temp, range_override_temp = \
                                    module_to_use.get_plot_settings_for_variable(temp_variable_name,
                                                                                 title_additional_str)

                    # figure out the stride width if we need to; this is done separately for each
                    # variable so a stride picked for one data shape is never reused for another
                    stride_width = options.stride
                    if stride_width < 0 :
                        stride_width = _auto_stride(variable_data.shape, px_size)
                        LOG.debug("Automatically selected stride width based on image size: " + str(stride_width))

                    # for now, use only some of the data for speed if it's large
                    if (stride_width > 1) and (not options.dataonly) :
                        LOG.debug("Applying stride (" + str(stride_width) + ") to reduce data size.")
                        # slicing a masked array slices its mask along with the data
                        variable_data = variable_data[0::stride_width, 0::stride_width,]

                    # pull some things out for ease of calling the plotting functions
                    # getmaskarray always gives a full boolean array, even when nothing is masked
                    invalid_mask = numpy.ma.getmaskarray(variable_data)
                    sat_txt = "" if len(sat_name) <= 0 else sat_name + ", "
                    plot_title = sat_txt + long_name_str + title_additional_str + '\n' + " from " + datetime_str
                    output_file_path = os.path.join(out_path,
                                                    options.output_file_prefix + os.path.basename(temp_path)
                                                    + "." + temp_variable_name + ".png")

                    # figure out what range we are going to be using
                    range_to_use = valid_range_info_from_file # use the valid range from the file if nothing else affects it
                    # if we have a custom data range, override the other possibilities
                    if custom_range is not None :
                        range_to_use = list(custom_range)
                    # if the user wants the raw range, use that
                    elif options.use_raw_data_range :
                        range_to_use = None
                    # otherwise, use the range overrides unless the user wants just the ones from the file
                    elif (not options.use_file_data_range) and (range_override_temp is not None) :
                        range_to_use = range_override_temp

                    # plot an empty figure, a data only image, a simple figure, or a mapped figure
                    figure_temp = None
                    if invalid_mask.all() :

                        LOG.info("No non-fill data was found for this variable. An empty quicklook will be plotted for this variable.")
                        figure_temp = plotting_util.create_nodata_figure(plot_title,
                                                                         version_txt=version_msg,)

                    # if they want the full resolution image of the data without any labels or maps
                    elif options.dataonly :

                        if range_to_use is None :
                            # there is no range to scale by (raw range requested, or nothing in
                            # the file), so use the range of the valid data itself
                            data_min, data_max = float(variable_data.min()), float(variable_data.max())
                        else :
                            data_min, data_max = range_to_use[0], range_to_use[-1]

                        _save_pillow_image(variable_data, invalid_mask, data_min, data_max, colormap_obj,
                                           output_file_path, )

                    # if they just want the image shown using matplotlib without map stuff
                    elif options.unnavigated :

                        figure_temp = plotting_util.create_simple_figure(variable_data,
                                                                         plot_title,
                                                                         invalidMask=invalid_mask,
                                                                         colorMap=colormap_obj,
                                                                         backColor=bk_color,
                                                                         units=units_str,
                                                                         valid_range=valid_range_info_from_file,
                                                                         range_to_use=range_to_use,
                                                                         scale_const=scale_temp,
                                                                         flag_values=flag_values,
                                                                         flag_names=flag_names,
                                                                         version_txt=version_msg,
                                                                         )

                    # otherwise they want the image navigated onto a map
                    else :

                        LOG.debug("Projection longitude, satellite height (as loaded from the file): " + str(sat_lon0) + ", " + str(sat_ht))

                        figure_temp = plotting_util.create_mapped_figure(variable_data,
                                                                         bounds,
                                                                         plot_title,
                                                                         invalidMask=invalid_mask,
                                                                         colorMap=colormap_obj,
                                                                         backColor=bk_color,
                                                                         units=units_str,
                                                                         valid_range=valid_range_info_from_file,
                                                                         range_to_use=range_to_use,
                                                                         scale_const=scale_temp,
                                                                         flag_values=flag_values,
                                                                         flag_names=flag_names,
                                                                         version_txt=version_msg,
                                                                         expected_longitude=sat_lon0,
                                                                         expected_sat_height=sat_ht,
                                                                        )

                    # if we have a figure, save it to the appropriate file
                    if figure_temp is not None :
                        # resize and save our figure to a file
                        figure_temp.set_size_inches(inch_dims[0], inch_dims[1])
                        figure_temp.savefig(output_file_path, dpi=dpi_to_use)
                        plotting_util.close_figure(figure_temp)

    return 0

# when run as a script, hand main()'s return value to the shell as the exit status
if __name__=='__main__':
    sys.exit(main())