#!/usr/bin/env python
# encoding: utf-8
"""
Handle parsing input from files.

:author:       Eva Schiffer (evas)
:contact:      evas@ssec.wisc.edu
:organization: Space Science and Engineering Center (SSEC)
:copyright:    Copyright (c) 2014 University of Wisconsin SSEC. All rights reserved.
:date:         Jan 2014
:license:      GNU GPLv3
:revision:     $Id$
"""
__docformat__ = "restructuredtext en"

import sys, logging, os, re

import numpy

import stg.modis_guidebook as modis_guidebook
import stg.modis_io        as modis_io
import stg.viirs_guidebook as viirs_guidebook
import stg.viirs_io        as viirs_io
import stg.ctp_guidebook   as ctp_guidebook
import stg.ctp_io          as ctp_io
import stg.stg_util        as stg_util

from stg.constants import *

from netCDF4 import Dataset

LOG = logging.getLogger(__name__)

# these are suffixes used for temporary files while space gridding
EXPECTED_TEMP_SUFFIXES = [
                          "%s" + "-" + TEMP_SUFFIX_KEY,
                          "%s" + "-" + DENSITY_SUFFIX + "-" + TEMP_SUFFIX_KEY,
                          "%s" + "-" + NOBS_SUFFIX    + "-" + TEMP_SUFFIX_KEY,
                         ]

# these are suffixes used for the final, packed files from space gridding
EXPECTED_SPACE_OUT_SUFFIXES = [
                               "%s" + "-" + DAILY_SPACE_SUFFIX_KEY,
                               "%s" + "-" + NOBS_SUFFIX + "-" + DAILY_SPACE_SUFFIX_KEY,
                              ]

# the strftime format for date stamping our files
DATE_STAMP_FORMAT = "%Y%m%d"

def open_file (file_path) :
    """given a file path, open it
    """

    file_object = None

    if modis_guidebook.is_MODIS_file(file_path) :
        file_object = modis_io.open_file(file_path)

    if viirs_guidebook.is_VIIRS_file(file_path) :
        file_object = viirs_io.open_file(file_path)

    if ctp_guidebook.is_CTP_file(file_path) :
        file_object = ctp_io.open_file(file_path)

    return file_object

def close_file (file_path, file_object) :
    """given a file object, close it
    """

    if modis_guidebook.is_MODIS_file(file_path) :
        modis_io.close_file(file_object)

    if viirs_guidebook.is_VIIRS_file(file_path) :
        viirs_io.close_file(file_object)

    if ctp_guidebook.is_CTP_file(file_path) :
        ctp_io.close_file(file_object)

def load_aux_data (file_path, minimum_scan_angle, file_object=None) :
    """load the auxiliary data for a given file
    """

    temp_aux_data = None

    if modis_guidebook.is_MODIS_file(file_path) :
        file_object, temp_aux_data = modis_io.load_aux_data(file_path,
                                                            minimum_scan_angle,
                                                            file_object=file_object)

    if viirs_guidebook.is_VIIRS_file(file_path) :
        file_object, temp_aux_data = viirs_io.load_aux_data(file_path,
                                                            minimum_scan_angle,
                                                            file_object=file_object)

    if ctp_guidebook.is_CTP_file(file_path) :
        file_object, temp_aux_data = ctp_io.load_aux_data(file_path,
                                                          minimum_scan_angle,
                                                          file_object=file_object)

    return file_object, temp_aux_data

def get_expected_abstract_sets (instrument_constant, separate_day_night=True) :
    """get the abstract data set definitions for the given instrument
    """

    expected_data_sets = { }

    if instrument_constant == INST_MODIS :
        expected_data_sets = modis_io.get_abstract_data_sets(do_separate_day_night=separate_day_night)

    if instrument_constant == INST_VIIRS :
        expected_data_sets = viirs_io.get_abstract_data_sets(do_separate_day_night=separate_day_night)

    # FUTURE, needs a statement for ctp

    return expected_data_sets

def get_expected_data_sets_from_aux_data (instrument_constant, aux_data, do_separate_day_night=True) :
    """given aux data in the form returned by load_aux_data and the instrument constant,
    return the data sets to be processed

    Each data set is defined by a constant name, a mask to select that set, its expected
    suffixes for temporary density/nobs/data, and its expected suffix for the final
    output data/nobs.
    """

    expected_data_sets = { }

    if instrument_constant == INST_MODIS :
        expected_data_sets = modis_io.determine_data_sets(aux_data,
                                                          do_separate_day_night=do_separate_day_night)

    if instrument_constant == INST_VIIRS :
        expected_data_sets = viirs_io.determine_data_sets(aux_data,
                                                          do_separate_day_night=do_separate_day_night)

    # FUTURE, needs a statement for ctp

    return expected_data_sets
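# The sketch below is illustrative only (not part of the original module): it
# shows how the dispatch helpers above are typically chained together. The
# file name and the 32.0 degree minimum scan angle are hypothetical example
# values, not defaults defined by this project.
def _example_open_and_load_aux () :
    """a minimal usage sketch for open_file / load_aux_data / close_file"""

    example_path = "MYD06_L2.A2014001.0000.hdf" # hypothetical MODIS file name

    file_object = open_file(example_path)
    if file_object is not None :
        # reuse the already opened file object rather than reopening the file
        file_object, aux_data = load_aux_data(example_path, 32.0, file_object=file_object)
        close_file(example_path, file_object)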
def load_variable_from_file (variable_name, file_path=None, file_object=None,
                             data_type_for_output=DEFAULT_DATA_TYPE) :
    """load a given variable from a file path or file object
    """

    temp_data = None

    if modis_guidebook.is_MODIS_file(file_path) :
        file_object, temp_data = modis_io.load_variable_from_file(variable_name,
                                                                  file_path=file_path,
                                                                  file_object=file_object,
                                                                  data_type_for_output=data_type_for_output)

    if viirs_guidebook.is_VIIRS_file(file_path) :
        file_object, temp_data = viirs_io.load_variable_from_file(variable_name,
                                                                  file_path=file_path,
                                                                  file_object=file_object,
                                                                  data_type_for_output=data_type_for_output)

    if ctp_guidebook.is_CTP_file(file_path) :
        file_object, temp_data = ctp_io.load_variable_from_file(variable_name,
                                                                file_path=file_path,
                                                                file_object=file_object,
                                                                data_type_for_output=data_type_for_output)

    return file_object, temp_data

def build_name_stem (variable_name, date_time=None, satellite=None, suffix=None) :
    """given information on what's in the file, build a file stem

    If there's extra info like the date time, satellite, or a suffix,
    include that in the file stem as well.

    The name format is: datestamp_satellite_variablename_suffix
    """

    # the basic stem name is just the variable
    stem_name = variable_name

    # if we have a satellite, add that to the beginning
    stem_name = satellite + "_" + stem_name if satellite is not None else stem_name

    # if we have a date time, add a time stamp at the beginning
    stem_name = date_time.strftime(DATE_STAMP_FORMAT) + "_" + stem_name if date_time is not None else stem_name

    # if we have a suffix, add that too
    stem_name = stem_name + "_" + suffix if suffix is not None else stem_name

    return stem_name

def get_datestamp (date_time_ob) :
    """given a date time object, return a date time string to use
    """

    return date_time_ob.strftime(DATE_STAMP_FORMAT) if date_time_ob is not None else ""
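# An illustrative sketch (not part of the original module) of how
# build_name_stem and get_datestamp compose a stem; the satellite id and
# variable name are hypothetical, and DAILY_SPACE_SUFFIX_KEY is simply used
# as a plausible suffix here.
def _example_build_name_stem () :
    """a minimal usage sketch for build_name_stem"""

    import datetime
    when = datetime.datetime(2014, 1, 1)

    # with every optional piece present the stem follows the pattern
    # datestamp_satellite_variablename_suffix, so this yields something like
    # "20140101_aqua_cloud_fraction_" + DAILY_SPACE_SUFFIX_KEY
    stem = build_name_stem("cloud_fraction",
                           date_time=when,
                           satellite="aqua", # hypothetical satellite id
                           suffix=DAILY_SPACE_SUFFIX_KEY)

    assert stem.startswith(get_datestamp(when))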
def parse_flatfile_name (flatfile_name) :
    """given a flat file name, parse out the things we expect from the stem

    The shape of the variable will also be returned as a convenience.

    Note: there is some ambiguity in the flat file names because some variables use "_",
    so this method is only guaranteed to work correctly when the date_time, satellite,
    and suffix were included in the original flat file name stem.
    """

    LOG.debug("Parsing flat file name: " + flatfile_name)

    var_name    = None
    datetimestr = None
    satellite   = None
    suffix      = None
    var_shape   = None
    file_type   = None

    # strip off the shape and data type from the end
    temp_name = flatfile_name.split(".")
    if len(temp_name) >= 4 :

        #data_type = temp_name[1] # TODO, would it improve the algorithm to use this?
        var_shape = tuple(reversed(temp_name[2:]))
        temp_name = temp_name[0] # extract the name stem

        # parse the part of the name not related to the flat file structure
        _split = temp_name.split("_")
        if len(_split) >= 4 :

            # detect if there is a date on the front, if so, strip that
            try :
                int(_split[0])
                datetimestr = _split[0]
                _split = _split[1:]
            except ValueError :
                LOG.debug("Unable to strip date from flat file name: " + flatfile_name)

            # detect if there is a satellite on the front, if so, strip that
            temp_sat = _split[0]
            if temp_sat in ALL_SATS :
                satellite = temp_sat
                _split = _split[1:]
            else :
                LOG.debug("Unable to identify satellite name from flat file name: " + flatfile_name)

            # detect if this is a space gridded file
            if _split[-1].find(DAILY_SPACE_SUFFIX_KEY) >= 0 :
                file_type = DAILY_SPACE_TYPE
                suffix = _split[-1]
                _split = _split[:-1]
            # detect if this is a daily time gridded file
            elif _split[-1].find(DAILY_TIME_SUFFIX_KEY) >= 0 :
                file_type = DAILY_TIME_TYPE
                suffix = _split[-1]
                _split = _split[:-1]
            # detect if this is a multi-day time gridded file
            elif _split[-1].find(MULTI_TIME_SUFFIX_KEY) >= 0 :
                file_type = MULTIDAY_TIME_TYPE
                suffix = _split[-1]
                _split = _split[:-1]
            # detect if this is a nobs look up table
            elif _split[-1].find(NOBS_LUT_SUFFIX) >= 0 :
                file_type = NOBS_LUT_TYPE
                suffix = _split[-1]
                _split = _split[:-1]
            else :
                LOG.debug("Unable to determine file type from suffix in flat file name: " + flatfile_name)

            # the remaining part of the name should be the variable name
            if len(_split) > 0 :
                var_name = "_".join(_split)

    return file_type, var_name, datetimestr, satellite, suffix, var_shape

def get_date_stamp_from_file_name (file_name) :
    """given a file name starting with a name stem created by build_name_stem,
    determine the date stamp for the file
    """

    date_stamp_to_return = None

    # make sure we only have the stem
    stem = file_name.split('.')[0]

    # break the stem up by underscores
    split_stem = stem.split('_')
    for section in split_stem :
        if re.match(r'\d\d\d\d\d\d\d\d', section) :
            date_stamp_to_return = section

    return date_stamp_to_return
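# An illustrative round trip (not part of the original module) through
# parse_flatfile_name. The flat file name is invented; whether "aqua" is
# recognized depends on the contents of ALL_SATS, and the parsing below
# assumes DAILY_SPACE_SUFFIX_KEY contains no underscores or periods.
def _example_parse_flatfile_name () :
    """a minimal usage sketch for parse_flatfile_name"""

    example_name = "20140101_aqua_cloud_fraction_" + DAILY_SPACE_SUFFIX_KEY + ".float32.720.1440"

    file_type, var_name, datetimestr, satellite, suffix, var_shape = parse_flatfile_name(example_name)

    # under the assumptions above we would expect datetimestr "20140101",
    # var_name "cloud_fraction", and var_shape ("1440", "720"); note that the
    # shape entries remain strings rather than being converted to ints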
def is_stg_output_file (file_name, specific_type=None) :
    """determine if the input file name is formatted like an stg output file

    If specific_type is given, return True only if the file is also of that type.
    """

    to_return = True

    # if this isn't a netcdf, then it isn't one of our files
    if not file_name.endswith("." + NETCDF_SUFFIX) :
        to_return = False

    # if the user asked for a specific type, check that
    if specific_type is not None :
        if file_name.find(specific_type) < 0 :
            to_return = False
    else : # if they didn't ask for a specific type, check for all our types
        good_type = False
        for file_type in ALL_STG_FILE_TYPES :
            if file_name.find(file_type) >= 0 :
                good_type = True
        to_return = to_return and good_type

    return to_return

def sort_variable_names (file_object) :
    """organize the variable names by their categories and associate nobs with the right variables

    returns in the form {var name: {"cats": <category list string>, "nobs": the nobs variable name}}
    """

    # get all the variable names
    all_var_names = file_object.variables.keys()

    # separate the variables into nobs and data variables
    data_var_names = set()
    nobs_var_names = set()
    for var_name in all_var_names :
        # check to see if this is an nobs variable
        if var_name.endswith(NOBS_SUFFIX) :
            nobs_var_names.add(var_name)
        else :
            data_var_names.add(var_name)

    # sort the variables by any categories they have and match nobs with the variable it belongs to
    to_return = { }
    for var_name in data_var_names :

        expected_nobs_name = var_name + "_" + NOBS_SUFFIX
        if expected_nobs_name not in nobs_var_names :
            LOG.debug("Unable to find matching number of observations variable for variable (" + var_name + "). "
                      + "This variable will not be processed.")
        else :
            # get the category information
            temp_cat_string = file_object.variables[var_name].categories

            to_return[var_name] = {"cats": temp_cat_string, "nobs": expected_nobs_name}

    return to_return

def create_netCDF_output_file (output_path, file_title, do_overwrite=False) :
    """given an output path, create a netCDF file with the given title
    """

    # set up the output directory if needed
    stg_util.setup_dir_if_needed(output_path, "output")

    # figure out the full output path with the file name
    out_file_path = os.path.abspath(os.path.expanduser(os.path.join(output_path, file_title + ".nc")))

    # if the file we're creating already exists, make sure the caller wanted it overwritten
    if os.path.exists(out_file_path) and not do_overwrite :
        LOG.error("Output file already exists: " + out_file_path)

    # create a blank nc file
    LOG.debug("Creating output file: " + out_file_path)
    out_file = None
    try :
        # only clobber an existing file when the caller asked for an overwrite;
        # if the file exists and do_overwrite is False, Dataset raises and we
        # log below, returning None
        out_file = Dataset(out_file_path, mode='w', format='NETCDF4', clobber=do_overwrite)
    except Exception :
        LOG.critical("Unable to create output file: " + out_file_path)

    return out_file
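# An illustrative sketch (not part of the original module) of creating one of
# our netCDF output files; the output directory and file title here are
# hypothetical example values.
def _example_create_output_file () :
    """a minimal usage sketch for create_netCDF_output_file"""

    out_file = create_netCDF_output_file("./example_output",      # hypothetical directory
                                         "20140101_example_file", # hypothetical file title
                                         do_overwrite=True)

    # a None return means the file could not be created
    if out_file is not None :
        out_file.close()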
def set_up_dimensions_and_global_attrs_in_netCDF (file_object, lat_size, lon_size,
                                                  grid_type_constant, date_time_string,
                                                  global_attrs=None) :
    """build the standard dimensions and global attributes for this file
    """

    # avoid a shared mutable default argument
    global_attrs = global_attrs if global_attrs is not None else { }

    # declare our lat and lon dimensions
    file_object.createDimension("latitude",  lat_size)
    file_object.createDimension("longitude", lon_size)

    # create some global attributes
    setattr(file_object, "GriddingType", grid_type_constant)
    setattr(file_object, "DateTime",     date_time_string)

    # if we have any additional global attrs, create those too
    for attr_key in global_attrs :
        setattr(file_object, attr_key, global_attrs[attr_key])

    # create the lat/lon grids
    LOG.debug("Adding coordinate variable information to output file.")
    lat_array = numpy.linspace( -90.0,  90.0, lat_size)
    lon_array = numpy.linspace(-180.0, 180.0, lon_size + 1)[0:-1] # since -180 and 180 are the same point, only include one of the two
    lat_data  = numpy.array([lat_array,] * lon_size).transpose()
    lon_data  = numpy.array([lon_array,] * lat_size)

    # create the one dimensional arrays of latitude and longitude values
    out_var_obj = file_object.createVariable("latitude",  numpy.float32, ("latitude",),  fill_value=numpy.nan)
    out_var_obj.set_auto_maskandscale(False)
    out_var_obj[:] = lat_array
    out_var_obj = file_object.createVariable("longitude", numpy.float32, ("longitude",), fill_value=numpy.nan)
    out_var_obj.set_auto_maskandscale(False)
    out_var_obj[:] = lon_array

    # create tiled 2D arrays of the latitude and longitude for ease of plotting
    out_var_obj = file_object.createVariable("latitude-grid",  numpy.float32, ("latitude", "longitude"), fill_value=numpy.nan)
    out_var_obj.set_auto_maskandscale(False)
    out_var_obj[:] = lat_data
    out_var_obj = file_object.createVariable("longitude-grid", numpy.float32, ("latitude", "longitude"), fill_value=numpy.nan)
    out_var_obj.set_auto_maskandscale(False)
    out_var_obj[:] = lon_data

    return file_object

def get_nc_latlon_sizes (file_object) :
    """given a file with attributes in the format that stg makes, get the lat and lon sizes
    """

    # get the sizes from the file
    lat_size = len(file_object.dimensions["latitude"])
    lon_size = len(file_object.dimensions["longitude"])

    return lat_size, lon_size

def add_variable_to_netCDF (file_object, var_name, data, fill_value=numpy.nan, suffix_list=None) :
    """add a variable with data and attrs to an already opened netCDF file object

    Note: It is assumed that the data will be either 2D in the shape
    (lat_size, lon_size) or 3D in the shape (x, lat_size, lon_size). If it is
    3D with x > 1 a "_count" dimension will be created for the first dimension
    of the variable; a 3D variable with x == 1 will be collapsed to 2D.
    """

    # avoid a shared mutable default argument
    suffix_list = suffix_list if suffix_list is not None else [ ]

    # pull lon/lat info for size comparisons
    lat_size, lon_size = get_nc_latlon_sizes(file_object)

    # figure out the full variable name we're going to use in the file
    out_variable_name = var_name #+ "_" + satellite
    suffix_str = ""
    for suffix in suffix_list :
        if isinstance(suffix, list) or isinstance(suffix, tuple) :
            suffix = "_".join(suffix)
        out_variable_name += "_" + suffix
        suffix_str        += " " + suffix
    suffix_str = suffix_str[1:] # remove the very first space

    # do some checks on the data shape
    temp_shape = data.shape
    #print("temp shape: " + str(temp_shape))
    #print("lat size:   " + str(lat_size))
    #print("lon size:   " + str(lon_size))
    third_dim = None
    if len(temp_shape) == 2 :
        assert (temp_shape[0] == lat_size)
        assert (temp_shape[1] == lon_size)
    elif len(temp_shape) == 3 :
        assert (temp_shape[1] == lat_size)
        assert (temp_shape[2] == lon_size)

        # if we need another dimension, generate that
        if temp_shape[0] > 1 :
            third_dim = out_variable_name + "_count"
            file_object.createDimension(third_dim, temp_shape[0])
        else :
            data = data[0] # remove the empty first dimension
    else :
        LOG.error("Unexpected data shape: " + str(temp_shape))

    # create the variable with the appropriate dimensions
    dims = (third_dim, "latitude", "longitude") if third_dim is not None else ("latitude", "longitude")
    out_var_obj = file_object.createVariable(out_variable_name, numpy.float32, dims, fill_value=fill_value)
    out_var_obj.set_auto_maskandscale(False)

    # set the variable attributes
    setattr(out_var_obj, "categories",      suffix_str)
    setattr(out_var_obj, "originalVarName", var_name)

    # set the variable data
    out_var_obj[:] = data
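# An illustrative sketch (not part of the original module) tying the netCDF
# helpers together; the 1 degree grid size, gridding type string, date stamp,
# and variable contents are all invented example values.
def _example_write_gridded_variable () :
    """a minimal usage sketch for the netCDF output helpers"""

    lat_size, lon_size = 180, 360 # a hypothetical 1 degree grid

    out_file = create_netCDF_output_file("./example_output", "example", do_overwrite=True)
    if out_file is not None :

        set_up_dimensions_and_global_attrs_in_netCDF(out_file, lat_size, lon_size,
                                                     "example-gridding", "20140101")

        # a 3D stack with a leading axis > 1, so add_variable_to_netCDF will
        # create an "example_var_day_count" dimension for the first dimension
        data = numpy.zeros((2, lat_size, lon_size), dtype=numpy.float32)
        add_variable_to_netCDF(out_file, "example_var", data, suffix_list=["day"])

        out_file.close()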
def main() :
    import optparse
    #from pprint import pprint

    usage = """
%prog [options] filename1.hdf

"""
    parser = optparse.OptionParser(usage)
    parser.add_option('-v', '--verbose', dest='verbosity', action="count", default=0,
                      help='each occurrence increases verbosity 1 level through ERROR-WARNING-INFO-DEBUG')
    parser.add_option('-r', '--no-read', dest='read_hdf', action='store_false', default=True,
                      help="don't read or look for the hdf file, only analyze the filename")
    (options, args) = parser.parse_args()

    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(3, options.verbosity)])

    LOG.info("Currently no command line tests are set up for this module.")

if __name__ == '__main__':
    sys.exit(main())