Skip to content
Snippets Groups Projects
Commit b1f35a55 authored by Nick Bearson's avatar Nick Bearson
Browse files

Merge branch '5-handle-fewer-than-nominal-number-of-inputs-differently' into 'master'

group inputs by minute and process accordingly

See merge request !26
parents 840520fe d9e6fe95
No related branches found
Tags v1.0-20220512.1
1 merge request!26group inputs by minute and process accordingly
......@@ -118,6 +118,11 @@ def get_goes_position(filenames):
# we require that all files are from the same sensor and raise an exception if not
raise ValueError("could not determine GOES position - did you provide a mix of satellites?")
def glm_filename_to_minute(glm_filename):
    """Return the start time encoded in a GLM filename, truncated to the minute.

    Seconds and microseconds are zeroed so that files belonging to the same
    minute compare equal.
    """
    info = parse_glm_filename(os.path.basename(glm_filename))
    # element 3 of the parsed tuple is the file's start datetime
    start = info[3]
    return start.replace(second=0, microsecond=0)
def get_start_end(filenames, start_time=None, end_time=None):
"""Compute start and end time of data based on filenames."""
......@@ -187,7 +192,7 @@ def get_outpath_base(args):
return dsname
def grid_setup(args, work_dir=os.getcwd()):
def grid_setup(glm_files, args, work_dir=os.getcwd()):
# When passed None for the minimum event or group counts, the gridder will skip
# the check, saving a bit of time.
min_events = None
......@@ -216,10 +221,10 @@ def grid_setup(args, work_dir=os.getcwd()):
exit(1)
try:
start_time, end_time = get_start_end(args.filenames)
start_time, end_time = get_start_end(glm_files)
except:
log.error("Could not parse start & end times from one or more of the files provided:")
log.error(", ".join(args.filenames))
log.error(", ".join(glm_files))
exit(1)
base_date = datetime(start_time.year, start_time.month, start_time.day)
......@@ -227,7 +232,7 @@ def grid_setup(args, work_dir=os.getcwd()):
outputpath = os.path.join(work_dir, get_outpath_base(args)) # GLMTools expects a template in addition to the path
goes_position = get_goes_position(args.filenames)
goes_position = get_goes_position(glm_files)
if "meso" in args.goes_sector:
view = "meso"
......@@ -296,7 +301,7 @@ def grid_setup(args, work_dir=os.getcwd()):
if (proj_name == 'pixel_grid') or (proj_name == 'geos'):
grid_kwargs['pixel_coords'] = fixed_grid
grid_kwargs['ellipse_rev'] = -1 # -1 (default) = infer from date in each GLM file
return gridder, args.filenames, start_time, end_time, grid_kwargs
return gridder, glm_files, start_time, end_time, grid_kwargs
def get_cspp_gglm_version():
......@@ -338,6 +343,33 @@ if __name__ == '__main__':
log.info("Starting GLM Gridding")
log.debug("Starting script with: %s", sys.argv)
# handle the realtime flag
if args.realtime:
    # Realtime mode is triggered with exactly one file; its siblings from the
    # same minute are discovered below by globbing next to it.
    if len(args.filenames) != 1:
        log.error("realtime mode only accepts one input file")
        exit(1)
    # NOTE(review): assumes parse_glm_filename returns a tuple where elements
    # 0-2 are filename prefix fields and element 3 is the start datetime,
    # matching its use in glm_filename_to_minute -- confirm.
    glminfo = parse_glm_filename(os.path.basename(args.filenames[0]))
    # Build a glob matching every GLM file whose start time falls in the same
    # minute (start-time precision in the pattern is YYYYjjjHHMM).
    globstring = "{}_{}_{}_s{}*".format(glminfo[0], glminfo[1], glminfo[2], glminfo[3].strftime("%Y%j%H%M"))
    fileglob = glob(os.path.join(os.path.dirname(args.filenames[0]), globstring))
    # A complete minute consists of three GLM files; if fewer (or more) are
    # present we exit cleanly and expect to be re-run when the minute is full.
    if len(fileglob) != 3:
        log.error("There are not (yet) three GLM files from this minute. This may be expected. Exiting.")
        exit(0)
    # this allows a user to use realtime mode to process a large directory of GLM without
    # creating the same output file multiple times
    if sorted(fileglob)[-1] != args.filenames[0]:
        log.error("This is not the last file from this minute. Exiting.")
        exit(0)
    # Replace the single trigger file with the full set for this minute.
    args.filenames = fileglob

# check that all of our inputs exist
for f in args.filenames:
    if not os.path.exists(f):
        log.error("Tried to grid file that does not exist: {}".format(f))
        exit(1)

# set up output dir
os.makedirs(args.output_dir, exist_ok=True)
......@@ -347,63 +379,82 @@ if __name__ == '__main__':
# clean our temporary dir on exit
atexit.register(shutil.rmtree, tempdir_path)
# do the gridding
gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(args, work_dir=tempdir_path)
gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)
gridded_files = []
for subgrid in gridder_return:
for gridded_file in subgrid[1]:
gridded_files.append(gridded_file)
# we need to add attributes here due to an issue where satpy (or its dependencies) are
# holding the input gridded file open until the process exits
for f in gridded_files:
add_gglm_attrs(f, glm_filenames)
# (optionally) do tiling
if args.create_tiles:
sector = get_goes_position(glm_filenames)
if sector == "east":
sector_id = "GOES_EAST"
elif sector == "west":
sector_id = "GOES_WEST"
else:
raise RuntimeError("could not determine sector_id")
from satpy import Scene
for gridded_file in gridded_files:
log.info("TILING: {}".format(gridded_files))
scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
scn.load([
'DQF',
'flash_extent_density',
'minimum_flash_area',
'total_energy',
])
scn.save_datasets(writer='awips_tiled',
template='glm_l2_radf',
sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
environment_prefix=args.system_environment_prefix_tiles,
compress=True)
# pick up output files from the tempdir
# output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
log.debug("files in {}".format(tempdir_path))
log.debug(os.listdir(tempdir_path))
log.debug("moving output to {}".format(args.output_dir))
tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
tiled_files = glob(tiled_path)
for f in tiled_files:
add_gglm_attrs(f, glm_filenames)
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
for f in gridded_files:
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
# Collect the distinct minute-truncated start times present in the inputs,
# preserving first-seen order so output is generated in input order.
minutes = []
for f in args.filenames:
    m = glm_filename_to_minute(f)
    if m not in minutes:
        minutes.append(m)

# Grid each minute independently: a minute is only gridded when all three of
# its GLM input files are present.
for m in minutes:
    # grab all input files for this minute
    minute_files = []
    for f in args.filenames:
        if glm_filename_to_minute(f) == m:
            minute_files.append(f)
    minute_files = sorted(minute_files)

    # do we have three input files for this minute?
    if len(minute_files) != 3:
        log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(m, len(minute_files)))
        continue

    # do the gridding
    gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
    gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)

    # Flatten the gridder's return value (a sequence of subgrids, each of
    # which carries its output filenames in position 1) into one list.
    gridded_files = []
    for subgrid in gridder_return:
        for gridded_file in subgrid[1]:
            gridded_files.append(gridded_file)

    # we need to add attributes here due to an issue where satpy (or its dependencies) are
    # holding the input gridded file open until the process exits
    for f in gridded_files:
        add_gglm_attrs(f, glm_filenames)

    # (optionally) do tiling
    if args.create_tiles:
        sector = get_goes_position(glm_filenames)
        if sector == "east":
            sector_id = "GOES_EAST"
        elif sector == "west":
            sector_id = "GOES_WEST"
        else:
            raise RuntimeError("could not determine sector_id")

        from satpy import Scene
        for gridded_file in gridded_files:
            # BUGFIX: log the file being tiled, not the whole list on every
            # iteration (was: .format(gridded_files)).
            log.info("TILING: {}".format(gridded_file))
            scn = Scene(reader='glm_l2', filenames=[gridded_file])  # n.b. satpy requires a list of filenames
            scn.load([
                'DQF',
                'flash_extent_density',
                'minimum_flash_area',
                'total_energy',
            ])
            scn.save_datasets(writer='awips_tiled',
                              template='glm_l2_radf',
                              sector_id=sector_id,  # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is actually used here.
                              source_name="",  # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
                              base_dir=tempdir_path,  # base_dir is the output directory. I think blank is the same as current directory.
                              tile_size=(506, 904),  # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
                              check_categories=False,  # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
                              environment_prefix=args.system_environment_prefix_tiles,
                              compress=True)

        # pick up output files from the tempdir
        # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
        log.debug("files in {}".format(tempdir_path))
        log.debug(os.listdir(tempdir_path))
        log.debug("moving output to {}".format(args.output_dir))
        tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
        tiled_files = glob(tiled_path)
        for f in tiled_files:
            add_gglm_attrs(f, glm_filenames)
            shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))

    # Move this minute's gridded files out of the tempdir into the final
    # output directory.
    for f in gridded_files:
        shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))

# tempdir cleans itself up via atexit, above
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment