From 306ae4e41039ceb829e7b83db59cbd174178dbe0 Mon Sep 17 00:00:00 2001 From: Nick Bearson <nickb@ssec.wisc.edu> Date: Fri, 22 Jul 2022 10:47:18 -0500 Subject: [PATCH] move the handling of each minute into its own function - grid_minute() - which can then be try/catch'd or potentially run in parallel --- gridded_glm/libexec/_minute_gridder.py | 163 +++++++++++++------------ 1 file changed, 83 insertions(+), 80 deletions(-) diff --git a/gridded_glm/libexec/_minute_gridder.py b/gridded_glm/libexec/_minute_gridder.py index c4a2622..faf3412 100644 --- a/gridded_glm/libexec/_minute_gridder.py +++ b/gridded_glm/libexec/_minute_gridder.py @@ -319,10 +319,90 @@ def add_gglm_attrs(netcdf_filename, input_filenames): def alarm_handler(signum, frame): raise OSError("Timeout exceeded!") -if __name__ == '__main__': +def grid_minute(minute, args): signal.signal(signal.SIGALRM, alarm_handler) - signal.alarm(10*60) # timeout if we're not done after 10 minutes + signal.alarm(10*60) # timeout if we're not done gridding after 10 minutes + # set up temporary dir + tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd()) + log.info("working in: {}".format(tempdir_path)) + # clean our temporary dir on exit + atexit.register(shutil.rmtree, tempdir_path) + + # grab all input files for this minute + minute_files = [] + for f in args.filenames: + if glm_filename_to_minute(f) == minute: + minute_files.append(f) + minute_files = sorted(minute_files) + + # do we have three input files for this minute? + if len(minute_files) != 3: + log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(minute, len(minute_files))) + return + + # do the gridding + gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path) + gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs) + + gridded_files = [] + for subgrid in gridder_return: + for gridded_file in subgrid[1]: + gridded_files.append(gridded_file) + + # we need to add attributes here due to an issue where satpy (or its dependencies) are + # holding the input gridded file open until the process exits + for f in gridded_files: + add_gglm_attrs(f, glm_filenames) + + # (optionally) do tiling + if args.create_tiles: + + sector = get_goes_position(glm_filenames) + if sector == "east": + sector_id = "GOES_EAST" + elif sector == "west": + sector_id = "GOES_WEST" + else: + raise RuntimeError("could not determine sector_id") + + from satpy import Scene + for gridded_file in gridded_files: + log.info("TILING: {}".format(gridded_files)) + scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames + scn.load([ + 'DQF', + 'flash_extent_density', + 'minimum_flash_area', + 'total_energy', + ]) + + scn.save_datasets(writer='awips_tiled', + template='glm_l2_radf', + sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here. + source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output + base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory. + tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted + check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products + environment_prefix=args.system_environment_prefix_tiles, + compress=True) + + # pick up output files from the tempdir + # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc + log.debug("files in {}".format(tempdir_path)) + log.debug(os.listdir(tempdir_path)) + log.debug("moving output to {}".format(args.output_dir)) + tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles)) + tiled_files = glob(tiled_path) + for f in tiled_files: + add_gglm_attrs(f, glm_filenames) + shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) + for f in gridded_files: + shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) + + # tempdir cleans itself up via atexit, above + +if __name__ == '__main__': # freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time parser = create_parser() args = parser.parse_args() @@ -367,12 +447,6 @@ if __name__ == '__main__': # set up output dir os.makedirs(args.output_dir, exist_ok=True) - # set up temporary dir - tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd()) - log.info("working in: {}".format(tempdir_path)) - # clean our temporary dir on exit - atexit.register(shutil.rmtree, tempdir_path) - minutes = [] for f in args.filenames: m = glm_filename_to_minute(f) @@ -380,75 +454,4 @@ if __name__ == '__main__': minutes.append(m) for m in minutes: - # grab all input files for this minute - minute_files = [] - for f in args.filenames: - if glm_filename_to_minute(f) == m: - minute_files.append(f) - minute_files = sorted(minute_files) - - # do we have three input files for this minute? - if len(minute_files) != 3: - log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(m, len(minute_files))) - continue - - # do the gridding - gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path) - gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs) - - gridded_files = [] - for subgrid in gridder_return: - for gridded_file in subgrid[1]: - gridded_files.append(gridded_file) - - # we need to add attributes here due to an issue where satpy (or its dependencies) are - # holding the input gridded file open until the process exits - for f in gridded_files: - add_gglm_attrs(f, glm_filenames) - - # (optionally) do tiling - if args.create_tiles: - - sector = get_goes_position(glm_filenames) - if sector == "east": - sector_id = "GOES_EAST" - elif sector == "west": - sector_id = "GOES_WEST" - else: - raise RuntimeError("could not determine sector_id") - - from satpy import Scene - for gridded_file in gridded_files: - log.info("TILING: {}".format(gridded_files)) - scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames - scn.load([ - 'DQF', - 'flash_extent_density', - 'minimum_flash_area', - 'total_energy', - ]) - - scn.save_datasets(writer='awips_tiled', - template='glm_l2_radf', - sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here. - source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output - base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory. - tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted - check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products - environment_prefix=args.system_environment_prefix_tiles, - compress=True) - - # pick up output files from the tempdir - # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc - log.debug("files in {}".format(tempdir_path)) - log.debug(os.listdir(tempdir_path)) - log.debug("moving output to {}".format(args.output_dir)) - tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles)) - tiled_files = glob(tiled_path) - for f in tiled_files: - add_gglm_attrs(f, glm_filenames) - shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) - for f in gridded_files: - shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) - - # tempdir cleans itself up via atexit, above \ No newline at end of file + grid_minute(m, args) -- GitLab