From 306ae4e41039ceb829e7b83db59cbd174178dbe0 Mon Sep 17 00:00:00 2001
From: Nick Bearson <nickb@ssec.wisc.edu>
Date: Fri, 22 Jul 2022 10:47:18 -0500
Subject: [PATCH] Move the handling of each minute into its own function,
 grid_minute(), so it can be wrapped in try/except or potentially run in parallel

---
 gridded_glm/libexec/_minute_gridder.py | 163 +++++++++++++------------
 1 file changed, 83 insertions(+), 80 deletions(-)

diff --git a/gridded_glm/libexec/_minute_gridder.py b/gridded_glm/libexec/_minute_gridder.py
index c4a2622..faf3412 100644
--- a/gridded_glm/libexec/_minute_gridder.py
+++ b/gridded_glm/libexec/_minute_gridder.py
@@ -319,10 +319,90 @@ def add_gglm_attrs(netcdf_filename, input_filenames):
 def alarm_handler(signum, frame):
   raise OSError("Timeout exceeded!")
 
-if __name__ == '__main__':
+def grid_minute(minute, args):
     signal.signal(signal.SIGALRM, alarm_handler)
-    signal.alarm(10*60) # timeout if we're not done after 10 minutes
+    signal.alarm(10*60) # timeout if we're not done gridding after 10 minutes
 
+    # set up temporary dir
+    tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())
+    log.info("working in: {}".format(tempdir_path))
+    # clean our temporary dir on exit
+    atexit.register(shutil.rmtree, tempdir_path)
+
+    # grab all input files for this minute
+    minute_files = []
+    for f in args.filenames:
+        if glm_filename_to_minute(f) == minute:
+            minute_files.append(f)
+    minute_files = sorted(minute_files)
+
+    # do we have three input files for this minute?
+    if len(minute_files) != 3:
+        log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(minute, len(minute_files)))
+        return
+
+    # do the gridding
+    gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
+    gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)
+
+    gridded_files = []
+    for subgrid in gridder_return:
+        for gridded_file in subgrid[1]:
+            gridded_files.append(gridded_file)
+
+    # we need to add attributes here due to an issue where satpy (or its dependencies) are
+    # holding the input gridded file open until the process exits
+    for f in gridded_files:
+        add_gglm_attrs(f, glm_filenames)
+
+    # (optionally) do tiling
+    if args.create_tiles:
+
+        sector = get_goes_position(glm_filenames)
+        if sector == "east":
+            sector_id = "GOES_EAST"
+        elif sector == "west":
+            sector_id = "GOES_WEST"
+        else:
+            raise RuntimeError("could not determine sector_id")
+
+        from satpy import Scene
+        for gridded_file in gridded_files:
+            log.info("TILING: {}".format(gridded_files))
+            scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
+            scn.load([
+                'DQF',
+                'flash_extent_density',
+                'minimum_flash_area',
+                'total_energy',
+            ])
+
+            scn.save_datasets(writer='awips_tiled',
+                              template='glm_l2_radf', 
+                              sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
+                              source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
+                              base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
+                              tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
+                              check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
+                              environment_prefix=args.system_environment_prefix_tiles,
+                              compress=True)
+
+    # pick up output files from the tempdir
+    # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
+    log.debug("files in {}".format(tempdir_path))
+    log.debug(os.listdir(tempdir_path))
+    log.debug("moving output to {}".format(args.output_dir))
+    tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
+    tiled_files = glob(tiled_path)
+    for f in tiled_files:
+        add_gglm_attrs(f, glm_filenames)
+        shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
+    for f in gridded_files:
+        shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
+
+    # tempdir cleans itself up via atexit, above
+
+if __name__ == '__main__':
 #    freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time
     parser = create_parser()
     args = parser.parse_args()
@@ -367,12 +447,6 @@ if __name__ == '__main__':
     # set up output dir
     os.makedirs(args.output_dir, exist_ok=True)
 
-    # set up temporary dir
-    tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())
-    log.info("working in: {}".format(tempdir_path))
-    # clean our temporary dir on exit
-    atexit.register(shutil.rmtree, tempdir_path)
-
     minutes = []
     for f in args.filenames:
         m = glm_filename_to_minute(f)
@@ -380,75 +454,4 @@ if __name__ == '__main__':
             minutes.append(m)
 
     for m in minutes:
-        # grab all input files for this minute
-        minute_files = []
-        for f in args.filenames:
-            if glm_filename_to_minute(f) == m:
-                minute_files.append(f)
-        minute_files = sorted(minute_files)
-
-        # do we have three input files for this minute?
-        if len(minute_files) != 3:
-            log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(m, len(minute_files)))
-            continue
-
-        # do the gridding
-        gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
-        gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)
-
-        gridded_files = []
-        for subgrid in gridder_return:
-            for gridded_file in subgrid[1]:
-                gridded_files.append(gridded_file)
-
-        # we need to add attributes here due to an issue where satpy (or its dependencies) are
-        # holding the input gridded file open until the process exits
-        for f in gridded_files:
-            add_gglm_attrs(f, glm_filenames)
-
-        # (optionally) do tiling
-        if args.create_tiles:
-
-            sector = get_goes_position(glm_filenames)
-            if sector == "east":
-                sector_id = "GOES_EAST"
-            elif sector == "west":
-                sector_id = "GOES_WEST"
-            else:
-                raise RuntimeError("could not determine sector_id")
-
-            from satpy import Scene
-            for gridded_file in gridded_files:
-                log.info("TILING: {}".format(gridded_files))
-                scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
-                scn.load([
-                    'DQF',
-                    'flash_extent_density',
-                    'minimum_flash_area',
-                    'total_energy',
-                ])
-
-                scn.save_datasets(writer='awips_tiled',
-                                  template='glm_l2_radf', 
-                                  sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
-                                  source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
-                                  base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
-                                  tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
-                                  check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
-                                  environment_prefix=args.system_environment_prefix_tiles,
-                                  compress=True)
-
-        # pick up output files from the tempdir
-        # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
-        log.debug("files in {}".format(tempdir_path))
-        log.debug(os.listdir(tempdir_path))
-        log.debug("moving output to {}".format(args.output_dir))
-        tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
-        tiled_files = glob(tiled_path)
-        for f in tiled_files:
-            add_gglm_attrs(f, glm_filenames)
-            shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
-        for f in gridded_files:
-            shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
-
-    # tempdir cleans itself up via atexit, above
\ No newline at end of file
+        grid_minute(m, args)
-- 
GitLab