Skip to content
Snippets Groups Projects
Commit 306ae4e4 authored by Nick Bearson's avatar Nick Bearson
Browse files

move the handling of each minute into its own function - grid_minute() - which...

move the handling of each minute into its own function - grid_minute() - which can then be try/catch'd or potentially run in parallel
parent ed776a1a
No related branches found
No related tags found
1 merge request: !28 "replace exits with raises in grid_setup, and remove the realtime check"
......@@ -319,10 +319,90 @@ def add_gglm_attrs(netcdf_filename, input_filenames):
def alarm_handler(signum, frame):
    """Abort the current work by raising ``OSError`` when the alarm fires.

    Installed as the ``SIGALRM`` handler by ``grid_minute``. Both parameters
    are part of the signal-handler calling convention and are not used.

    Raises:
        OSError: always, with the message ``"Timeout exceeded!"``.
    """
    timeout_error = OSError("Timeout exceeded!")
    raise timeout_error
if __name__ == '__main__':
def grid_minute(minute, args):
    """Grid (and optionally tile) the GLM input files belonging to one minute.

    Selects the entries of ``args.filenames`` whose ``glm_filename_to_minute``
    value equals ``minute``, grids them inside a fresh temporary directory,
    stamps the gridded files with ``add_gglm_attrs`` and moves them into
    ``args.output_dir``. When ``args.create_tiles`` is set, tiles are written
    as well and moved to the same place.

    Args:
        minute: minute key, compared against ``glm_filename_to_minute(f)``.
        args: parsed command-line arguments. This function reads
            ``filenames``, ``create_tiles``, ``output_dir`` and
            ``system_environment_prefix_tiles``, and hands the whole object
            on to ``grid_setup``.

    Returns:
        None. If the minute does not have exactly three input files an error
        is logged and nothing is generated.

    Raises:
        OSError: raised by ``alarm_handler`` if the work is not finished
            within 10 minutes.
        RuntimeError: if tiling is requested and the satellite position is
            neither "east" nor "west".
    """
    signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(10*60)  # timeout if we're not done gridding after 10 minutes
    try:
        # set up temporary dir
        tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())
        log.info("working in: {}".format(tempdir_path))
        # clean our temporary dir on exit
        atexit.register(shutil.rmtree, tempdir_path)

        # grab all input files for this minute
        minute_files = sorted(f for f in args.filenames if glm_filename_to_minute(f) == minute)

        # do we have three input files for this minute?
        if len(minute_files) != 3:
            log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(minute, len(minute_files)))
            return

        # do the gridding
        gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
        gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)

        # the second element of each gridder result holds that subgrid's output files
        gridded_files = []
        for subgrid in gridder_return:
            gridded_files.extend(subgrid[1])

        # we need to add attributes here due to an issue where satpy (or its dependencies) are
        # holding the input gridded file open until the process exits
        for f in gridded_files:
            add_gglm_attrs(f, glm_filenames)

        # (optionally) do tiling
        if args.create_tiles:
            _tile_gridded_files(gridded_files, glm_filenames, tempdir_path, args)

        for f in gridded_files:
            shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
        # tempdir cleans itself up via atexit, above
    finally:
        # Disarm the timeout so it cannot fire in unrelated code after this
        # minute has finished, been skipped, or failed.
        signal.alarm(0)


def _tile_gridded_files(gridded_files, glm_filenames, tempdir_path, args):
    """Write tiles for each gridded file and move them to ``args.output_dir``."""
    sector = get_goes_position(glm_filenames)
    if sector == "east":
        sector_id = "GOES_EAST"
    elif sector == "west":
        sector_id = "GOES_WEST"
    else:
        raise RuntimeError("could not determine sector_id")

    # imported here, as in the original code, rather than at module import time
    from satpy import Scene

    for gridded_file in gridded_files:
        log.info("TILING: {}".format(gridded_file))
        scn = Scene(reader='glm_l2', filenames=[gridded_file])  # n.b. satpy requires a list of filenames
        scn.load([
            'DQF',
            'flash_extent_density',
            'minimum_flash_area',
            'total_energy',
        ])
        scn.save_datasets(
            writer='awips_tiled',
            template='glm_l2_radf',
            # becomes an attribute in the output files; possibly a legacy
            # field, it is unclear how much it is actually used
            sector_id=sector_id,
            # believed to be required by the writer for legacy reasons but
            # not actually used for the glm output
            source_name="",
            # output directory for the tiles
            base_dir=tempdir_path,
            # the size of the GLMF sample tiles we were given; should match
            # the full disk ABI tiles
            tile_size=(506, 904),
            # DQF is all valid all the time, so empty tiles cannot be
            # detected unless the "category" products are ignored
            check_categories=False,
            environment_prefix=args.system_environment_prefix_tiles,
            compress=True,
        )

    # pick up output files from the tempdir
    # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
    log.debug("files in {}".format(tempdir_path))
    log.debug(os.listdir(tempdir_path))
    log.debug("moving output to {}".format(args.output_dir))
    tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
    for f in glob(tiled_path):
        add_gglm_attrs(f, glm_filenames)
        shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
if __name__ == '__main__':
# freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time
parser = create_parser()
args = parser.parse_args()
......@@ -367,12 +447,6 @@ if __name__ == '__main__':
# set up output dir
os.makedirs(args.output_dir, exist_ok=True)
# set up temporary dir
tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())
log.info("working in: {}".format(tempdir_path))
# clean our temporary dir on exit
atexit.register(shutil.rmtree, tempdir_path)
minutes = []
for f in args.filenames:
m = glm_filename_to_minute(f)
......@@ -380,75 +454,4 @@ if __name__ == '__main__':
minutes.append(m)
for m in minutes:
# grab all input files for this minute
minute_files = []
for f in args.filenames:
if glm_filename_to_minute(f) == m:
minute_files.append(f)
minute_files = sorted(minute_files)
# do we have three input files for this minute?
if len(minute_files) != 3:
log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(m, len(minute_files)))
continue
# do the gridding
gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)
gridded_files = []
for subgrid in gridder_return:
for gridded_file in subgrid[1]:
gridded_files.append(gridded_file)
# we need to add attributes here due to an issue where satpy (or its dependencies) are
# holding the input gridded file open until the process exits
for f in gridded_files:
add_gglm_attrs(f, glm_filenames)
# (optionally) do tiling
if args.create_tiles:
sector = get_goes_position(glm_filenames)
if sector == "east":
sector_id = "GOES_EAST"
elif sector == "west":
sector_id = "GOES_WEST"
else:
raise RuntimeError("could not determine sector_id")
from satpy import Scene
for gridded_file in gridded_files:
log.info("TILING: {}".format(gridded_files))
scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
scn.load([
'DQF',
'flash_extent_density',
'minimum_flash_area',
'total_energy',
])
scn.save_datasets(writer='awips_tiled',
template='glm_l2_radf',
sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
environment_prefix=args.system_environment_prefix_tiles,
compress=True)
# pick up output files from the tempdir
# output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
log.debug("files in {}".format(tempdir_path))
log.debug(os.listdir(tempdir_path))
log.debug("moving output to {}".format(args.output_dir))
tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
tiled_files = glob(tiled_path)
for f in tiled_files:
add_gglm_attrs(f, glm_filenames)
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
for f in gridded_files:
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
# tempdir cleans itself up via atexit, above
\ No newline at end of file
grid_minute(m, args)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment