Skip to content
Snippets Groups Projects

replace exits with raises in grid_setup, and remove the realtime check

Merged Nick Bearson requested to merge 20220712-realtime-fixes into master
@@ -45,6 +45,7 @@ import atexit
from glob import glob
import socket
import signal
import traceback
from netCDF4 import Dataset
#from multiprocessing import freeze_support # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.freeze_support
from functools import partial
@@ -206,34 +207,20 @@ def grid_setup(glm_files, args, work_dir=os.getcwd()):
min_events = None
min_groups = None
if args.realtime:
if len(args.filenames) != 1:
log.error("realtime mode only accepts one input file")
exit(1)
glminfo = parse_glm_filename(os.path.basename(args.filenames[0]))
globstring = "{}_{}_{}_s{}*".format(glminfo[0], glminfo[1], glminfo[2], glminfo[3].strftime("%Y%j%H%M"))
fileglob = glob(os.path.join(os.path.dirname(args.filenames[0]), globstring))
if len(fileglob) != 3:
log.error("There are not (yet) three GLM files from this minute. This may be expected. Exiting.")
exit(0)
args.filenames = fileglob
if len(glm_files) != 3:
raise RuntimeError("expected three files for this minute, only received {}".format(len(glm_files)))
for f in args.filenames:
for f in glm_files:
if not os.path.exists(f):
log.error("Tried to grid file that does not exist: {}".format(f))
exit(1)
raise RuntimeError("Tried to grid file that does not exist: {}".format(f))
if "meso" in args.goes_sector and (args.ctr_lat == None or args.ctr_lon == None):
log.error("sector 'meso' requires --ctr-lat & --ctr-lon")
exit(1)
raise RuntimeError("sector 'meso' requires --ctr-lat & --ctr-lon")
try:
start_time, end_time = get_start_end(glm_files)
except:
log.error("Could not parse start & end times from one or more of the files provided:")
log.error(", ".join(glm_files))
exit(1)
raise RuntimeError("Could not parse start & end times from one or more of the files provided: {}".format(", ".join(glm_files)))
base_date = datetime(start_time.year, start_time.month, start_time.day)
proj_name = 'geos'
@@ -333,10 +320,90 @@ def add_gglm_attrs(netcdf_filename, input_filenames):
def alarm_handler(signum, frame):
    """Signal handler that aborts the current work when the alarm fires.

    Intended to be installed for ``SIGALRM`` via ``signal.signal``.

    Args:
        signum: Number of the signal being handled (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        TimeoutError: Always. ``TimeoutError`` is a subclass of ``OSError``,
            so callers that catch ``OSError`` keep working while new callers
            can tell a timeout apart from other OS-level failures.
    """
    raise TimeoutError("Timeout exceeded!")
if __name__ == '__main__':
def grid_minute(minute, args):
    """Grid (and optionally tile) the GLM input files belonging to one minute.

    The files in ``args.filenames`` whose ``glm_filename_to_minute`` value
    equals ``minute`` are gridded inside a fresh temporary directory. The
    gridded files (and, when ``args.create_tiles`` is set, the tiled files
    written by satpy) are then moved into ``args.output_dir``.

    A ``SIGALRM`` watchdog is armed for the duration of the call and is
    disarmed again on the way out, whether the minute succeeded, was skipped
    or failed, so a stale alarm cannot fire later in unrelated code.

    Args:
        minute: The minute to process, in whatever form
            ``glm_filename_to_minute`` returns.
        args: Parsed command-line arguments. This function reads
            ``filenames``, ``create_tiles``, ``output_dir`` and
            ``system_environment_prefix_tiles``; ``grid_setup`` receives the
            whole object.

    Returns:
        None. If the minute does not have exactly three input files an error
        is logged and nothing is gridded.

    Raises:
        RuntimeError: If tiling was requested and the GOES position is
            neither "east" nor "west".
        OSError: Raised by ``alarm_handler`` if the work is not finished
            within ten minutes.
        Exception: Anything raised by ``grid_setup``, the gridder, satpy or
            the file moves is propagated to the caller.
    """
    signal.signal(signal.SIGALRM, alarm_handler)
    signal.alarm(10*60)  # timeout if we're not done gridding after 10 minutes
    try:
        # set up temporary dir
        tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())
        log.info("working in: {}".format(tempdir_path))
        # clean our temporary dir on exit
        atexit.register(shutil.rmtree, tempdir_path)

        # grab all input files for this minute
        minute_files = sorted(f for f in args.filenames if glm_filename_to_minute(f) == minute)

        # do we have three input files for this minute?
        if len(minute_files) != 3:
            log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(minute, len(minute_files)))
            return

        # do the gridding
        gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
        gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)

        # the second element of each subgrid holds that subgrid's output files
        gridded_files = []
        for subgrid in gridder_return:
            gridded_files.extend(subgrid[1])

        # we need to add attributes here due to an issue where satpy (or its dependencies) are
        # holding the input gridded file open until the process exits
        for f in gridded_files:
            add_gglm_attrs(f, glm_filenames)

        # (optionally) do tiling
        if args.create_tiles:
            sector = get_goes_position(glm_filenames)
            if sector == "east":
                sector_id = "GOES_EAST"
            elif sector == "west":
                sector_id = "GOES_WEST"
            else:
                raise RuntimeError("could not determine sector_id")

            # imported lazily so satpy is only needed when tiling is requested
            from satpy import Scene

            for gridded_file in gridded_files:
                # log the file being tiled right now, not the whole list
                log.info("TILING: {}".format(gridded_file))
                scn = Scene(reader='glm_l2', filenames=[gridded_file])  # n.b. satpy requires a list of filenames
                scn.load([
                    'DQF',
                    'flash_extent_density',
                    'minimum_flash_area',
                    'total_energy',
                ])
                scn.save_datasets(
                    writer='awips_tiled',
                    template='glm_l2_radf',
                    # sector_id becomes an attribute in the output files and may be another
                    # legacy kind of thing. I'm not sure how much it is actually used here.
                    sector_id=sector_id,
                    # I think source_name is required by the writer for legacy reasons but
                    # isn't actually used for the glm output, hence the empty string.
                    source_name="",
                    # base_dir is the output directory.
                    base_dir=tempdir_path,
                    # tile_size is set to the size of the GLMF sample tiles we were given and
                    # should match the full disk ABI tiles, which is what they wanted.
                    tile_size=(506, 904),
                    # DQF is all valid all the time, so there is no way to detect empty tiles
                    # unless we ignore the "category" products.
                    check_categories=False,
                    environment_prefix=args.system_environment_prefix_tiles,
                    compress=True,
                )

            # pick up output files from the tempdir
            # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
            log.debug("files in {}".format(tempdir_path))
            log.debug(os.listdir(tempdir_path))
            log.debug("moving output to {}".format(args.output_dir))
            tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
            tiled_files = glob(tiled_path)
            for f in tiled_files:
                add_gglm_attrs(f, glm_filenames)
                shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))

        for f in gridded_files:
            shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))

        # tempdir cleans itself up via atexit, above
    finally:
        # disarm the watchdog so a pending alarm cannot fire after we return
        signal.alarm(0)
if __name__ == '__main__':
# freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time
parser = create_parser()
args = parser.parse_args()
@@ -381,12 +448,6 @@ if __name__ == '__main__':
# set up output dir
os.makedirs(args.output_dir, exist_ok=True)
# set up temporary dir
tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())
log.info("working in: {}".format(tempdir_path))
# clean our temporary dir on exit
atexit.register(shutil.rmtree, tempdir_path)
minutes = []
for f in args.filenames:
m = glm_filename_to_minute(f)
@@ -394,75 +455,10 @@ if __name__ == '__main__':
minutes.append(m)
for m in minutes:
# grab all input files for this minute
minute_files = []
for f in args.filenames:
if glm_filename_to_minute(f) == m:
minute_files.append(f)
minute_files = sorted(minute_files)
# do we have three input files for this minute?
if len(minute_files) != 3:
log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(m, len(minute_files)))
continue
# do the gridding
gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)
gridded_files = []
for subgrid in gridder_return:
for gridded_file in subgrid[1]:
gridded_files.append(gridded_file)
# we need to add attributes here due to an issue where satpy (or its dependencies) are
# holding the input gridded file open until the process exits
for f in gridded_files:
add_gglm_attrs(f, glm_filenames)
# (optionally) do tiling
if args.create_tiles:
sector = get_goes_position(glm_filenames)
if sector == "east":
sector_id = "GOES_EAST"
elif sector == "west":
sector_id = "GOES_WEST"
else:
raise RuntimeError("could not determine sector_id")
from satpy import Scene
for gridded_file in gridded_files:
log.info("TILING: {}".format(gridded_files))
scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
scn.load([
'DQF',
'flash_extent_density',
'minimum_flash_area',
'total_energy',
])
scn.save_datasets(writer='awips_tiled',
template='glm_l2_radf',
sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
environment_prefix=args.system_environment_prefix_tiles,
compress=True)
# pick up output files from the tempdir
# output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
log.debug("files in {}".format(tempdir_path))
log.debug(os.listdir(tempdir_path))
log.debug("moving output to {}".format(args.output_dir))
tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
tiled_files = glob(tiled_path)
for f in tiled_files:
add_gglm_attrs(f, glm_filenames)
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
for f in gridded_files:
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
# tempdir cleans itself up via atexit, above
\ No newline at end of file
try:
grid_minute(m, args)
except Exception as e:
log.error(e)
log.debug(traceback.format_exc())
log.error(f"Gridding minute {m} failed.")
continue
\ No newline at end of file
Loading