Skip to content
Snippets Groups Projects

group inputs by minute and process accordingly

1 file
+ 114
63
Compare changes
  • Side-by-side
  • Inline
@@ -118,6 +118,11 @@ def get_goes_position(filenames):
@@ -118,6 +118,11 @@ def get_goes_position(filenames):
# we require that all files are from the same sensor and raise an exception if not
# we require that all files are from the same sensor and raise an exception if not
raise ValueError("could not determine GOES position - did you provide a mix of satellites?")
raise ValueError("could not determine GOES position - did you provide a mix of satellites?")
 
def glm_filename_to_minute(glm_filename):
    """Return the time parsed from a GLM filename, truncated to the minute.

    Only the basename of ``glm_filename`` is handed to
    ``parse_glm_filename``; the element at index 3 of its result is used as
    the file's time.  The ``second`` and ``microsecond`` fields of that
    value are set to zero so that files from the same minute compare equal.

    NOTE(review): index 3 is presumably the file's start time as a
    ``datetime`` -- confirm against ``parse_glm_filename``.
    """
    basename = os.path.basename(glm_filename)
    file_time = parse_glm_filename(basename)[3]
    return file_time.replace(second=0, microsecond=0)
def get_start_end(filenames, start_time=None, end_time=None):
def get_start_end(filenames, start_time=None, end_time=None):
"""Compute start and end time of data based on filenames."""
"""Compute start and end time of data based on filenames."""
@@ -187,7 +192,7 @@ def get_outpath_base(args):
@@ -187,7 +192,7 @@ def get_outpath_base(args):
return dsname
return dsname
def grid_setup(args, work_dir=os.getcwd()):
def grid_setup(glm_files, args, work_dir=os.getcwd()):
# When passed None for the minimum event or group counts, the gridder will skip
# When passed None for the minimum event or group counts, the gridder will skip
# the check, saving a bit of time.
# the check, saving a bit of time.
min_events = None
min_events = None
@@ -216,10 +221,10 @@ def grid_setup(args, work_dir=os.getcwd()):
@@ -216,10 +221,10 @@ def grid_setup(args, work_dir=os.getcwd()):
exit(1)
exit(1)
try:
try:
start_time, end_time = get_start_end(args.filenames)
start_time, end_time = get_start_end(glm_files)
except:
except:
log.error("Could not parse start & end times from one or more of the files provided:")
log.error("Could not parse start & end times from one or more of the files provided:")
log.error(", ".join(args.filenames))
log.error(", ".join(glm_files))
exit(1)
exit(1)
base_date = datetime(start_time.year, start_time.month, start_time.day)
base_date = datetime(start_time.year, start_time.month, start_time.day)
@@ -227,7 +232,7 @@ def grid_setup(args, work_dir=os.getcwd()):
@@ -227,7 +232,7 @@ def grid_setup(args, work_dir=os.getcwd()):
outputpath = os.path.join(work_dir, get_outpath_base(args)) # GLMTools expects a template in addition to the path
outputpath = os.path.join(work_dir, get_outpath_base(args)) # GLMTools expects a template in addition to the path
goes_position = get_goes_position(args.filenames)
goes_position = get_goes_position(glm_files)
if "meso" in args.goes_sector:
if "meso" in args.goes_sector:
view = "meso"
view = "meso"
@@ -296,7 +301,7 @@ def grid_setup(args, work_dir=os.getcwd()):
@@ -296,7 +301,7 @@ def grid_setup(args, work_dir=os.getcwd()):
if (proj_name == 'pixel_grid') or (proj_name == 'geos'):
if (proj_name == 'pixel_grid') or (proj_name == 'geos'):
grid_kwargs['pixel_coords'] = fixed_grid
grid_kwargs['pixel_coords'] = fixed_grid
grid_kwargs['ellipse_rev'] = -1 # -1 (default) = infer from date in each GLM file
grid_kwargs['ellipse_rev'] = -1 # -1 (default) = infer from date in each GLM file
return gridder, args.filenames, start_time, end_time, grid_kwargs
return gridder, glm_files, start_time, end_time, grid_kwargs
def get_cspp_gglm_version():
def get_cspp_gglm_version():
@@ -338,6 +343,33 @@ if __name__ == '__main__':
@@ -338,6 +343,33 @@ if __name__ == '__main__':
log.info("Starting GLM Gridding")
log.info("Starting GLM Gridding")
log.debug("Starting script with: %s", sys.argv)
log.debug("Starting script with: %s", sys.argv)
 
# handle the realtime flag
 
if args.realtime:
 
if len(args.filenames) != 1:
 
log.error("realtime mode only accepts one input file")
 
exit(1)
 
glminfo = parse_glm_filename(os.path.basename(args.filenames[0]))
 
 
globstring = "{}_{}_{}_s{}*".format(glminfo[0], glminfo[1], glminfo[2], glminfo[3].strftime("%Y%j%H%M"))
 
fileglob = glob(os.path.join(os.path.dirname(args.filenames[0]), globstring))
 
if len(fileglob) != 3:
 
log.error("There are not (yet) three GLM files from this minute. This may be expected. Exiting.")
 
exit(0)
 
 
# this allows a user to use realtime mode to process a large directory of GLM without
 
# creating the same output file multiple times
 
if sorted(fileglob)[-1] != args.filenames[0]:
 
log.error("This is not the last file from this minute. Exiting.")
 
exit(0)
 
 
args.filenames = fileglob
 
 
# check that all of our inputs exist
 
for f in args.filenames:
 
if not os.path.exists(f):
 
log.error("Tried to grid file that does not exist: {}".format(f))
 
exit(1)
 
# set up output dir
# set up output dir
os.makedirs(args.output_dir, exist_ok=True)
os.makedirs(args.output_dir, exist_ok=True)
@@ -347,63 +379,82 @@ if __name__ == '__main__':
@@ -347,63 +379,82 @@ if __name__ == '__main__':
# clean our temporary dir on exit
# clean our temporary dir on exit
atexit.register(shutil.rmtree, tempdir_path)
atexit.register(shutil.rmtree, tempdir_path)
# do the gridding
minutes = []
gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(args, work_dir=tempdir_path)
for f in args.filenames:
gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)
m = glm_filename_to_minute(f)
if m not in minutes:
gridded_files = []
minutes.append(m)
for subgrid in gridder_return:
for gridded_file in subgrid[1]:
for m in minutes:
gridded_files.append(gridded_file)
# grab all input files for this minute
minute_files = []
# we need to add attributes here due to an issue where satpy (or its dependencies) are
for f in args.filenames:
# holding the input gridded file open until the process exits
if glm_filename_to_minute(f) == m:
for f in gridded_files:
minute_files.append(f)
add_gglm_attrs(f, glm_filenames)
minute_files = sorted(minute_files)
# (optionally) do tiling
# do we have three input files for this minute?
if args.create_tiles:
if len(minute_files) != 3:
log.error("Minute {} only has {} input file(s). A gridded file will not be generated.".format(m, len(minute_files)))
sector = get_goes_position(glm_filenames)
continue
if sector == "east":
sector_id = "GOES_EAST"
# do the gridding
elif sector == "west":
gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(minute_files, args, work_dir=tempdir_path)
sector_id = "GOES_WEST"
gridder_return = gridder(glm_filenames, start_time, end_time, **grid_kwargs)
else:
raise RuntimeError("could not determine sector_id")
gridded_files = []
for subgrid in gridder_return:
from satpy import Scene
for gridded_file in subgrid[1]:
for gridded_file in gridded_files:
gridded_files.append(gridded_file)
log.info("TILING: {}".format(gridded_files))
scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
# we need to add attributes here due to an issue where satpy (or its dependencies) are
scn.load([
# holding the input gridded file open until the process exits
'DQF',
for f in gridded_files:
'flash_extent_density',
add_gglm_attrs(f, glm_filenames)
'minimum_flash_area',
'total_energy',
# (optionally) do tiling
])
if args.create_tiles:
scn.save_datasets(writer='awips_tiled',
sector = get_goes_position(glm_filenames)
template='glm_l2_radf',
if sector == "east":
sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
sector_id = "GOES_EAST"
source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
elif sector == "west":
base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
sector_id = "GOES_WEST"
tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
else:
check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
raise RuntimeError("could not determine sector_id")
environment_prefix=args.system_environment_prefix_tiles,
compress=True)
from satpy import Scene
for gridded_file in gridded_files:
# pick up output files from the tempdir
log.info("TILING: {}".format(gridded_files))
# output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
log.debug("files in {}".format(tempdir_path))
scn.load([
log.debug(os.listdir(tempdir_path))
'DQF',
log.debug("moving output to {}".format(args.output_dir))
'flash_extent_density',
tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
'minimum_flash_area',
tiled_files = glob(tiled_path)
'total_energy',
for f in tiled_files:
])
add_gglm_attrs(f, glm_filenames)
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
scn.save_datasets(writer='awips_tiled',
for f in gridded_files:
template='glm_l2_radf',
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
sector_id=sector_id, # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
 
source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
 
base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
 
tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
 
check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
 
environment_prefix=args.system_environment_prefix_tiles,
 
compress=True)
 
 
# pick up output files from the tempdir
 
# output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
 
log.debug("files in {}".format(tempdir_path))
 
log.debug(os.listdir(tempdir_path))
 
log.debug("moving output to {}".format(args.output_dir))
 
tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
 
tiled_files = glob(tiled_path)
 
for f in tiled_files:
 
add_gglm_attrs(f, glm_filenames)
 
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
 
for f in gridded_files:
 
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
# tempdir cleans itself up via atexit, above
# tempdir cleans itself up via atexit, above
 
\ No newline at end of file
Loading