Merge branch 'nickb-add-satpy-PR' into 'master'

This PR grew a bit beyond just adding satpy to our buildbucket!

See merge request !4
parents 0a140142 9e25607f
......@@ -15,4 +15,16 @@ dependencies:
- scipy
- setuptools
- xarray
- zarr
\ No newline at end of file
- zarr
# satpy dependencies:
- configobj
- pykdtree
- pyresample
- trollsift
- trollimage
- shapely
- pip:
- git+
- git+
- git+
- git+
\ No newline at end of file
......@@ -25,9 +25,6 @@ fi
......@@ -53,20 +50,11 @@ which python
conda info -a
conda list --export
echo "Version specified: ${version}"
echo "lmatools reference: ${LMATOOLS_REF}"
echo "glmtools reference: ${GLMTOOLS_REF}"
# Turn on command printing here because we don't need all of the `conda`
# internal bash commands to pollute our output
set -x
# Install glmtools and related packages
# All dependencies should have been built with the buildbucket
# Careful: Could result in missing dependencies but we want reproducibility
pip install --no-deps git+${LMATOOLS_REF}
pip install --no-deps git+
pip install --no-deps git+${GLMTOOLS_REPOS}@${GLMTOOLS_REF}
# get the current packages files
# if the caller mounted the repository already then use that
......@@ -76,19 +64,20 @@ fi
# Build a tarball version of the current conda environment
# TODO: Add conda cleanup commands similar to what Polar2Grid uses to save space
conda clean -ay # remove unnecessary things from conda environment
conda pack --n-threads $(nproc) -n build -o ${conda_tb}
# Build up our package directory
mkdir -p ${pkg_name}
cd ${pkg_name}
mkdir -p bin opt/conda
mkdir -p bin libexec/python_runtime
# Copy package scripts/data to package directory
cp $GGLM_DIR/ ./
cp $GGLM_DIR/bin/* ./bin/
cp $GGLM_DIR/libexec/* ./libexec/
# Untar the tarball so we can put things where we want
tar -xz -C ./opt/conda -f ../${conda_tb}
tar -xz -C ./libexec/python_runtime -f ../${conda_tb}
# Go back to original work directory
cd ..
# CSPP Geo Gridded GLM
\ No newline at end of file
The following scripts are located in the `bin/` directory.
A simple wrapper around the one-minute gridding script that takes a single input file and:
1) determines if it is the last file in a minute
2) if so, finds all files from that minute and passes them to the gridding script
3) creates both grids and tiles for that minute
This script creates one minute grids from any number of input files.
A wrapper for the `make_GLM_grids.py` "kitchen sink" example script provided by glmtools.
\ No newline at end of file
......@@ -26,7 +26,7 @@ if [ -z "$CSPP_GEO_GGLM_HOME" ]; then
# Setup necessary environments
source $CSPP_GEO_GGLM_HOME/bin/
source $CSPP_GEO_GGLM_HOME/libexec/
# Call the python module to do the processing, passing all arguments
python3 $CSPP_GEO_GGLM_HOME/bin/ "$@"
python3 $CSPP_GEO_GGLM_HOME/libexec/ "$@"
#!/usr/bin/env bash
# encoding: utf-8
# Copyright (C) 2019 Space Science and Engineering Center (SSEC),
# University of Wisconsin-Madison.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# This file is part of the CSPP Geo Gridded GLM software package. CSPP Geo
# Gridded GLM takes GOES GLM Level 2 LCFA files and grids them to the ABI
# fixed grid. It does this using the open source glmtools python package by
# Eric Bruning.
if [ -z "$CSPP_GEO_GGLM_HOME" ]; then
export CSPP_GEO_GGLM_HOME="$( cd -P "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
# Setup necessary environments
source $CSPP_GEO_GGLM_HOME/libexec/
# Call the python module to do the processing, passing all arguments
python3 $CSPP_GEO_GGLM_HOME/libexec/ "$@"
#!/usr/bin/env bash
# encoding: utf-8
if [ -z "$CSPP_GEO_GGLM_HOME" ]; then
export CSPP_GEO_GGLM_HOME="$( cd -P "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
# Setup necessary environments
source $CSPP_GEO_GGLM_HOME/libexec/
# Call the python module to do the processing, passing all arguments
python3 $CSPP_GEO_GGLM_HOME/libexec/ "$@"
#!/usr/bin/env python3
import sys
import os
from glob import glob
from datetime import datetime
import subprocess
from import parse_glm_filename
if __name__ == '__main__':
# Script entry point: given one GLM file, decide whether it is the last file
# of its minute and, if so, collect that minute's files for gridding.
# FIXME: handle args better if this goes beyond my testing
# Unpacking requires exactly one command-line argument; any other count
# raises ValueError here.
_, glmfile = sys.argv
# bad (start of period)
# glmfile = '/scratch/nickb/cspp-geo-grb-1.0/output/product/'
# good (end of period)
# glmfile = '/scratch/nickb/cspp-geo-grb-1.0/output/product/'
# glmfile = '/data/users/nickb/cspp-geo-gridded-glm/testing/2020-11-05/CLASS/'
# check that glmfile is actually a GLM file
glminfo = parse_glm_filename(os.path.basename(glmfile))
# Fields 3 and 4 of the parsed filename are used as the start and end times.
filename_starts = glminfo[3]
# NOTE(review): filename_ends is assigned but not read in the visible code.
filename_ends = glminfo[4]
# if this isn't the last file of the minute, exit
# A start second of 40 is treated as the last 20-second file of the minute.
if filename_starts.second != 40:
print("This is not the last GLM file from this minute. Exiting.")
# NOTE(review): no exit call is visible after the message above; as shown,
# execution would continue into the globbing below - confirm.
# if this is the last file of the minute, grab all files of the minute (three)
# example trio:
# Match sibling files that share the first three filename fields and the same
# start year, day-of-year, hour and minute.
globstring = "{}_{}_{}_s{}*".format(glminfo[0], glminfo[1], glminfo[2], glminfo[3].strftime("%Y%j%H%M"))
fileglob = glob(os.path.join(os.path.dirname(glmfile), globstring))
print("Gridding {} files".format(len(fileglob)))
# and run gridded glm a'la:
# \
# --goes-position auto --goes-sector full \
# --create-tiles \
# ./2020-11-05/CLASS/ \
# ./2020-11-05/CLASS/ \
# NOTE(review): the next line fuses the end of the example comment with the
# start of a list literal; the statement that consumes the list is not visible.
# ./2020-11-05/CLASS/["",
"--goes-position", "auto",
"--goes-sector", "full",
# Indexing three entries raises IndexError when the glob matched fewer than
# three files; glob results are not sorted.
fileglob[0], fileglob[1], fileglob[2]]
#!/usr/bin/env python3
# Based on
parse_desc = """Grid the past X minutes of GLM flash data, given a single input file.
import numpy as np
from datetime import datetime, timedelta
import os
import sys
import tempfile
import shutil
import atexit
from glob import glob
#from multiprocessing import freeze_support #
from functools import partial
from lmatools.grid.make_grids import write_cf_netcdf_latlon, write_cf_netcdf_noproj, write_cf_netcdf_fixedgrid
from lmatools.grid.make_grids import dlonlat_at_grid_center, grid_h5flashfiles
from glmtools.grid.make_grids import grid_GLM_flashes
from import parse_glm_filename
from lmatools.grid.fixed import get_GOESR_grid, get_GOESR_coordsys
import logging
log = logging.getLogger(__name__)
def create_parser():
# Build and return the argparse.ArgumentParser for the gridding script:
# logging options, output directory, GOES sector/position, optional tiling,
# mesoscale centre coordinates and one or more input filenames.
import argparse
parser = argparse.ArgumentParser(description=parse_desc)
# Each -v increments args.verbosity; the __main__ block indexes a list of
# logging levels with it.
# NOTE(review): the help text says "default INFO", but default=0 selects the
# first entry of that list (logging.ERROR) - confirm which is intended.
parser.add_argument('-v', '--verbose', dest='verbosity', action="count", default=0,
help='each occurrence increases verbosity 1 level through ERROR-WARNING-INFO-DEBUG (default INFO)')
parser.add_argument('-l', '--log', dest="log_fn", default=None,
help="specify the log filename")
# from Requirements: "Output is Gridded GLM in the native glmtools NetCDF4 format, with a user option to produce AWIPS-compatible NetCDF tiles as described below"
# NOTE(review): the '--output-dir' call below is not closed in the visible
# source; its remaining keyword arguments (default/help) are missing.
parser.add_argument('-o', '--output-dir', metavar='output directory',
# NOTE(review): the help below says ctr_lon/ctr_lat are interpreted as fixed
# grid ctr_x/ctr_y for meso, while grid_setup converts them from lon/lat via
# ECEF - confirm which description is current.
parser.add_argument('--goes-sector', default="full",
help="One of [full|conus|meso]. "
"Requires goes_position. If sector is "
"meso, ctr_lon and ctr_lat are interpreted as "
"the ctr_x and ctr_y of the fixed grid")
# "auto" makes grid_setup derive the position from the input filenames.
parser.add_argument('--goes-position', default="auto",
help="One of [east|west|test|auto]. "
"Requires '--goes-sector'.")
parser.add_argument("-t", "--create-tiles", default=False, action='store_true',
help="create AWIPS-compatible tiles") # FIXME: improve this help text
# Centre coordinates are only read by grid_setup when the sector is 'meso'.
parser.add_argument('--ctr-lat', metavar='latitude',
type=float, help='center latitude (required for meso)')
parser.add_argument('--ctr-lon', metavar='longitude',
type=float, help='center longitude (required for meso)')
# from Requirements: "Input is one or more GLM LCFA (L2+) files in mission standard format (nominally three 20-second input files)"
# Positional argument; nargs='+' requires at least one filename.
parser.add_argument(dest='filenames', metavar='filename', nargs='+')
return parser
old arguments for reference
FIXME: remove this whole comment once everything is working
parser.add_argument('--dx', metavar='km',
default=10.0, type=float,
help='approximate east-west grid spacing')
parser.add_argument('--dy', metavar='km',
default=10.0, type=float,
help='approximate north-south grid spacing')
parser.add_argument('--dt', metavar='seconds',
default=60.0, type=float,
help='frame duration')
parser.add_argument('--width', metavar='distance in km',
type=float, help='total width of the grid')
parser.add_argument('--height', metavar='distance in km',
type=float, help='total height of the grid')
parser.add_argument('--nevents', metavar='minimum events per flash',
type=int, dest='min_events', default=1,
help='minimum number of events per flash')
parser.add_argument('--ngroups', metavar='minimum groups per flash',
type=int, dest='min_groups', default=1,
help='minimum number of groups per flash')
parser.add_argument('--subdivide-grid', metavar='sqrt(number of subgrids)',
type=int, default=1,
help="subdivide the grid this many times along "
"each dimension")
def get_resolution(args):
    """Return the grid resolution as a compact label such as ``'2.0km'``.

    ``args`` is accepted for interface compatibility but is not read: the
    resolution is currently fixed at 2.0.
    """
    # Hard-coded for now; see nearest_resolution in make_glm_grids for how
    # this could be exposed as an option later.
    resolution_km = 2.0
    # The 4-wide float field pads short values with a leading space, which is
    # stripped so the label contains no whitespace.
    padded_label = '{0:4.1f}km'.format(resolution_km)
    return padded_label.replace(' ', '')
def get_goes_position(filenames):
    """Determine the GOES position from the input filenames.

    Used when the position is given as "auto": the satellite is read from the
    ``_G16_`` / ``_G17_`` tag in each file name.

    Args:
        filenames: iterable of GLM file paths.

    Returns:
        "east" when every file name contains ``_G16_``, "west" when every
        file name contains ``_G17_``.

    Raises:
        ValueError: if no filenames are given, if the files come from a mix
            of satellites, or if no known satellite tag is present.
    """
    # Only inspect the file name itself so that a satellite tag appearing in
    # a directory name cannot decide the result.
    names = [os.path.basename(f) for f in filenames]
    # all() is vacuously true for an empty sequence, which would otherwise
    # report "east" when no files were supplied at all.
    if names:
        if all("_G16_" in name for name in names):
            return "east"
        if all("_G17_" in name for name in names):
            return "west"
    # we require that all files are from the same sensor and raise an exception if not
    raise ValueError("position 'auto' but could not determine position - did you provide a mix of satellites?")
def get_start_end(filenames, start_time=None, end_time=None):
    """Compute start and end time of data based on filenames.

    Args:
        filenames: iterable of GLM file paths; only the base name is parsed.
        start_time: optional explicit start time. When given it is returned
            unchanged instead of being derived from the filenames.
        end_time: optional explicit end time. When given it is returned
            unchanged instead of being derived from the filenames.

    Returns:
        Tuple ``(start_time, end_time)``.

    Raises:
        ValueError: if a time has to be derived and no filenames were given
            (or a filename cannot be parsed by ``parse_glm_filename``).
    """
    # Filenames are only parsed when at least one bound must be derived, so
    # callers that supply both times can use non-standard file names.
    if start_time is None or end_time is None:
        base_filenames = [os.path.basename(p) for p in filenames]
        # opsenv, algorithm, platform, start, end, created = parse_glm_filename(f)
        filename_starts = [parse_glm_filename(f)[3] for f in base_filenames]
        if not filename_starts:
            raise ValueError("Could not determine start/end time")
        if start_time is None:
            start_time = min(filename_starts)
        if end_time is None:
            # Used to use max(filename_ends), but on 27 Oct 2020, the filename
            # ends started to report the time of the last event in the file,
            # causing a slight leakage (usually less than a second) into the
            # next minute. This caused two minutes of grids to be produced for
            # every three twenty second files passed to this script.
            # Instead, we now assume every LCFA file is 20 s long, beginning
            # with the start time. No doubt in the future we will see
            # filenames that no longer start on an even minute boundary.
            end_time = max(filename_starts) + timedelta(seconds=20)
    return start_time, end_time
def grid_setup(args, work_dir=os.getcwd()):
# Prepare everything needed to grid the input GLM files onto the GOES-R
# fixed grid. Returns the tuple
# (gridder, args.filenames, start_time, end_time, grid_kwargs), which the
# caller uses as gridder(filenames, start_time, end_time, **grid_kwargs).
# NOTE(review): the work_dir default is evaluated once, when the function is
# defined, not on each call.
# When passed None for the minimum event or group counts, the gridder will skip
# the check, saving a bit of time.
min_events = None
min_groups = None
# NOTE(review): the `try:` that pairs with the `except` below is not visible
# in this view.
start_time, end_time = get_start_end(args.filenames)
except ValueError:
# NOTE(review): the message refers to --start/--end, which create_parser does
# not define in the visible source.
log.error("Non-standard filenames provided, use --start and --end to specify data times.")
# NOTE(review): the datetime(...) call below is truncated in this view.
base_date = datetime(start_time.year, start_time.month,
proj_name = 'geos'
outputpath = os.path.join(work_dir, "{dataset_name}") # GLMTools expects a template in addition to the path
# With position "auto" the satellite position is inferred from the filenames.
if args.goes_position == "auto":
goes_position = get_goes_position(args.filenames)
# NOTE(review): the `else:` for the assignment below is not visible; as shown
# it would always overwrite the inferred position.
goes_position = args.goes_position
resln = get_resolution(args)
# NOTE(review): the get_GOESR_grid(...) call below is truncated in this view.
view = get_GOESR_grid(position=goes_position,
nadir_lon = view['nadir_lon']
dx = dy = view['resolution']
nx, ny = view['pixelsEW'], view['pixelsNS']
geofixcs, grs80lla = get_GOESR_coordsys(sat_lon_nadir=nadir_lon)
# Use the grid's own centre when the view provides one; otherwise, for the
# meso sector, derive it from the user-supplied lon/lat.
if 'centerEW' in view:
x_ctr, y_ctr = view['centerEW'], view['centerNS']
elif args.goes_sector == 'meso':
# use ctr_lon, ctr_lat to get the center of the mesoscale FOV
x_ctr, y_ctr, z_ctr = geofixcs.fromECEF(
*grs80lla.toECEF(args.ctr_lon, args.ctr_lat, 0.0))
# NOTE(review): the `else:` guarding the raise below is not visible.
# FIXME: is it possible to get here? if so, what should happen?
raise RuntimeError
# Need to use +1 here to convert to xedge, yedge expected by gridder
# instead of the pixel centroids that will result in the final image
nx += 1
ny += 1
x_bnd = (np.arange(nx, dtype='float') - (nx) / 2.0) * dx + x_ctr + 0.5 * dx
y_bnd = (np.arange(ny, dtype='float') - (ny) / 2.0) * dy + y_ctr + 0.5 * dy
log.debug(("initial x,y_ctr", x_ctr, y_ctr))
log.debug(("initial x,y_bnd", x_bnd.shape, y_bnd.shape))
# Only the outer extents are kept: each boundary array is reduced to [min, max].
x_bnd = np.asarray([x_bnd.min(), x_bnd.max()])
y_bnd = np.asarray([y_bnd.min(), y_bnd.max()])
# NOTE(review): this repeats the get_GOESR_coordsys call made above with the
# same argument.
geofixcs, grs80lla = get_GOESR_coordsys(sat_lon_nadir=nadir_lon)
# Convert the fixed-grid centre back to lon/lat for the gridder.
ctr_lon, ctr_lat, ctr_alt = grs80lla.fromECEF(
*geofixcs.toECEF(x_ctr, y_ctr, 0.0))
fixed_grid = geofixcs
log.debug((x_bnd, y_bnd, dx, dy, nx, ny))
output_writer = partial(write_cf_netcdf_fixedgrid, nadir_lon=nadir_lon)
gridder = grid_GLM_flashes
output_filename_prefix = 'GLM'
# NOTE(review): the dict(...) call below is not closed in this view.
grid_kwargs = dict(proj_name=proj_name,
base_date=base_date, do_3d=False,
dx=dx, dy=dy, frame_interval=60.0,
x_bnd=x_bnd, y_bnd=y_bnd,
ctr_lat=ctr_lat, ctr_lon=ctr_lon, outpath=outputpath,
output_writer=output_writer, subdivide=1, # subdivide the grid this many times along each dimension
output_kwargs={'scale_and_offset': False},
#if args.fixed_grid:
# grid_kwargs['fixed_grid'] = True
# grid_kwargs['nadir_lon'] = nadir_lon
# if args.split_events:
grid_kwargs['clip_events'] = True
# min_groups is set to None at the top of this function, so this branch is
# not taken as written.
if min_groups is not None:
grid_kwargs['min_groups_per_flash'] = min_groups
grid_kwargs['energy_grids'] = ('total_energy',)
# proj_name is fixed to 'geos' above, so this condition holds as written.
if (proj_name == 'pixel_grid') or (proj_name == 'geos'):
grid_kwargs['pixel_coords'] = fixed_grid
grid_kwargs['ellipse_rev'] = -1 # -1 (default) = infer from date in each GLM file
return gridder, args.filenames, start_time, end_time, grid_kwargs
if __name__ == '__main__':
# Script entry point: parse arguments, grid the input files in a temporary
# working directory, optionally tile the grids, then move the results to the
# requested output directory.
# freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time
parser = create_parser()
args = parser.parse_args()
# Configure logging
# args.verbosity (count of -v flags) indexes this list, capped at DEBUG.
levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
logging.basicConfig(level=levels[min(3, args.verbosity)], filename=args.log_fn)
# Python warnings are silenced unless running at DEBUG level.
if levels[min(3, args.verbosity)] > logging.DEBUG:
import warnings
# NOTE(review): the next line appears to fuse two statements; the call that
# takes "Starting GLM Gridding" is missing its prefix in this view.
warnings.filterwarnings("ignore")"Starting GLM Gridding")
log.debug("Starting script with: %s", sys.argv)
# set up output dir
os.makedirs(args.output_dir, exist_ok=True)
# set up temporary dir
# The temporary directory is created under the current working directory.
# NOTE(review): the trailing "working in: {}" call is missing its prefix.
tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())"working in: {}".format(tempdir_path))
# clean our temporary dir on exit
atexit.register(shutil.rmtree, tempdir_path)
# do the gridding
gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(args, work_dir=tempdir_path)
gridder(glm_filenames, start_time, end_time, **grid_kwargs)
# pick up gridded files from the tempdir
# output looks like:
log.debug("gridded files in {}".format(tempdir_path))
gridded_path = os.path.join(tempdir_path, 'OR_GLM-L2-GLM?-M?_G??_s*_e*_c*.nc')
gridded_files = glob(gridded_path)
# (optionally) do tiling
if args.create_tiles:
# satpy is imported only when tiling is requested.
from satpy import Scene
# NOTE(review): the "TILING" call below is missing its prefix, and it formats
# the whole gridded_files list rather than the current gridded_file.
for gridded_file in gridded_files:"TILING: {}".format(gridded_files))
scn = Scene(reader='glm_l2', filenames=[gridded_file]) # n.b. satpy requires a list of filenames
# NOTE(review): the call that receives the keyword arguments below is not
# visible. sector_id is the literal "GOES_EAST" regardless of the GOES
# position in use - confirm this is intended for west data.
sector_id="GOES_EAST", # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
source_name="", # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
base_dir=tempdir_path, # base_dir is the output directory. I think blank is the same as current directory.
tile_size=(506, 904), # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
check_categories=False) # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
# pick up output files from the tempdir
# output looks like:
log.debug("files in {}".format(tempdir_path))
log.debug("moving output to {}".format(args.output_dir))
# Tiles are matched by the _T??_ field in their filename.
tiled_path = os.path.join(tempdir_path, 'OR_GLM-L2-GLM?-M?_G??_T??_*.nc')
tiled_files = glob(tiled_path)
# Move tiles and grids out of the temporary directory, keeping their base names.
for f in tiled_files:
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
for f in gridded_files:
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
# tempdir cleans itself up via atexit, above
\ No newline at end of file
......@@ -28,4 +28,4 @@ fi
\ No newline at end of file
export PATH=$CSPP_GEO_GGLM_HOME/bin:$CSPP_GEO_GGLM_HOME/libexec/python_runtime/bin:$PATH
\ No newline at end of file
