Skip to content
Snippets Groups Projects
Commit 1e1de04c authored by Nick Bearson's avatar Nick Bearson
Browse files

Merge branch 'nickb-add-satpy-PR' into 'master'

This PR grew a bit beyond just adding satpy to our buildbucket!

See merge request !4
parents 0a140142 9e25607f
No related branches found
No related tags found
1 merge request!4add satpy to our buildbucket
......@@ -15,4 +15,16 @@ dependencies:
- scipy
- setuptools
- xarray
- zarr
\ No newline at end of file
- zarr
# satpy dependencies:
- configobj
- pykdtree
- pyresample
- trollsift
- trollimage
- shapely
- pip:
- git+https://github.com/deeplycloudy/lmatools.git@minvaluegrids
- git+https://github.com/deeplycloudy/stormdrain.git
- git+https://github.com/deeplycloudy/glmtools.git@master
- git+https://github.com/pytroll/satpy.git@master
\ No newline at end of file
......@@ -25,9 +25,6 @@ fi
pkg_name=cspp-geo-gridded-glm-${version}
DIST=${DIST:-"/dock"}
LMATOOLS_REF=${LMATOOLS_REF:-"minvaluegrids"}
GLMTOOLS_REF=${GLMTOOLS_REF:-"master"}
GLMTOOLS_REPOS=${GLMTOOLS_REPOS:-"https://github.com/deeplycloudy/glmtools.git"}
GGLM_REPOS=${GGLM_REPOS:-"https://gitlab.ssec.wisc.edu/cspp_geo/cspp-geo-gridded-glm.git"}
GGLM_REF=${GGLM_REF:-"master"}
MINIFY_TARBALL=${MINIFY_TARBALL:-1}
......@@ -53,20 +50,11 @@ which python
conda info -a
conda list --export
echo "Version specified: ${version}"
echo "lmatools reference: ${LMATOOLS_REF}"
echo "glmtools reference: ${GLMTOOLS_REF}"
# Turn on command printing here because we don't need all of the `conda`
# internal bash commands to pollute our output
set -x
# Install glmtools and related packages
# All dependencies should have been built with the buildbucket
# Careful: Could result in missing dependencies but we want reproducibility
pip install --no-deps git+https://github.com/deeplycloudy/lmatools.git@${LMATOOLS_REF}
pip install --no-deps git+https://github.com/deeplycloudy/stormdrain.git
pip install --no-deps git+${GLMTOOLS_REPOS}@${GLMTOOLS_REF}
# get the current packages files
# if the caller mounted the repository already then use that
GGLM_DIR="/work/cspp-geo-gridded-glm/gridded_glm"
......@@ -76,19 +64,20 @@ fi
# Build a tarball version of the current conda environment
# TODO: Add conda cleanup commands similar to what Polar2Grid uses to save space
conda_tb=conda_lmatools-${LMATOOLS_REF}_glmtools-${GLMTOOLS_REF}.tar.gz
conda_tb=conda_gglm.tar.gz
conda clean -ay # remove unnecessary things from conda environment
conda pack --n-threads $(nproc) -n build -o ${conda_tb}
# Build up our package directory
mkdir -p ${pkg_name}
cd ${pkg_name}
mkdir -p bin opt/conda
mkdir -p bin libexec/python_runtime
# Copy package scripts/data to package directory
cp $GGLM_DIR/PACKAGE_README.md ./README.md
cp $GGLM_DIR/bin/* ./bin/
cp $GGLM_DIR/libexec/* ./libexec/
# Untar the tarball so we can put things where we want
tar -xz -C ./opt/conda -f ../${conda_tb}
tar -xz -C ./libexec/python_runtime -f ../${conda_tb}
# Go back to original work directory
cd ..
......
# CSPP Geo Gridded GLM
TODO
\ No newline at end of file
USAGE
The following scripts are located in the `bin/` directory.
cspp-geo-gglm-trio-picker.sh
A simple wrapper around cspp-geo-gglm-minute-gridder.sh that takes a single input file and:
1) determines if it is the last file in a minute
2) if so, finds all files from that minute and passes them to cspp-geo-gglm-minute-gridder.sh
3) creates both grids and tiles for that minute
cspp-geo-gglm-minute-gridder.sh
This script creates one-minute grids from any number of input files.
cspp-geo-gglm-make-grids.sh
A wrapper for the `make_GLM_grids.py` "kitchen sink" example script provided by glmtools.
\ No newline at end of file
......@@ -26,7 +26,7 @@ if [ -z "$CSPP_GEO_GGLM_HOME" ]; then
fi
# Setup necessary environments
source $CSPP_GEO_GGLM_HOME/bin/env.sh
source $CSPP_GEO_GGLM_HOME/libexec/env.sh
# Call the python module to do the processing, passing all arguments
python3 $CSPP_GEO_GGLM_HOME/bin/_make_glm_grids.py "$@"
python3 $CSPP_GEO_GGLM_HOME/libexec/_make_glm_grids.py "$@"
#!/usr/bin/env bash
# encoding: utf-8
# Copyright (C) 2019 Space Science and Engineering Center (SSEC),
# University of Wisconsin-Madison.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This file is part of the CSPP Geo Gridded GLM software package. CSPP Geo
# Gridded GLM takes GOES GLM Level 2 LCFA files and grids them to the ABI
# fixed grid. It does this using the open source glmtools python package by
# Eric Bruning.
# Locate the package root (the parent of the directory holding this script)
# unless the caller has already set CSPP_GEO_GGLM_HOME.
if [[ -z "${CSPP_GEO_GGLM_HOME:-}" ]]; then
  # Assign first, export second, so a failing `cd` is not masked by `export`.
  CSPP_GEO_GGLM_HOME="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && cd .. && pwd)"
  export CSPP_GEO_GGLM_HOME
fi

# Setup necessary environments.
# Quoted so that an install path containing spaces or glob characters works.
source "${CSPP_GEO_GGLM_HOME}/libexec/env.sh"

# Call the python module to do the processing, passing all arguments
python3 "${CSPP_GEO_GGLM_HOME}/libexec/_minute_gridder.py" "$@"
#!/usr/bin/env bash
# encoding: utf-8
# Locate the package root (the parent of the directory holding this script)
# unless the caller has already set CSPP_GEO_GGLM_HOME.
if [[ -z "${CSPP_GEO_GGLM_HOME:-}" ]]; then
  # Assign first, export second, so a failing `cd` is not masked by `export`.
  CSPP_GEO_GGLM_HOME="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && cd .. && pwd)"
  export CSPP_GEO_GGLM_HOME
fi

# Setup necessary environments.
# Quoted so that an install path containing spaces or glob characters works.
source "${CSPP_GEO_GGLM_HOME}/libexec/env.sh"

# Call the python module to do the processing, passing all arguments
python3 "${CSPP_GEO_GGLM_HOME}/libexec/_glm_trio_picker.py" "$@"
#!/usr/bin/env python3
import sys
import os
from glob import glob
from datetime import datetime
import subprocess
from glmtools.io.glm import parse_glm_filename
if __name__ == '__main__':
    # Given one GLM L2 LCFA file, decide whether it is the last file of its
    # minute (start second == 40). If so, collect every file in the same
    # directory whose start time falls in that minute and hand them to
    # cspp-geo-gglm-minute-gridder.sh to produce grids and tiles.

    # FIXME: handle args better if this goes beyond my testing
    if len(sys.argv) != 2:
        print("Usage: {} <GLM L2 LCFA file>".format(os.path.basename(sys.argv[0])),
              file=sys.stderr)
        sys.exit(2)
    glmfile = sys.argv[1]

    # bad (start of period)
    # glmfile = '/scratch/nickb/cspp-geo-grb-1.0/output/product/OR_GLM-L2-LCFA_G16_s20210131854000_e20210131854204_c20210131854225.nc'
    # good (end of period)
    # glmfile = '/scratch/nickb/cspp-geo-grb-1.0/output/product/OR_GLM-L2-LCFA_G16_s20210131854400_e20210131855004_c20210131855027.nc'
    # glmfile = '/data/users/nickb/cspp-geo-gridded-glm/testing/2020-11-05/CLASS/OR_GLM-L2-LCFA_G16_s20203101529400_e20203101530004_c20203101530021.nc'

    # check that glmfile is actually a GLM file
    # (fields 0-2 are reused below to build the glob prefix, field 3 is the start time)
    glminfo = parse_glm_filename(os.path.basename(glmfile))
    filename_starts = glminfo[3]

    # if this isn't the last file of the minute, exit
    if filename_starts.second != 40:
        print("This is not the last GLM file from this minute. Exiting.")
        sys.exit(0)

    # if this is the last file of the minute, grab all files of the minute (three)
    # example trio:
    # OR_GLM-L2-LCFA_G16_s20203101529000_e20203101529205_c20203101529215.nc
    # OR_GLM-L2-LCFA_G16_s20203101529200_e20203101529405_c20203101529430.nc
    # OR_GLM-L2-LCFA_G16_s20203101529400_e20203101530004_c20203101530021.nc
    globstring = "{}_{}_{}_s{}*".format(glminfo[0], glminfo[1], glminfo[2],
                                        filename_starts.strftime("%Y%j%H%M"))
    # glob() returns matches in arbitrary order; sort so the gridder always
    # receives the files in chronological (filename) order.
    fileglob = sorted(glob(os.path.join(os.path.dirname(glmfile), globstring)))
    if not fileglob:
        print("No GLM files matching {} were found. Exiting.".format(globstring),
              file=sys.stderr)
        sys.exit(1)
    print("Gridding {} files".format(len(fileglob)))

    # and run gridded glm a'la:
    # minute_gridder.sh \
    #   --goes-position auto --goes-sector full \
    #   --create-tiles \
    #   ./2020-11-05/CLASS/OR_GLM-L2-LCFA_G16_s20203101529000_e20203101529205_c20203101529215.nc \
    #   ./2020-11-05/CLASS/OR_GLM-L2-LCFA_G16_s20203101529200_e20203101529405_c20203101529430.nc \
    #   ./2020-11-05/CLASS/OR_GLM-L2-LCFA_G16_s20203101529400_e20203101530004_c20203101530021.nc
    # Pass every matched file rather than indexing exactly three, so a minute
    # with a missing file no longer dies with an IndexError.
    result = subprocess.run(["cspp-geo-gglm-minute-gridder.sh",
                             "--goes-position", "auto",
                             "--goes-sector", "full",
                             "--create-tiles"] + fileglob)
    # Propagate the gridder's exit status to our caller.
    sys.exit(result.returncode)
#!/usr/bin/env python3
# Based on https://github.com/deeplycloudy/glmtools/blob/master/examples/grid/make_GLM_grids.py
parse_desc = """Grid the past X minutes of GLM flash data, given a single input file.
"""
import numpy as np
from datetime import datetime, timedelta
import os
import sys
import tempfile
import shutil
import atexit
from glob import glob
#from multiprocessing import freeze_support # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.freeze_support
from functools import partial
from lmatools.grid.make_grids import write_cf_netcdf_latlon, write_cf_netcdf_noproj, write_cf_netcdf_fixedgrid
from lmatools.grid.make_grids import dlonlat_at_grid_center, grid_h5flashfiles
from glmtools.grid.make_grids import grid_GLM_flashes
from glmtools.io.glm import parse_glm_filename
from lmatools.grid.fixed import get_GOESR_grid, get_GOESR_coordsys
import logging
log = logging.getLogger(__name__)
def create_parser():
    """Build the command-line argument parser for the minute gridder.

    Returns:
        argparse.ArgumentParser: parser exposing ``verbosity``, ``log_fn``,
        ``output_dir``, ``goes_sector``, ``goes_position``, ``create_tiles``,
        ``ctr_lat``, ``ctr_lon`` and one or more positional ``filenames``.
    """
    import argparse
    parser = argparse.ArgumentParser(description=parse_desc)
    # The count starts at 0, which the main block maps to logging.ERROR, so
    # the documented default is ERROR.
    parser.add_argument('-v', '--verbose', dest='verbosity', action="count", default=0,
                        help='each occurrence increases verbosity 1 level through ERROR-WARNING-INFO-DEBUG (default ERROR)')
    parser.add_argument('-l', '--log', dest="log_fn", default=None,
                        help="specify the log filename")
    # from Requirements: "Output is Gridded GLM in the native glmtools NetCDF4 format, with a user option to produce AWIPS-compatible NetCDF tiles as described below"
    parser.add_argument('-o', '--output-dir', metavar='output directory',
                        default=os.getcwd())
    parser.add_argument('--goes-sector', default="full",
                        help="One of [full|conus|meso]. "
                             "Requires goes_position. If sector is "
                             "meso, ctr_lon and ctr_lat are interpreted as "
                             "the ctr_x and ctr_y of the fixed grid")
    parser.add_argument('--goes-position', default="auto",
                        help="One of [east|west|test|auto]. "
                             "Requires '--goes-sector'.")
    parser.add_argument("-t", "--create-tiles", default=False, action='store_true',
                        help="create AWIPS-compatible tiles")  # FIXME: improve this help text
    parser.add_argument('--ctr-lat', metavar='latitude',
                        type=float, help='center latitude (required for meso)')
    parser.add_argument('--ctr-lon', metavar='longitude',
                        type=float, help='center longitude (required for meso)')
    # from Requirements: "Input is one or more GLM LCFA (L2+) files in mission standard format (nominally three 20-second input files)"
    parser.add_argument(dest='filenames', metavar='filename', nargs='+')
    return parser
"""
old arguments for reference
FIXME: remove this whole comment once everything is working
parser.add_argument('--dx', metavar='km',
default=10.0, type=float,
help='approximate east-west grid spacing')
parser.add_argument('--dy', metavar='km',
default=10.0, type=float,
help='approximate north-south grid spacing')
parser.add_argument('--dt', metavar='seconds',
default=60.0, type=float,
help='frame duration')
parser.add_argument('--width', metavar='distance in km',
default=400.0,
type=float, help='total width of the grid')
parser.add_argument('--height', metavar='distance in km',
default=400.0,
type=float, help='total height of the grid')
parser.add_argument('--nevents', metavar='minimum events per flash',
type=int, dest='min_events', default=1,
help='minimum number of events per flash')
parser.add_argument('--ngroups', metavar='minimum groups per flash',
type=int, dest='min_groups', default=1,
help='minimum number of groups per flash')
parser.add_argument('--subdivide-grid', metavar='sqrt(number of subgrids)',
type=int, default=1,
help="subdivide the grid this many times along "
"each dimension")
"""
def get_resolution(args):
    """Return the grid resolution label (e.g. ``'2.0km'``).

    ``args`` is accepted for interface symmetry but is not consulted: the
    resolution is hardcoded to 2.0 for now. See nearest_resolution in
    make_glm_grids for how this could be exposed if we change our minds.
    """
    fixed_km = 2.0
    padded_label = '{0:4.1f}km'.format(fixed_km)
    # The width-4 format may left-pad with spaces; strip them out.
    return padded_label.replace(' ', '')
# if provided "auto" position, we determine the sensor from the filename
def get_goes_position(filenames):
    """Infer the GOES position from the platform tag in the file names.

    Args:
        filenames: iterable of GLM file paths.

    Returns:
        ``"east"`` when every file name contains ``_G16_``, ``"west"`` when
        every file name contains ``_G17_``.

    Raises:
        ValueError: if no file names are given, or the files do not all
            carry the same recognised platform tag.
    """
    # Only look at the file name itself so that a directory component that
    # happens to contain "_G16_"/"_G17_" cannot influence the result.
    base_names = [os.path.basename(f) for f in filenames]
    # all() over an empty sequence is True, which would silently pick "east".
    if not base_names:
        raise ValueError("position 'auto' but no filenames were provided")
    if all("_G16_" in name for name in base_names):
        return "east"
    if all("_G17_" in name for name in base_names):
        return "west"
    # we require that all files are from the same sensor and raise an exception if not
    raise ValueError("position 'auto' but could not determine position - did you provide a mix of satellites?")
def get_start_end(filenames, start_time=None, end_time=None):
    """Compute start and end time of data based on filenames.

    Args:
        filenames: iterable of GLM file paths; only the base name is parsed.
        start_time: optional explicit start; when given it is returned as-is
            instead of being derived from the file names.
        end_time: optional explicit end; when given it is returned as-is
            instead of being derived from the file names.

    Returns:
        ``(start_time, end_time)``. Derived values are the earliest file
        start and the latest file start plus 20 seconds.

    Raises:
        ValueError: if a needed time cannot be derived because no file names
            were supplied (or, presumably, because parse_glm_filename
            rejects a name - verify against glmtools).
    """
    if start_time is None or end_time is None:
        # opsenv, algorithm, platform, start, end, created = parse_glm_filename(f)
        filename_starts = [parse_glm_filename(os.path.basename(p))[3]
                           for p in filenames]
        if not filename_starts:
            raise ValueError("Could not determine start/end time")
        if start_time is None:
            start_time = min(filename_starts)
        if end_time is None:
            # Used to use max(filename_ends), but on 27 Oct 2020, the filename
            # ends started to report the time of the last event in the file,
            # causing a slight leakage (usually less than a second) into the
            # next minute. This caused two minutes of grids to be produced for
            # every three twenty second files passed to this script.
            # Instead, we now assume every LCFA file is 20 s long, beginning
            # with the start time. No doubt in the future we will see filenames
            # that no longer start on an even minute boundary.
            end_time = max(filename_starts) + timedelta(0, 20)
    return start_time, end_time
def grid_setup(args, work_dir=None):
    """Translate parsed command-line arguments into a gridder call.

    Args:
        args: namespace providing ``filenames``, ``goes_position``,
            ``goes_sector``, ``ctr_lon`` and ``ctr_lat``.
        work_dir: directory the gridder writes into. Defaults to the current
            working directory, resolved at call time.

    Returns:
        ``(gridder, filenames, start_time, end_time, grid_kwargs)`` where
        ``gridder`` is ``grid_GLM_flashes`` and ``grid_kwargs`` are the
        keyword arguments to call it with.

    Raises:
        ValueError: if the data times cannot be determined from the file
            names, or the sector is ``meso`` and no center was supplied.
        RuntimeError: if the selected view defines no grid center and the
            sector is not ``meso``.
    """
    # Resolve the default here rather than in the signature, where it would
    # be frozen to whatever the cwd was when the module was imported.
    if work_dir is None:
        work_dir = os.getcwd()

    # When passed None for the minimum event or group counts, the gridder will skip
    # the check, saving a bit of time.
    min_events = None
    min_groups = None

    try:
        start_time, end_time = get_start_end(args.filenames)
    except ValueError:
        # This script has no options for overriding the data times, so the
        # only remedy is to supply standard file names.
        log.error("Could not determine data times from the input filenames; "
                  "standard GLM L2 LCFA filenames are required.")
        raise

    base_date = datetime(start_time.year, start_time.month, start_time.day)
    proj_name = 'geos'
    outputpath = os.path.join(work_dir, "{dataset_name}")  # GLMTools expects a template in addition to the path

    if args.goes_position == "auto":
        goes_position = get_goes_position(args.filenames)
    else:
        goes_position = args.goes_position

    resln = get_resolution(args)
    view = get_GOESR_grid(position=goes_position,
                          view=args.goes_sector,
                          resolution=resln)
    nadir_lon = view['nadir_lon']
    dx = dy = view['resolution']
    nx, ny = view['pixelsEW'], view['pixelsNS']
    geofixcs, grs80lla = get_GOESR_coordsys(sat_lon_nadir=nadir_lon)

    if 'centerEW' in view:
        x_ctr, y_ctr = view['centerEW'], view['centerNS']
    elif args.goes_sector == 'meso':
        if args.ctr_lon is None or args.ctr_lat is None:
            raise ValueError("--ctr-lon and --ctr-lat are required when "
                             "--goes-sector is 'meso'")
        # use ctr_lon, ctr_lat to get the center of the mesoscale FOV
        x_ctr, y_ctr, z_ctr = geofixcs.fromECEF(
            *grs80lla.toECEF(args.ctr_lon, args.ctr_lat, 0.0))
    else:
        # FIXME: is it possible to get here? if so, what should happen?
        raise RuntimeError("no grid center available for sector "
                           "{!r}".format(args.goes_sector))

    # Need to use +1 here to convert to xedge, yedge expected by gridder
    # instead of the pixel centroids that will result in the final image
    nx += 1
    ny += 1
    x_bnd = (np.arange(nx, dtype='float') - (nx) / 2.0) * dx + x_ctr + 0.5 * dx
    y_bnd = (np.arange(ny, dtype='float') - (ny) / 2.0) * dy + y_ctr + 0.5 * dy
    log.debug(("initial x,y_ctr", x_ctr, y_ctr))
    log.debug(("initial x,y_bnd", x_bnd.shape, y_bnd.shape))
    # Only the outer edges are handed to the gridder.
    x_bnd = np.asarray([x_bnd.min(), x_bnd.max()])
    y_bnd = np.asarray([y_bnd.min(), y_bnd.max()])

    # The coordinate systems built above are reused here; the original
    # rebuilt them with the identical nadir longitude.
    ctr_lon, ctr_lat, ctr_alt = grs80lla.fromECEF(
        *geofixcs.toECEF(x_ctr, y_ctr, 0.0))
    fixed_grid = geofixcs
    log.debug((x_bnd, y_bnd, dx, dy, nx, ny))

    output_writer = partial(write_cf_netcdf_fixedgrid, nadir_lon=nadir_lon)

    gridder = grid_GLM_flashes
    output_filename_prefix = 'GLM'
    grid_kwargs = dict(proj_name=proj_name,
                       base_date=base_date, do_3d=False,
                       dx=dx, dy=dy, frame_interval=60.0,
                       x_bnd=x_bnd, y_bnd=y_bnd,
                       ctr_lat=ctr_lat, ctr_lon=ctr_lon, outpath=outputpath,
                       min_points_per_flash=min_events,
                       output_writer=output_writer, subdivide=1,  # subdivide the grid this many times along each dimension
                       output_filename_prefix=output_filename_prefix,
                       output_kwargs={'scale_and_offset': False},
                       spatial_scale_factor=1.0)

    #if args.fixed_grid:
    #    grid_kwargs['fixed_grid'] = True
    #    grid_kwargs['nadir_lon'] = nadir_lon
    # if args.split_events:
    grid_kwargs['clip_events'] = True
    if min_groups is not None:
        grid_kwargs['min_groups_per_flash'] = min_groups
    grid_kwargs['energy_grids'] = ('total_energy',)
    if (proj_name == 'pixel_grid') or (proj_name == 'geos'):
        grid_kwargs['pixel_coords'] = fixed_grid
    grid_kwargs['ellipse_rev'] = -1  # -1 (default) = infer from date in each GLM file
    return gridder, args.filenames, start_time, end_time, grid_kwargs
if __name__ == '__main__':
    # Entry point: parse arguments, grid the input files into a temporary
    # working directory, optionally tile the grids, then move all products
    # into the requested output directory.

    # freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time
    parser = create_parser()
    args = parser.parse_args()

    # Configure logging
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    log_level = levels[min(3, args.verbosity)]
    logging.basicConfig(level=log_level, filename=args.log_fn)
    # Python warnings are silenced unless running at DEBUG verbosity.
    if log_level > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    log.info("Starting GLM Gridding")
    log.debug("Starting script with: %s", sys.argv)

    # set up output dir
    os.makedirs(args.output_dir, exist_ok=True)

    # set up temporary dir
    tempdir_path = tempfile.mkdtemp(suffix=None, prefix="tmp-glm-grids-", dir=os.getcwd())
    log.info("working in: {}".format(tempdir_path))
    # clean our temporary dir on exit
    atexit.register(shutil.rmtree, tempdir_path)

    # do the gridding
    gridder, glm_filenames, start_time, end_time, grid_kwargs = grid_setup(args, work_dir=tempdir_path)
    gridder(glm_filenames, start_time, end_time, **grid_kwargs)

    # pick up gridded files from the tempdir
    # output looks like: OR_GLM-L2-GLMC-M3_G17_s20202691559400_e20202691600400_c20210120141010.nc
    log.debug("gridded files in {}".format(tempdir_path))
    gridded_path = os.path.join(tempdir_path, 'OR_GLM-L2-GLM?-M?_G??_s*_e*_c*.nc')
    log.debug(gridded_path)
    gridded_files = glob(gridded_path)
    log.debug(gridded_files)

    # (optionally) do tiling
    if args.create_tiles:
        from satpy import Scene
        for gridded_file in gridded_files:
            # Log the single file being tiled in this iteration, not the whole list.
            log.info("TILING: {}".format(gridded_file))
            scn = Scene(reader='glm_l2', filenames=[gridded_file])  # n.b. satpy requires a list of filenames
            scn.load([
                'DQF',
                'flash_extent_density',
                'minimum_flash_area',
                'total_energy',
            ])
            # NOTE(review): sector_id is hardcoded to GOES_EAST even when the
            # input is from a west satellite - confirm whether that matters.
            scn.save_datasets(writer='awips_tiled',
                              template='glm_l2_radf',
                              sector_id="GOES_EAST",  # sector_id becomes an attribute in the output files and may be another legacy kind of thing. I'm not sure how much is is actually used here.
                              source_name="",  # You could probably make source_name an empty string. I think it is required by the writer for legacy reasons but isn't actually used for the glm output
                              base_dir=tempdir_path,  # base_dir is the output directory. I think blank is the same as current directory.
                              tile_size=(506, 904),  # tile_size is set to the size of the GLMF sample tiles we were given and should match the full disk ABI tiles which is what they wanted
                              check_categories=False)  # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products

    # pick up output files from the tempdir
    # output looks like: OR_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
    log.debug("files in {}".format(tempdir_path))
    log.debug(os.listdir(tempdir_path))
    log.debug("moving output to {}".format(args.output_dir))
    tiled_path = os.path.join(tempdir_path, 'OR_GLM-L2-GLM?-M?_G??_T??_*.nc')
    tiled_files = glob(tiled_path)
    for f in tiled_files:
        shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
    for f in gridded_files:
        shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))

    # tempdir cleans itself up via atexit, above
\ No newline at end of file
......@@ -28,4 +28,4 @@ fi
unset PYTHONPATH
unset LD_LIBRARY_PATH
export PATH=$PATH:$CSPP_GEO_GGLM_HOME/bin:$CSPP_GEO_GGLM_HOME/opt/conda/bin
\ No newline at end of file
export PATH=$CSPP_GEO_GGLM_HOME/bin:$CSPP_GEO_GGLM_HOME/libexec/python_runtime/bin:$PATH
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment