From 78bfc1a610532920e5a0d9a785a0deeabbe0cd71 Mon Sep 17 00:00:00 2001 From: David Hoese <david.hoese@ssec.wisc.edu> Date: Thu, 24 Oct 2019 14:05:29 -0500 Subject: [PATCH] Clean up command line arguments and minify package tarball --- buildbucket/README.md | 2 +- buildbucket/package.sh | 21 +++++++++-- gridded_glm/README.md | 4 +- gridded_glm/bin/_make_glm_grids.py | 60 +++++++++++++----------------- 4 files changed, 47 insertions(+), 40 deletions(-) diff --git a/buildbucket/README.md b/buildbucket/README.md index a18f479..d93b959 100644 --- a/buildbucket/README.md +++ b/buildbucket/README.md @@ -26,7 +26,7 @@ docker push gitlab.ssec.wisc.edu:5555/cspp_geo/cspp-geo-gridded-glm/buildbucket: To create the Gridded GLM package with the version number 1.0.0 in your current directory: ```bash -docker run --rm -v "${PWD}":/dock gitlab.ssec.wisc.edu:5555/cspp_geo/cspp-geo-gridded-glm/buildbucket:latest package.sh v1.0.0 +docker run --rm -v "${PWD}":/dock gitlab.ssec.wisc.edu:5555/cspp_geo/cspp-geo-gridded-glm/buildbucket:latest package.sh 1.0.0 ``` If some things need to be customized you can specify various environment diff --git a/buildbucket/package.sh b/buildbucket/package.sh index 9a8178b..59e8a0d 100755 --- a/buildbucket/package.sh +++ b/buildbucket/package.sh @@ -7,8 +7,10 @@ # GLMTOOLS_REPOS: git repository URL to use for glmtools (default: https://github.com/deeplycloudy/glmtools.git) # GGLM_REF: git reference (branch name) to use for this package's bash scripts (default: master) # GGLM_REPOS: git repository URL to use for this package's bash scripts -# If a "/work/cspp-geo-gridded-glm" directory is already mounted -# then that will be used and GGLM_REF will be ignored. +# If a "/work/cspp-geo-gridded-glm" directory is already mounted +# then that will be used and GGLM_REF will be ignored. +# MINIFY_TARBALL: perform extra "risky" operations to make the tarball as +# small as possible. 0 = False; True otherwise (default: True) # note: The shebang at the top of this file is needed as-is. The '-l' will # load the .bashrc which allows us to use conda. The '-e' triggers exit # on error behavior. We cannot use '/usr/bin/env bash -le' since most @@ -28,6 +30,7 @@ GLMTOOLS_REF=${GLMTOOLS_REF:-"ugf-newgrid"} GLMTOOLS_REPOS=${GLMTOOLS_REPOS:-"https://github.com/deeplycloudy/glmtools.git"} GGLM_REPOS=${GGLM_REPOS:-"https://gitlab.ssec.wisc.edu/cspp_geo/cspp-geo-gridded-glm.git"} GGLM_REF=${GGLM_REF:-"master"} +MINIFY_TARBALL=${MINIFY_TARBALL:-1} make_dockerfile() { cat >$DIST/Dockerfile <<EOF @@ -43,7 +46,6 @@ EOF } - conda activate build # Debug Info @@ -75,6 +77,7 @@ fi # Build a tarball version of the current conda environment # TODO: Add conda cleanup commands similar to what Polar2Grid uses to save space conda_tb=conda_lmatools-${LMATOOLS_REF}_glmtools-${GLMTOOLS_REF}.tar.gz +conda clean -ay # remove unnecessary things from conda environment conda pack --n-threads $(nproc) -n build -o ${conda_tb} # Build up our package directory @@ -89,6 +92,18 @@ tar -xz -C ./opt/conda -f ../${conda_tb} # Go back to original work directory cd .. +# Perform extra "risky" operations to make the tarball as small as possible +# Taken from https://jcrist.github.io/conda-docker-tips.html +if [ $MINIFY_TARBALL -ne 0 ]; then + find ${pkg_name} -follow -type f -name '*.a' -delete + find ${pkg_name} -follow -type f -name '*.pyc' -delete + find ${pkg_name} -follow -type f -name '*.js.map' -delete + bokeh_dir="${pkg_name}/lib/python*/site-packages/bokeh/server/static" + if [ -d $bokeh_dir ]; then + find $bokeh_dir -follow -type f -name '*.js' ! -name '*.min.js' -delete + fi +fi + # Create tarball of package directory XZ_DEFAULTS="--threads=$(nproc)" tar -Jc --owner 0 --group 0 -f ${pkg_name}.tar.xz ${pkg_name} mv ${pkg_name}.tar.xz ${DIST}/ diff --git a/gridded_glm/README.md b/gridded_glm/README.md index 84e1dfb..cad0b10 100644 --- a/gridded_glm/README.md +++ b/gridded_glm/README.md @@ -7,8 +7,8 @@ Gridded GLM package. ```bash docker run -it -v $PWD:/work --rm cspp_geo_gridded_glm/run_package:r20191024 \ - make_glm_grids.sh -o /work --fixed_grid --split_events \ - --goes_position east --goes_sector conus --dx=2.0 --dy=2.0 OR*.nc + make_glm_grids.sh -o /work --fixed-grid --split-events \ + --goes-position east --goes-sector conus --dx=2.0 --dy=2.0 OR*.nc ``` Note that "OR*.nc" must exist in the current directory to make bash expand diff --git a/gridded_glm/bin/_make_glm_grids.py b/gridded_glm/bin/_make_glm_grids.py index 3987597..2078f33 100644 --- a/gridded_glm/bin/_make_glm_grids.py +++ b/gridded_glm/bin/_make_glm_grids.py @@ -16,35 +16,33 @@ be written to a standardized directory structure. def create_parser(): parser = argparse.ArgumentParser(description=parse_desc) - parser.add_argument(dest='filenames', metavar='filename', nargs='*') - parser.add_argument('-o', '--output_dir', metavar='directory', - required=True, dest='outdir', action='store', ) - parser.add_argument('--ctr_lat', metavar='latitude', required=False, - dest='ctr_lat', action='store', type=float, - help='center latitude') - parser.add_argument('--ctr_lon', metavar='longitude', required=False, - dest='ctr_lon', action='store', type=float, - help='center longitude') + parser.add_argument(dest='filenames', metavar='filename', nargs='+') + parser.add_argument('-o', '--output-dir', metavar='directory', + required=True, action='store') + parser.add_argument('--ctr-lat', metavar='latitude', required=False, + action='store', type=float, help='center latitude') + parser.add_argument('--ctr-lon', metavar='longitude', required=False, + action='store', type=float, help='center longitude') parser.add_argument('--start', metavar='yyyy-mm-ddThh:mm:ss', - dest='start', action='store', + action='store', help='UTC start time, e.g., 2017-07-04T08:00:00') parser.add_argument('--end', metavar='yyyy-mm-ddThh:mm:ss', - dest='end', action='store', + action='store', help='UTC end time, e.g., 2017-07-04T09:00:00') parser.add_argument('--dx', metavar='km', - dest='dx', action='store', default=10.0, type=float, + action='store', default=10.0, type=float, help='approximate east-west grid spacing') parser.add_argument('--dy', metavar='km', - dest='dy', action='store', default=10.0, type=float, + action='store', default=10.0, type=float, help='approximate north-south grid spacing') parser.add_argument('--dt', metavar='seconds', - dest='dt', action='store', default=60.0, type=float, + action='store', default=60.0, type=float, help='frame duration') parser.add_argument('--width', metavar='distance in km', - dest='width', action='store', default=400.0, + action='store', default=400.0, type=float, help='total width of the grid') parser.add_argument('--height', metavar='distance in km', - dest='height', action='store', default=400.0, + action='store', default=400.0, type=float, help='total height of the grid') parser.add_argument('--nevents', metavar='minimum events per flash', type=int, dest='min_events', action='store', default=1, @@ -52,29 +50,26 @@ def create_parser(): parser.add_argument('--ngroups', metavar='minimum groups per flash', type=int, dest='min_groups', action='store', default=1, help='minimum number of groups per flash') - parser.add_argument('--fixed_grid', - action='store_true', dest='fixed_grid', + parser.add_argument('--fixed-grid', action='store_true', help='grid to the geostationary fixed grid') - parser.add_argument('--subdivide_grid', metavar='sqrt(number of subgrids)', - action='store', dest='subdivide_grid', - type=int, default=1, + parser.add_argument('--subdivide-grid', metavar='sqrt(number of subgrids)', + action='store', type=int, default=1, help=("subdivide the grid this many times along " "each dimension")) - parser.add_argument('--goes_position', default='none', - action='store', dest='goes_position', + parser.add_argument('--goes-position', default='none', action='store', help=("One of [east|west|test]. " "Also requires goes_sector.")) - parser.add_argument('--goes_sector', default='none', - action='store', dest='goes_sector', + parser.add_argument('--goes-sector', default='none', + action='store', help=("One of [full|conus|meso]. " "Also requires goes_position. If sector is " "meso, ctr_lon and ctr_lat are interpreted as " "the ctr_x and ctr_y of the fixed grid")) - parser.add_argument('--corner_points', metavar='filename.pickle', - action='store', dest='corner_points', + parser.add_argument('--corner-points', metavar='filename.pickle', + action='store', help=("name of file containing a pickled " "corner point lookup table")) - parser.add_argument('--split_events', dest='split_events', + parser.add_argument('--split-events', action='store_true', help='Split GLM event polygons when gridding') parser.add_argument('--ellipse', dest='ellipse_rev', default=-1, @@ -82,7 +77,7 @@ def create_parser(): help='Lightning ellipse revision. -1 (default)=infer' ' from date in each GLM file, 0=value at launch,' ' 1=late 2018 revision') - parser.add_argument('--float_output', dest='output_scale_and_offset', + parser.add_argument('--float-output', dest='output_scale_and_offset', default=True, action='store_false', help='write all output variables as floating point') @@ -193,10 +188,7 @@ def grid_setup(args): date = datetime(start_time.year, start_time.month, start_time.day) # grid_dir = os.path.join('/data/LCFA-production/', 'grid_test') # outpath = grid_dir+'/20%s' %(date.strftime('%y/%b/%d')) - outpath = os.path.join(args.outdir, '20%s' % (date.strftime('%y/%b/%d'))) - if os.path.exists(outpath) == False: - os.makedirs(outpath) - # subprocess.call(['chmod', 'a+w', outpath, grid_dir+'/20%s' %(date.strftime('%y/%b')), grid_dir+'/20%s' %(date.strftime('%y'))]) + os.makedirs(args.output_dir, exist_ok=True) if args.fixed_grid: proj_name = 'geos' @@ -288,7 +280,7 @@ def grid_setup(args): base_date=date, do_3d=False, dx=dx, dy=dy, frame_interval=float(args.dt), x_bnd=x_bnd, y_bnd=y_bnd, - ctr_lat=ctr_lat, ctr_lon=ctr_lon, outpath=outpath, + ctr_lat=ctr_lat, ctr_lon=ctr_lon, outpath=args.output_dir, min_points_per_flash=min_events, output_writer=output_writer, subdivide=args.subdivide_grid, output_filename_prefix=output_filename_prefix, -- GitLab