From 78bfc1a610532920e5a0d9a785a0deeabbe0cd71 Mon Sep 17 00:00:00 2001
From: David Hoese <david.hoese@ssec.wisc.edu>
Date: Thu, 24 Oct 2019 14:05:29 -0500
Subject: [PATCH] Clean up command line arguments and minify package tarball

---
 buildbucket/README.md              |  2 +-
 buildbucket/package.sh             | 21 +++++++++--
 gridded_glm/README.md              |  4 +-
 gridded_glm/bin/_make_glm_grids.py | 60 +++++++++++++-----------------
 4 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/buildbucket/README.md b/buildbucket/README.md
index a18f479..d93b959 100644
--- a/buildbucket/README.md
+++ b/buildbucket/README.md
@@ -26,7 +26,7 @@ docker push gitlab.ssec.wisc.edu:5555/cspp_geo/cspp-geo-gridded-glm/buildbucket:
 To create the Gridded GLM package with the version number 1.0.0 in your current directory:
 
 ```bash
-docker run --rm -v "${PWD}":/dock gitlab.ssec.wisc.edu:5555/cspp_geo/cspp-geo-gridded-glm/buildbucket:latest package.sh v1.0.0
+docker run --rm -v "${PWD}":/dock gitlab.ssec.wisc.edu:5555/cspp_geo/cspp-geo-gridded-glm/buildbucket:latest package.sh 1.0.0
 ```
 
 If some things need to be customized you can specify various environment
diff --git a/buildbucket/package.sh b/buildbucket/package.sh
index 9a8178b..59e8a0d 100755
--- a/buildbucket/package.sh
+++ b/buildbucket/package.sh
@@ -7,8 +7,10 @@
 #   GLMTOOLS_REPOS: git repository URL to use for glmtools (default: https://github.com/deeplycloudy/glmtools.git)
 #   GGLM_REF: git reference (branch name) to use for this package's bash scripts (default: master)
 #   GGLM_REPOS: git repository URL to use for this package's bash scripts
-#               If a "/work/cspp-geo-gridded-glm" directory is already mounted
-#               then that will be used and GGLM_REF will be ignored.
+#       If a "/work/cspp-geo-gridded-glm" directory is already mounted
+#       then that will be used and GGLM_REF will be ignored.
+#   MINIFY_TARBALL: perform extra "risky" operations to make the tarball as
+#       small as possible. Set to 0 to disable (default: 1, i.e. enabled)
 # note: The shebang at the top of this file is needed as-is. The '-l' will
 #       load the .bashrc which allows us to use conda. The '-e' triggers exit
 #       on error behavior. We cannot use '/usr/bin/env bash -le' since most
@@ -28,6 +30,7 @@ GLMTOOLS_REF=${GLMTOOLS_REF:-"ugf-newgrid"}
 GLMTOOLS_REPOS=${GLMTOOLS_REPOS:-"https://github.com/deeplycloudy/glmtools.git"}
 GGLM_REPOS=${GGLM_REPOS:-"https://gitlab.ssec.wisc.edu/cspp_geo/cspp-geo-gridded-glm.git"}
 GGLM_REF=${GGLM_REF:-"master"}
+MINIFY_TARBALL=${MINIFY_TARBALL:-1}
 
 make_dockerfile() {
     cat >$DIST/Dockerfile <<EOF
@@ -43,7 +46,6 @@ EOF
 
 }
 
-
 conda activate build
 
 # Debug Info
@@ -75,6 +77,7 @@ fi
 # Build a tarball version of the current conda environment
 # TODO: Add conda cleanup commands similar to what Polar2Grid uses to save space
 conda_tb=conda_lmatools-${LMATOOLS_REF}_glmtools-${GLMTOOLS_REF}.tar.gz
+conda clean -ay  # remove unnecessary things from conda environment
 conda pack --n-threads $(nproc) -n build -o ${conda_tb}
 
 # Build up our package directory
@@ -89,6 +92,18 @@ tar -xz -C ./opt/conda -f ../${conda_tb}
 # Go back to original work directory
 cd ..
 
+# Perform extra "risky" operations to make the tarball as small as possible
+# Taken from https://jcrist.github.io/conda-docker-tips.html
+if [ "$MINIFY_TARBALL" != "0" ]; then  # string compare: only a literal 0 disables
+    find "${pkg_name}" -follow -type f -name '*.a' -delete
+    find "${pkg_name}" -follow -type f -name '*.pyc' -delete
+    find "${pkg_name}" -follow -type f -name '*.js.map' -delete
+    for bokeh_dir in "${pkg_name}"/lib/python*/site-packages/bokeh/server/static; do
+        [ -d "$bokeh_dir" ] || continue  # an unmatched glob stays literal; skip it
+        find "$bokeh_dir" -follow -type f -name '*.js' ! -name '*.min.js' -delete
+    done  # NOTE(review): env is untarred under ./opt/conda above - confirm the glob prefix
+fi
+
 # Create tarball of package directory
 XZ_DEFAULTS="--threads=$(nproc)" tar -Jc --owner 0 --group 0 -f ${pkg_name}.tar.xz ${pkg_name}
 mv ${pkg_name}.tar.xz ${DIST}/
diff --git a/gridded_glm/README.md b/gridded_glm/README.md
index 84e1dfb..cad0b10 100644
--- a/gridded_glm/README.md
+++ b/gridded_glm/README.md
@@ -7,8 +7,8 @@ Gridded GLM package.
 
 ```bash
 docker run -it -v $PWD:/work --rm cspp_geo_gridded_glm/run_package:r20191024 \
-    make_glm_grids.sh -o /work --fixed_grid --split_events \
-    --goes_position east --goes_sector conus --dx=2.0 --dy=2.0 OR*.nc
+    make_glm_grids.sh -o /work --fixed-grid --split-events \
+    --goes-position east --goes-sector conus --dx=2.0 --dy=2.0 OR*.nc
 ```
 
 Note that "OR*.nc" must exist in the current directory to make bash expand
diff --git a/gridded_glm/bin/_make_glm_grids.py b/gridded_glm/bin/_make_glm_grids.py
index 3987597..2078f33 100644
--- a/gridded_glm/bin/_make_glm_grids.py
+++ b/gridded_glm/bin/_make_glm_grids.py
@@ -16,35 +16,33 @@ be written to a standardized directory structure.
 
 def create_parser():
     parser = argparse.ArgumentParser(description=parse_desc)
-    parser.add_argument(dest='filenames', metavar='filename', nargs='*')
-    parser.add_argument('-o', '--output_dir', metavar='directory',
-                        required=True, dest='outdir', action='store', )
-    parser.add_argument('--ctr_lat', metavar='latitude', required=False,
-                        dest='ctr_lat', action='store', type=float,
-                        help='center latitude')
-    parser.add_argument('--ctr_lon', metavar='longitude', required=False,
-                        dest='ctr_lon', action='store', type=float,
-                        help='center longitude')
+    parser.add_argument(dest='filenames', metavar='filename', nargs='+')
+    parser.add_argument('-o', '--output-dir', metavar='directory',
+                        required=True, action='store')
+    parser.add_argument('--ctr-lat', metavar='latitude', required=False,
+                        action='store', type=float, help='center latitude')
+    parser.add_argument('--ctr-lon', metavar='longitude', required=False,
+                        action='store', type=float, help='center longitude')
     parser.add_argument('--start', metavar='yyyy-mm-ddThh:mm:ss',
-                        dest='start', action='store',
+                        action='store',
                         help='UTC start time, e.g., 2017-07-04T08:00:00')
     parser.add_argument('--end', metavar='yyyy-mm-ddThh:mm:ss',
-                        dest='end', action='store',
+                        action='store',
                         help='UTC end time, e.g., 2017-07-04T09:00:00')
     parser.add_argument('--dx', metavar='km',
-                        dest='dx', action='store', default=10.0, type=float,
+                        action='store', default=10.0, type=float,
                         help='approximate east-west grid spacing')
     parser.add_argument('--dy', metavar='km',
-                        dest='dy', action='store', default=10.0, type=float,
+                        action='store', default=10.0, type=float,
                         help='approximate north-south grid spacing')
     parser.add_argument('--dt', metavar='seconds',
-                        dest='dt', action='store', default=60.0, type=float,
+                        action='store', default=60.0, type=float,
                         help='frame duration')
     parser.add_argument('--width', metavar='distance in km',
-                        dest='width', action='store', default=400.0,
+                        action='store', default=400.0,
                         type=float, help='total width of the grid')
     parser.add_argument('--height', metavar='distance in km',
-                        dest='height', action='store', default=400.0,
+                        action='store', default=400.0,
                         type=float, help='total height of the grid')
     parser.add_argument('--nevents', metavar='minimum events per flash',
                         type=int, dest='min_events', action='store', default=1,
@@ -52,29 +50,26 @@ def create_parser():
     parser.add_argument('--ngroups', metavar='minimum groups per flash',
                         type=int, dest='min_groups', action='store', default=1,
                         help='minimum number of groups per flash')
-    parser.add_argument('--fixed_grid',
-                        action='store_true', dest='fixed_grid',
+    parser.add_argument('--fixed-grid', action='store_true',
                         help='grid to the geostationary fixed grid')
-    parser.add_argument('--subdivide_grid', metavar='sqrt(number of subgrids)',
-                        action='store', dest='subdivide_grid',
-                        type=int, default=1,
+    parser.add_argument('--subdivide-grid', metavar='sqrt(number of subgrids)',
+                        action='store', type=int, default=1,
                         help=("subdivide the grid this many times along "
                               "each dimension"))
-    parser.add_argument('--goes_position', default='none',
-                        action='store', dest='goes_position',
+    parser.add_argument('--goes-position', default='none', action='store',
                         help=("One of [east|west|test]. "
                               "Also requires goes_sector."))
-    parser.add_argument('--goes_sector', default='none',
-                        action='store', dest='goes_sector',
+    parser.add_argument('--goes-sector', default='none',
+                        action='store',
                         help=("One of [full|conus|meso]. "
                               "Also requires goes_position. If sector is "
                               "meso, ctr_lon and ctr_lat are interpreted as "
                               "the ctr_x and ctr_y of the fixed grid"))
-    parser.add_argument('--corner_points', metavar='filename.pickle',
-                        action='store', dest='corner_points',
+    parser.add_argument('--corner-points', metavar='filename.pickle',
+                        action='store',
                         help=("name of file containing a pickled "
                               "corner point lookup table"))
-    parser.add_argument('--split_events', dest='split_events',
+    parser.add_argument('--split-events',
                         action='store_true',
                         help='Split GLM event polygons when gridding')
     parser.add_argument('--ellipse', dest='ellipse_rev', default=-1,
@@ -82,7 +77,7 @@ def create_parser():
                         help='Lightning ellipse revision. -1 (default)=infer'
                              ' from date in each GLM file, 0=value at launch,'
                              ' 1=late 2018 revision')
-    parser.add_argument('--float_output', dest='output_scale_and_offset',
+    parser.add_argument('--float-output', dest='output_scale_and_offset',
                         default=True,
                         action='store_false',
                         help='write all output variables as floating point')
@@ -193,10 +188,7 @@ def grid_setup(args):
     date = datetime(start_time.year, start_time.month, start_time.day)
     # grid_dir = os.path.join('/data/LCFA-production/', 'grid_test')
     # outpath = grid_dir+'/20%s' %(date.strftime('%y/%b/%d'))
-    outpath = os.path.join(args.outdir, '20%s' % (date.strftime('%y/%b/%d')))
-    if os.path.exists(outpath) == False:
-        os.makedirs(outpath)
-        # subprocess.call(['chmod', 'a+w', outpath, grid_dir+'/20%s' %(date.strftime('%y/%b')), grid_dir+'/20%s' %(date.strftime('%y'))])
+    os.makedirs(args.output_dir, exist_ok=True)
 
     if args.fixed_grid:
         proj_name = 'geos'
@@ -288,7 +280,7 @@ def grid_setup(args):
                        base_date=date, do_3d=False,
                        dx=dx, dy=dy, frame_interval=float(args.dt),
                        x_bnd=x_bnd, y_bnd=y_bnd,
-                       ctr_lat=ctr_lat, ctr_lon=ctr_lon, outpath=outpath,
+                       ctr_lat=ctr_lat, ctr_lon=ctr_lon, outpath=args.output_dir,
                        min_points_per_flash=min_events,
                        output_writer=output_writer, subdivide=args.subdivide_grid,
                        output_filename_prefix=output_filename_prefix,
-- 
GitLab