From 9592a290d4cf3a78a01a86f610cfe224083365cf Mon Sep 17 00:00:00 2001
From: David Hoese <david.hoese@ssec.wisc.edu>
Date: Wed, 15 Jan 2020 15:19:32 -0600
Subject: [PATCH] Add untested initial attempt at a tile generation script

---
 mapserver/Dockerfile                   |  26 +++-
 mapserver/tiledb_geospatial_Dockerfile | 159 ++++++++++++++++++++++++
 tile_gen/Dockerfile                    |  10 ++
 tile_gen/generate_tiles.py             |  81 +++++++++++++
 tile_gen/run.sh                        |  61 ++++++++++
 tile_gen/tile_index.py                 | 160 +++++++++++++++++++++++++
 6 files changed, 492 insertions(+), 5 deletions(-)
 create mode 100644 mapserver/tiledb_geospatial_Dockerfile
 create mode 100644 tile_gen/Dockerfile
 create mode 100644 tile_gen/generate_tiles.py
 create mode 100644 tile_gen/run.sh
 create mode 100644 tile_gen/tile_index.py

diff --git a/mapserver/Dockerfile b/mapserver/Dockerfile
index 77ecdd8..bb670c2 100644
--- a/mapserver/Dockerfile
+++ b/mapserver/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:eoan
+FROM tiledb/tiledb-geospatial:latest
 
 RUN apt-get -y update && \
     apt-get -y upgrade && \
@@ -32,20 +32,36 @@ RUN mkdir -p /build/gdal && \
     make install && \
     rm -rf /build/gdal
 
-# mapserver
+# mapserver (unstable)
+https://github.com/mapserver/mapserver/archive/master.zip
+# 8a8ea9ccb59b0ebc16bac9bbc4f86120f76835b4 (2019-12-06)
 RUN mkdir -p /build/mapserver && \
     apt-get -y update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y libprotobuf17 zlib1g-dev libpng-dev libjpeg-turbo8 libfreetype6 libfribidi-dev libharfbuzz-dev libcairo2-dev libfcgi-dev libgeos++-dev postgresql postgis libxml2-dev libgif-dev libjpeg-turbo8-dev libprotobuf-dev protobuf-compiler libprotobuf-c-dev libprotobuf-c1 libprotobuf-dev protobuf-c-compiler && \
     apt-get -y clean && \
-    curl -O http://download.osgeo.org/mapserver/mapserver-7.4.2.tar.gz && \
-    tar xf mapserver-7.4.2.tar.gz && \
-    cd mapserver-7.4.2 && \
+    curl -o mapserver-dev.zip https://github.com/mapserver/mapserver/archive/8a8ea9ccb59b0ebc16bac9bbc4f86120f76835b4.zip && \
+    unzip mapserver-dev.zip && \
+    cd mapserver && \
     mkdir build && \
     cd build && \
     cmake .. -DWITH_POSTGIS=0 && \
     make -j$(nproc) && \
     make install
 
+# mapserver (stable)
+#RUN mkdir -p /build/mapserver && \
+#    apt-get -y update && \
+#    DEBIAN_FRONTEND=noninteractive apt-get install -y libprotobuf17 zlib1g-dev libpng-dev libjpeg-turbo8 libfreetype6 libfribidi-dev libharfbuzz-dev libcairo2-dev libfcgi-dev libgeos++-dev postgresql postgis libxml2-dev libgif-dev libjpeg-turbo8-dev libprotobuf-dev protobuf-compiler libprotobuf-c-dev libprotobuf-c1 libprotobuf-dev protobuf-c-compiler && \
+#    apt-get -y clean && \
+#    curl -O http://download.osgeo.org/mapserver/mapserver-7.4.2.tar.gz && \
+#    tar xf mapserver-7.4.2.tar.gz && \
+#    cd mapserver-7.4.2 && \
+#    mkdir build && \
+#    cd build && \
+#    cmake .. -DWITH_POSTGIS=0 && \
+#    make -j$(nproc) && \
+#    make install
+
 # postgres
 #service postgresql start
 
diff --git a/mapserver/tiledb_geospatial_Dockerfile b/mapserver/tiledb_geospatial_Dockerfile
new file mode 100644
index 0000000..f80985e
--- /dev/null
+++ b/mapserver/tiledb_geospatial_Dockerfile
@@ -0,0 +1,159 @@
+# Based on
+
+FROM ubuntu:18.04
+LABEL maintainer="support@tiledb.io"
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV TZ=GMT
+ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+
+RUN apt-get update && apt-get install -y \
+  gosu \
+  pwgen \
+  tzdata \
+  gcc \
+  g++ \
+  build-essential \
+  cmake \
+  sqlite \
+  libsqlite3-dev \
+  libxml2-dev \
+  libjpeg-dev \
+  libpng-dev \
+  libfreetype6-dev \
+  libzstd-dev \
+  python3-pip \
+  git \
+  wget \
+  && rm -rf /var/lib/apt/lists/*
+
+# Install tiledb using 1.7.2 release
+RUN mkdir -p /build_deps && cd /build_deps \
+  && git clone https://github.com/TileDB-Inc/TileDB.git -b 1.7.2 && cd TileDB \
+  && mkdir -p build && cd build \
+  && cmake -DTILEDB_VERBOSE=ON -DTILEDB_S3=ON -DTILEDB_SERIALIZATION=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local .. \
+  && make -j$(nproc) \
+  && make -C tiledb install
+
+# Install curl after building tiledb
+RUN apt-get update && apt-get install -y \
+  libcurl4 \
+  libcurl4-openssl-dev \
+  && rm -rf /var/lib/apt/lists/*
+
+# Install OpenJPEG
+RUN cd /build_deps \
+  && git clone https://github.com/uclouvain/openjpeg.git -b v2.2.0 && cd openjpeg \
+  && mkdir -p build && cd build \
+  && cmake .. \
+  && make -j$(nproc) \
+  && make install
+
+# Install libtiff
+RUN cd /build_deps \
+  && wget --no-check-certificate https://download.osgeo.org/libtiff/tiff-4.1.0.tar.gz \
+  && tar -zxf tiff-4.1.0.tar.gz \
+  && cd tiff-4.1.0 \
+  && ./configure \
+  && make \
+  && make install
+
+# Install Proj
+RUN cd /build_deps \
+  && git clone https://github.com/OSGeo/PROJ.git -b 6.2.1 && cd PROJ \
+  && mkdir -p build && cd build \
+  && cmake .. \
+  && make -j$(nproc) \
+  && make install
+
+# Install libgeotiff
+RUN cd /build_deps \
+  && wget --no-check-certificate https://download.osgeo.org/geotiff/libgeotiff/libgeotiff-1.5.1.tar.gz \
+  && tar -zxf libgeotiff-1.5.1.tar.gz \
+  && cd libgeotiff-1.5.1 \
+  && mkdir -p build && cd build \
+  && cmake .. \
+  && make \
+  && make install
+
+# Install GDAL
+RUN cd /build_deps \
+  && git clone https://github.com/OSGeo/gdal.git && cd gdal/gdal \
+  && git checkout c99a871a7bdedc751c503bb8cf508d9016510fe0 \
+  && ./configure --with-crypto=no --with-curl=no \
+  && make -j$(nproc) \
+  && make install
+
+## Install TileDB-Py
+RUN cd /build_deps \
+  && pip3 install numpy \
+  && git clone https://github.com/TileDB-Inc/TileDB-Py.git -b 0.5.3 \
+  && cd TileDB-Py && python3 setup.py install
+
+## Install XArray
+RUN cd /build_deps && pip3 install xarray
+
+## Install Dask
+RUN cd /build_deps \
+  && pip3 install toolz && pip3 install dask_image \
+  && git clone https://github.com/dask/dask.git && cd dask \
+  && git checkout 807f3225cf840f28ce7cf89b88fea63d473889e7 \
+  && python3 setup.py install \
+  && pip3 install dask distributed --upgrade \
+  && pip3 install dask-image
+
+# Install Rasterio
+RUN cd /build_deps && pip3 install cython
+RUN cd /build_deps \
+  && git clone https://github.com/mapbox/rasterio.git -b 1.1.0 && cd rasterio \
+  && python3 setup.py install
+
+# Install Fiona
+RUN cd /build_deps \
+  && git clone https://github.com/Toblerity/Fiona.git && cd Fiona \
+  && python3 setup.py install
+
+# Install TileDB-SAR
+RUN cd /build_deps \
+  && git clone https://github.com/TileDB-Inc/TileDB-SAR.git && cd TileDB-SAR \
+  && git checkout 888059a15d87ae95fff6dc01c8bd4343ee4eaee1 \
+  && python3 setup.py install
+
+# Install Mapserver
+RUN cd /build_deps \
+  && git clone https://github.com/mapserver/mapserver.git && cd mapserver \
+  && git checkout 0fcc810f0b559c800f950db78a79fa6574799f23 \
+  && mkdir -p build && cd build \
+  && cmake .. -DWITH_GIF=OFF -DWITH_HARFBUZZ=OFF -DWITH_PROTOBUFC=OFF -DWITH_FRIBIDI=OFF -DWITH_POSTGIS=OFF -DWITH_GEOS=OFF -DWITH_FCGI=OFF -DWITH_CAIRO=OFF \
+  && make \
+  && make install
+
+# Install LasZIP
+RUN cd /build_deps \
+  && wget https://github.com/LASzip/LASzip/releases/download/3.4.1/laszip-src-3.4.1.tar.gz \
+  && tar -zxf laszip-src-3.4.1.tar.gz \
+  && cd laszip-src-3.4.1 \
+  && mkdir -p build && cd build \
+  && cmake .. \
+  && make \
+  && make install
+
+# Install PDAL
+RUN cd /build_deps \
+  && git clone https://github.com/PDAL/PDAL.git -b 2.0.1 && cd PDAL \
+  && mkdir -p build && cd build \
+  && cmake .. \
+  && make \
+  && make install
+
+# Install PDAL Python
+RUN pip3 install packaging \
+  && git clone https://github.com/PDAL/python pdalextension \
+  && cd pdalextension \
+  && python3 setup.py build \
+  && python3 setup.py install
+
+# Clean up
+RUN cd /tmp && rm -r /build_deps
diff --git a/tile_gen/Dockerfile b/tile_gen/Dockerfile
new file mode 100644
index 0000000..adb4951
--- /dev/null
+++ b/tile_gen/Dockerfile
@@ -0,0 +1,10 @@
+FROM tiledb/tiledb-geospatial:latest
+
+# TODO may need the unzip command to be installed if not already
+RUN wget http://ssec.wisc.edu/~rayg/pub/amqpfind.zip && \
+    unzip amqpfind.zip && \
+    rm amqpfind.zip
+
+COPY tile_index.py .
+COPY generate_tiles.py .
+COPY run.sh .
diff --git a/tile_gen/generate_tiles.py b/tile_gen/generate_tiles.py
new file mode 100644
index 0000000..ed8fe09
--- /dev/null
+++ b/tile_gen/generate_tiles.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import warnings
+import logging
+import subprocess
+
+import tile_index
+
+LOG = logging.getLogger(__name__)
+
+
+def group_files(products, input_files):
+    """Group input geotiff files by product."""
+    groups = {}
+    for prod in products:
+        prods_files = [f for f in input_files if prod in f]
+        if prods_files:
+            groups[prod] = prods_files
+    if len(groups) != len(products):
+        warnings.warn("Not all product geotiffs were provided.")
+    return groups
+
+
+def remap_to_lonlat(itif, otif):
+    """Remap a single geotiff by calling gdalwarp."""
+    try:
+        subprocess.run(['gdalwarp', '-t_srs', 'EPSG:4326', itif, otif], check=True)
+    except subprocess.CalledProcessError:
+        LOG.error("Could not remap geotiff %s -> %s" % (itif, otif))
+        return None
+    return otif
+
+
+def remap_tifs(input_tifs, out_dir, remap_suffix):
+    """Remap all input geotiffs to EPSG:4326."""
+    for itif in input_tifs:
+        ifn = os.path.basename(itif)
+        otif = os.path.join(out_dir, ifn.replace('.tif', remap_suffix))
+        otif = remap_to_lonlat(itif, otif)
+        if otif is not None:
+            yield otif
+
+
+def main():
+    import argparse
+    parser = argparse.ArgumentParser(description="Take input geotiffs and generate mapserver compatible tiles.")
+    parser.add_argument('--remap', action='store_true',
+                        help="Remap input geotiffs to EPSG:4326")
+    parser.add_argument('--remap-suffix', default='_LL.tif',
+                        help="Replace 'tif' with provided suffix when geotiffs are remapped.")
+    parser.add_argument('-p', '--products', nargs="*",
+                        help="Product names to group together in each "
+                             "'layer'. Product name must be in the filename.")
+    parser.add_argument('--shape-file', default='{product}.shp',
+                        help="Shapefile filename pattern to use and placed in the output directory. (default: '{product}.shp')")
+    parser.add_argument('out_dir',
+                        help="Output path to save tile information to (ex. '/data/tiles/{product}')")
+    parser.add_argument('input_files',
+                        help="Input geotiffs to generate tiles for (separate from product lists with '--')")
+    args = parser.parse_args()
+
+    groups = group_files(args.products, args.input_files)
+    for prod, prod_files in groups.items():
+        out_dir = args.out_dir.format(prod)
+        os.makedirs(out_dir, exist_ok=True)
+        shp_fn = args.shape_file.format(prod)
+        shp_pathname = os.path.join(out_dir, shp_fn)
+
+        # remap if needed
+        if args.remap:
+            prod_files = list(remap_tifs(prod_files, out_dir, args.remap_suffix))
+
+        # create shape file
+        tile_index.index(prod_files, shp_pathname)
+
+
+
+if __name__ == "__main__":
+    sys.exit(main())
\ No newline at end of file
diff --git a/tile_gen/run.sh b/tile_gen/run.sh
new file mode 100644
index 0000000..1b6ebd5
--- /dev/null
+++ b/tile_gen/run.sh
@@ -0,0 +1,61 @@
+#!/bin/bash -le
+# Usage: run.sh
+# Environment variables used for configuration:
+# AMQPFIND_ARGS: Arguments to pass to amqpfind when listening for new input
+#     events. Should not include the "-C" topic flag (see AMQPFIND_TOPIC).
+#     Default: "-H cspp-geo-rabbit -X satellite -u guest -p guest"
+# AMQPFIND_TOPIC: Topic to use for incoming data events.
+#     Default: "data.goes.*.abi.*.l1b.netcdf.complete"
+#     The first asterisk (3rd element) can limit processing to a particular
+#     satellite (ex. `g16`). The second asterisk (5th element) can be used
+#     to limit to a particular sector (choices: radf, radc, radm1, radm2)
+# AMQPSEND_ARGS: Arguments to pass to amqpsend when sending out new data
+#     events. Default: "-H cspp-geo-rabbit -X satellite -u guest -p guest"
+
+# Verify that the data mount is available
+test -d "/data"
+
+# Change to /data volume to write our output
+test -d "/dst"
+
+export AMQPFIND_ARGS=${AMQPFIND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"}
+export AMQPSEND_ARGS=${AMQPSEND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"}
+export AMQPFIND_TOPIC=${AMQPFIND_TOPIC:-'data.goes.*.abi.*.l1b.geotiff.complete'}
+export G2G_PRODUCTS=${G2G_PRODUCTS:-"C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 C13 C14 C15 C16 true_color"}
+export TMPDIR=${TMPDIR:-"/dst/tmp"}
+
+run_tile_gen() {
+    if [ $# -ne 5 ]; then
+        echo "Unexpected number of arguments (expected 5): $#"
+        return 1
+    fi
+
+    satellite_family=${1,,}
+    satellite_id=${2,,}
+    instrument=${3,,}
+    data_type=${4,,}
+    path="$5"
+    echo "Starting Geo2Grid processing for ${path}"
+    # convert path from a relative path to an absolute path
+    path="/data/${path}"
+
+    # update shapefile in a temporary directory
+    # and resample geotiff if necessary
+    # FUTURE: TileDB will be updated in-place
+    # generate_tiles.py will make a temporary directory
+    out_dir="/dst/tiles/${satellite_family}/${satellite_id}/${instrument}"
+    mkdir -p ${out_dir}
+    python3 generate_tiles.py --remap -p ${G2G_PRODUCTS} -- ${out_dir} ${path}
+    # OUT/<product>/<product>.shp
+    glob_pattern="${out_dir}/*/*.shp"
+
+    amqpsend_topic="data.${satellite_family}.${satellite_id}.${instrument}.${data_type}.l1b.tiledb.complete"
+    json_info="{path: ${glob_pattern}, satellite_family: ${satellite_family}, satellite_ID: ${satellite_id}, instrument: ${instrument}, data_type: ${data_type}}"
+    json_info="{\"path\": \"${glob_pattern}\", \"satellite_family\": \"${satellite_family}\", \"satellite_ID\": \"${satellite_id}\", \"instrument\": \"${instrument}\", \"data_type\": \"${data_type}\"}"
+    echo -e "[[\"$amqpsend_topic\", $json_info]]" | python /work/amqpfind/amqpsend.py ${AMQPSEND_ARGS}
+}
+
+export -f run_tile_gen
+echo "Listening to AMQP messages with topic \"$AMQPFIND_TOPIC\""
+python amqpfind/amqpfind.py ${AMQPFIND_ARGS} -C "${AMQPFIND_TOPIC}" -j "{satellite_family} {satellite_ID} {instrument} {data_type} \'{path}\'" | xargs -I{} -P3 -n1 bash -c "run_tile_gen {}"
+
diff --git a/tile_gen/tile_index.py b/tile_gen/tile_index.py
new file mode 100644
index 0000000..6ff1769
--- /dev/null
+++ b/tile_gen/tile_index.py
@@ -0,0 +1,160 @@
+import sys
+import fiona
+from fiona.crs import from_epsg
+import rasterio
+from shapely.geometry import mapping, box
+
+import argparse
+import datetime
+import glob
+import logging
+import os
+import re
+import shutil
+
+# Remap geostationary to EPSG 4326
+# gdalwarp -t_srs EPSG:4326 in.tif out.tif
+
+
+# Note if using docker tiledb-geospatial image then requires shapely - `pip3 install shapely`
+# export CPL_DEBUG=ON - GDAL
+# export MS_DEBUGLEVEL=6 - MAPSERVER
+# python3 tile_index.py -dir data
+# sample mapserver queries
+# mapserv -nh "QUERY_STRING=map=goes.map&request=GetCapabilities&service=WMS&version=1.1.1"
+# mapserv -nh "QUERY_STRING=map=goes.map&request=GetMap&service=WMS&version=1.1.1&layers=goes_abi&srs=EPSG:4326&bbox=-180,-90,180,90&format=image/jpeg&WIDTH=1000&HEIGHT=1000&TIME=2019-12-12T19:10:18" > out.jpg
+# TODO add overview example to mapserver
+
+logger = logging.getLogger(__name__)
+
+temporal_schema = {
+    'geometry': 'Polygon',
+    'properties': {
+        'location': 'str',
+        'time': 'str:19'
+    }
+}
+
+possible_time_regex = (
+    (re.compile(r'\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}'), '%Y%m%d_%H%M%S'),
+    (re.compile(r'\d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}'), '%Y%m%dT%H%M%S'),
+)
+
+
+def get_file_time(fn):
+    for regex, time_fmt in possible_time_regex:
+        matches = regex.findall(fn)
+        if matches:
+            return datetime.datetime.strptime(matches[-1], time_fmt)
+    else:
+        raise ValueError("Unknown filename scheme, can't determine file time.")
+
+
+def index(input_files, output_shapefile):
+    """Create shapefile for location and times of provided geotiffs or tileDB arrays.
+
+    Note: All layers to be included in the shapefile must be provided all at
+          once. Repeated calls to this function will overwrite existing
+          shapefile information.
+
+    """
+    import tempfile
+    out_dir, shp_fn = os.path.split(output_shapefile)
+    tmp_dir = tempfile.mkdtemp("_tile_index")
+    tmp_shapefile = os.path.join(tmp_dir, shp_fn)
+    with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile',
+                    schema=temporal_schema) as output:
+        for f in input_files:
+            try:
+                dt = get_file_time(f)
+            except ValueError:
+                logger.error(f"Can't time for file {f}")
+                continue
+
+            logger.info(f"Indexing {f} {dt.isoformat()}")
+            with rasterio.open(f) as src:
+                g = box(*src.bounds)
+
+                output.write(
+                    {
+                        'geometry': mapping(g),
+                        'properties': {
+                            'location': f,
+                            'time': dt.isoformat()
+                        }
+                    })
+
+    # move the shapefile contents to the final destination
+    for fn in os.listdir(tmp_dir):
+        shutil.move(os.path.join(tmp_dir, fn), os.path.join(out_dir, fn))
+    # we don't need the temporary directory anymore
+    shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+# def index(src_dir, output):
+#   files = glob.glob(os.path.join(src_dir, '*.tif'))
+#   folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))]
+#
+#     with fiona.open(output, 'w', driver='ESRI Shapefile',
+#                     schema=temporal_schema) as output:
+#         # simple toggle between indexing tiff files or tiledb arrays
+#         if len(files) > 0:
+#             it = files
+#         else:
+#             it = folders
+#
+#         for f in it:
+#             parts = f.split('_')
+#             tstamp = parts[5] + parts[6]
+#             dt = datetime.datetime.strptime(tstamp, '%Y%m%d%H%M%S')
+#             logger.info(f"Indexing {f} {dt.isoformat()}")
+#             with rasterio.open(f) as src:
+#                 g = box(*src.bounds)
+#
+#                 output.write(
+#                     {
+#                         'geometry': mapping(g),
+#                         'properties': {
+#                             'location': f,
+#                             'time': dt.isoformat()
+#                         }
+#                     })
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-d', '--directory',
+                        help='Single directory to search for .tif files or TileDB array directories.')
+    parser.add_argument('-o', '--output', default='img_index.shp')
+    parser.add_argument('input_files', nargs='*',
+                        help='TileDB directories or GeoTIFF files to ingest.')
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s"
+    )
+
+    if args.input_files:
+        logger.info("Using input files...")
+        it = args.input_files
+    else:
+        src_dir = args.directory
+        files = glob.glob(os.path.join(src_dir, '*.tif'))
+        folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))]
+        # simple toggle between indexing tiff files or tiledb arrays
+        if len(files) > 0:
+            it = files
+        else:
+            it = folders
+
+    if not it:
+        raise ValueError("No valid inputs provided.")
+
+    if args.directory:
+        logger.info('Indexer starting')
+        index(it, args.output)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
-- 
GitLab