From 9592a290d4cf3a78a01a86f610cfe224083365cf Mon Sep 17 00:00:00 2001 From: David Hoese <david.hoese@ssec.wisc.edu> Date: Wed, 15 Jan 2020 15:19:32 -0600 Subject: [PATCH] Add untested initial attempt at a tile generation script --- mapserver/Dockerfile | 26 +++- mapserver/tiledb_geospatial_Dockerfile | 159 ++++++++++++++++++++++++ tile_gen/Dockerfile | 10 ++ tile_gen/generate_tiles.py | 81 +++++++++++++ tile_gen/run.sh | 61 ++++++++++ tile_gen/tile_index.py | 160 +++++++++++++++++++++++++ 6 files changed, 492 insertions(+), 5 deletions(-) create mode 100644 mapserver/tiledb_geospatial_Dockerfile create mode 100644 tile_gen/Dockerfile create mode 100644 tile_gen/generate_tiles.py create mode 100644 tile_gen/run.sh create mode 100644 tile_gen/tile_index.py diff --git a/mapserver/Dockerfile b/mapserver/Dockerfile index 77ecdd8..bb670c2 100644 --- a/mapserver/Dockerfile +++ b/mapserver/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:eoan +FROM tiledb/tiledb-geospatial:latest RUN apt-get -y update && \ apt-get -y upgrade && \ @@ -32,20 +32,36 @@ RUN mkdir -p /build/gdal && \ make install && \ rm -rf /build/gdal -# mapserver +# mapserver (unstable) +https://github.com/mapserver/mapserver/archive/master.zip +# 8a8ea9ccb59b0ebc16bac9bbc4f86120f76835b4 (2019-12-06) RUN mkdir -p /build/mapserver && \ apt-get -y update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y libprotobuf17 zlib1g-dev libpng-dev libjpeg-turbo8 libfreetype6 libfribidi-dev libharfbuzz-dev libcairo2-dev libfcgi-dev libgeos++-dev postgresql postgis libxml2-dev libgif-dev libjpeg-turbo8-dev libprotobuf-dev protobuf-compiler libprotobuf-c-dev libprotobuf-c1 libprotobuf-dev protobuf-c-compiler && \ apt-get -y clean && \ - curl -O http://download.osgeo.org/mapserver/mapserver-7.4.2.tar.gz && \ - tar xf mapserver-7.4.2.tar.gz && \ - cd mapserver-7.4.2 && \ + curl -o mapserver-dev.zip https://github.com/mapserver/mapserver/archive/8a8ea9ccb59b0ebc16bac9bbc4f86120f76835b4.zip && \ + unzip mapserver-dev.zip && \ + cd mapserver && \ mkdir build && \ cd build && \ cmake .. -DWITH_POSTGIS=0 && \ make -j$(nproc) && \ make install +# mapserver (stable) +#RUN mkdir -p /build/mapserver && \ +# apt-get -y update && \ +# DEBIAN_FRONTEND=noninteractive apt-get install -y libprotobuf17 zlib1g-dev libpng-dev libjpeg-turbo8 libfreetype6 libfribidi-dev libharfbuzz-dev libcairo2-dev libfcgi-dev libgeos++-dev postgresql postgis libxml2-dev libgif-dev libjpeg-turbo8-dev libprotobuf-dev protobuf-compiler libprotobuf-c-dev libprotobuf-c1 libprotobuf-dev protobuf-c-compiler && \ +# apt-get -y clean && \ +# curl -O http://download.osgeo.org/mapserver/mapserver-7.4.2.tar.gz && \ +# tar xf mapserver-7.4.2.tar.gz && \ +# cd mapserver-7.4.2 && \ +# mkdir build && \ +# cd build && \ +# cmake .. -DWITH_POSTGIS=0 && \ +# make -j$(nproc) && \ +# make install + # postgres #service postgresql start diff --git a/mapserver/tiledb_geospatial_Dockerfile b/mapserver/tiledb_geospatial_Dockerfile new file mode 100644 index 0000000..f80985e --- /dev/null +++ b/mapserver/tiledb_geospatial_Dockerfile @@ -0,0 +1,159 @@ +# Based on + +FROM ubuntu:18.04 +LABEL maintainer="support@tiledb.io" + +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=GMT +ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH} +ENV LANG=C.UTF-8 +ENV LC_ALL=C.UTF-8 + +RUN apt-get update && apt-get install -y \ + gosu \ + pwgen \ + tzdata \ + gcc \ + g++ \ + build-essential \ + cmake \ + sqlite \ + libsqlite3-dev \ + libxml2-dev \ + libjpeg-dev \ + libpng-dev \ + libfreetype6-dev \ + libzstd-dev \ + python3-pip \ + git \ + wget \ + && rm -rf /var/lib/apt/lists/* + +# Install tiledb using 1.7.2 release +RUN mkdir -p /build_deps && cd /build_deps \ + && git clone https://github.com/TileDB-Inc/TileDB.git -b 1.7.2 && cd TileDB \ + && mkdir -p build && cd build \ + && cmake -DTILEDB_VERBOSE=ON -DTILEDB_S3=ON -DTILEDB_SERIALIZATION=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local .. \ + && make -j$(nproc) \ + && make -C tiledb install + +# Install curl after building tiledb +RUN apt-get update && apt-get install -y \ + libcurl4 \ + libcurl4-openssl-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install OpenJPEG +RUN cd /build_deps \ + && git clone https://github.com/uclouvain/openjpeg.git -b v2.2.0 && cd openjpeg \ + && mkdir -p build && cd build \ + && cmake .. \ + && make -j$(nproc) \ + && make install + +# Install libtiff +RUN cd /build_deps \ + && wget --no-check-certificate https://download.osgeo.org/libtiff/tiff-4.1.0.tar.gz \ + && tar -zxf tiff-4.1.0.tar.gz \ + && cd tiff-4.1.0 \ + && ./configure \ + && make \ + && make install + +# Install Proj +RUN cd /build_deps \ + && git clone https://github.com/OSGeo/PROJ.git -b 6.2.1 && cd PROJ \ + && mkdir -p build && cd build \ + && cmake .. \ + && make -j$(nproc) \ + && make install + +# Install libgeotiff +RUN cd /build_deps \ + && wget --no-check-certificate https://download.osgeo.org/geotiff/libgeotiff/libgeotiff-1.5.1.tar.gz \ + && tar -zxf libgeotiff-1.5.1.tar.gz \ + && cd libgeotiff-1.5.1 \ + && mkdir -p build && cd build \ + && cmake .. \ + && make \ + && make install + +# Install GDAL +RUN cd /build_deps \ + && git clone https://github.com/OSGeo/gdal.git && cd gdal/gdal \ + && git checkout c99a871a7bdedc751c503bb8cf508d9016510fe0 \ + && ./configure --with-crypto=no --with-curl=no \ + && make -j$(nproc) \ + && make install + +## Install TileDB-Py +RUN cd /build_deps \ + && pip3 install numpy \ + && git clone https://github.com/TileDB-Inc/TileDB-Py.git -b 0.5.3 \ + && cd TileDB-Py && python3 setup.py install + +## Install XArray +RUN cd /build_deps && pip3 install xarray + +## Install Dask +RUN cd /build_deps \ + && pip3 install toolz && pip3 install dask_image \ + && git clone https://github.com/dask/dask.git && cd dask \ + && git checkout 807f3225cf840f28ce7cf89b88fea63d473889e7 \ + && python3 setup.py install \ + && pip3 install dask distributed --upgrade \ + && pip3 install dask-image + +# Install Rasterio +RUN cd /build_deps && pip3 install cython +RUN cd /build_deps \ + && git clone https://github.com/mapbox/rasterio.git -b 1.1.0 && cd rasterio \ + && python3 setup.py install + +# Install Fiona +RUN cd /build_deps \ + && git clone https://github.com/Toblerity/Fiona.git && cd Fiona \ + && python3 setup.py install + +# Install TileDB-SAR +RUN cd /build_deps \ + && git clone https://github.com/TileDB-Inc/TileDB-SAR.git && cd TileDB-SAR \ + && git checkout 888059a15d87ae95fff6dc01c8bd4343ee4eaee1 \ + && python3 setup.py install + +# Install Mapserver +RUN cd /build_deps \ + && git clone https://github.com/mapserver/mapserver.git && cd mapserver \ + && git checkout 0fcc810f0b559c800f950db78a79fa6574799f23 \ + && mkdir -p build && cd build \ + && cmake .. -DWITH_GIF=OFF -DWITH_HARFBUZZ=OFF -DWITH_PROTOBUFC=OFF -DWITH_FRIBIDI=OFF -DWITH_POSTGIS=OFF -DWITH_GEOS=OFF -DWITH_FCGI=OFF -DWITH_CAIRO=OFF \ + && make \ + && make install + +# Install LasZIP +RUN cd /build_deps \ + && wget https://github.com/LASzip/LASzip/releases/download/3.4.1/laszip-src-3.4.1.tar.gz \ + && tar -zxf laszip-src-3.4.1.tar.gz \ + && cd laszip-src-3.4.1 \ + && mkdir -p build && cd build \ + && cmake .. \ + && make \ + && make install + +# Install PDAL +RUN cd /build_deps \ + && git clone https://github.com/PDAL/PDAL.git -b 2.0.1 && cd PDAL \ + && mkdir -p build && cd build \ + && cmake .. \ + && make \ + && make install + +# Install PDAL Python +RUN pip3 install packaging \ + && git clone https://github.com/PDAL/python pdalextension \ + && cd pdalextension \ + && python3 setup.py build \ + && python3 setup.py install + +# Clean up +RUN cd /tmp && rm -r /build_deps diff --git a/tile_gen/Dockerfile b/tile_gen/Dockerfile new file mode 100644 index 0000000..adb4951 --- /dev/null +++ b/tile_gen/Dockerfile @@ -0,0 +1,10 @@ +FROM tiledb/tiledb-geospatial:latest + +# TODO may need the unzip command to be installed if not already +RUN wget http://ssec.wisc.edu/~rayg/pub/amqpfind.zip && \ + unzip amqpfind.zip && \ + rm amqpfind.zip + +COPY tile_index.py . +COPY generate_tiles.py . +COPY run.sh . diff --git a/tile_gen/generate_tiles.py b/tile_gen/generate_tiles.py new file mode 100644 index 0000000..ed8fe09 --- /dev/null +++ b/tile_gen/generate_tiles.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +import os +import sys +import warnings +import logging +import subprocess + +import tile_index + +LOG = logging.getLogger(__name__) + + +def group_files(products, input_files): + """Group input geotiff files by product.""" + groups = {} + for prod in products: + prods_files = [f for f in input_files if prod in f] + if prods_files: + groups[prod] = prods_files + if len(groups) != len(products): + warnings.warn("Not all product geotiffs were provided.") + return groups + + +def remap_to_lonlat(itif, otif): + """Remap a single geotiff by calling gdalwarp.""" + try: + subprocess.run(['gdalwarp', '-t_srs', 'EPSG:4326', itif, otif], check=True) + except subprocess.CalledProcessError: + LOG.error("Could not remap geotiff %s -> %s" % (itif, otif)) + return None + return otif + + +def remap_tifs(input_tifs, out_dir, remap_suffix): + """Remap all input geotiffs to EPSG:4326.""" + for itif in input_tifs: + ifn = os.path.basename(itif) + otif = os.path.join(out_dir, ifn.replace('.tif', remap_suffix)) + otif = remap_to_lonlat(itif, otif) + if otif is not None: + yield otif + + +def main(): + import argparse + parser = argparse.ArgumentParser(description="Take input geotiffs and generate mapserver compatible tiles.") + parser.add_argument('--remap', action='store_true', + help="Remap input geotiffs to EPSG:4326") + parser.add_argument('--remap-suffix', default='_LL.tif', + help="Replace 'tif' with provided suffix when geotiffs are remapped.") + parser.add_argument('-p', '--products', nargs="*", + help="Product names to group together in each " + "'layer'. Product name must be in the filename.") + parser.add_argument('--shape-file', default='{product}.shp', + help="Shapefile filename pattern to use and placed in the output directory. (default: '{product}.shp')") + parser.add_argument('out_dir', + help="Output path to save tile information to (ex. '/data/tiles/{product}')") + parser.add_argument('input_files', + help="Input geotiffs to generate tiles for (separate from product lists with '--')") + args = parser.parse_args() + + groups = group_files(args.products, args.input_files) + for prod, prod_files in groups.items(): + out_dir = args.out_dir.format(prod) + os.makedirs(out_dir, exist_ok=True) + shp_fn = args.shape_file.format(prod) + shp_pathname = os.path.join(out_dir, shp_fn) + + # remap if needed + if args.remap: + prod_files = list(remap_tifs(prod_files, out_dir, args.remap_suffix)) + + # create shape file + tile_index.index(prod_files, shp_pathname) + + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/tile_gen/run.sh b/tile_gen/run.sh new file mode 100644 index 0000000..1b6ebd5 --- /dev/null +++ b/tile_gen/run.sh @@ -0,0 +1,61 @@ +#!/bin/bash -le +# Usage: run.sh +# Environment variables used for configuration: +# AMQPFIND_ARGS: Arguments to pass to amqpfind when listening for new input +# events. Should not include the "-C" topic flag (see AMQPFIND_TOPIC). +# Default: "-H cspp-geo-rabbit -X satellite -u guest -p guest" +# AMQPFIND_TOPIC: Topic to use for incoming data events. +# Default: "data.goes.*.abi.*.l1b.netcdf.complete" +# The first asterisk (3rd element) can limit processing to a particular +# satellite (ex. `g16`). The second asterisk (5th element) can be used +# to limit to a particular sector (choices: radf, radc, radm1, radm2) +# AMQPSEND_ARGS: Arguments to pass to amqpsend when sending out new data +# events. Default: "-H cspp-geo-rabbit -X satellite -u guest -p guest" + +# Verify that the data mount is available +test -d "/data" + +# Change to /data volume to write our output +test -d "/dst" + +export AMQPFIND_ARGS=${AMQPFIND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"} +export AMQPSEND_ARGS=${AMQPSEND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"} +export AMQPFIND_TOPIC=${AMQPFIND_TOPIC:-'data.goes.*.abi.*.l1b.geotiff.complete'} +export G2G_PRODUCTS=${G2G_PRODUCTS:-"C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 C13 C14 C15 C16 true_color"} +export TMPDIR=${TMPDIR:-"/dst/tmp"} + +run_tile_gen() { + if [ $# -ne 5 ]; then + echo "Unexpected number of arguments (expected 5): $#" + return 1 + fi + + satellite_family=${1,,} + satellite_id=${2,,} + instrument=${3,,} + data_type=${4,,} + path="$5" + echo "Starting Geo2Grid processing for ${path}" + # convert path from a relative path to an absolute path + path="/data/${path}" + + # update shapefile in a temporary directory + # and resample geotiff if necessary + # FUTURE: TileDB will be updated in-place + # generate_tiles.py will make a temporary directory + out_dir="/dst/tiles/${satellite_family}/${satellite_id}/${instrument}" + mkdir -p ${out_dir} + python3 generate_tiles.py --remap -p ${G2G_PRODUCTS} -- ${out_dir} ${path} + # OUT/<product>/<product>.shp + glob_pattern="${out_dir}/*/*.shp" + + amqpsend_topic="data.${satellite_family}.${satellite_id}.${instrument}.${data_type}.l1b.tiledb.complete" + json_info="{path: ${glob_pattern}, satellite_family: ${satellite_family}, satellite_ID: ${satellite_id}, instrument: ${instrument}, data_type: ${data_type}}" + json_info="{\"path\": \"${glob_pattern}\", \"satellite_family\": \"${satellite_family}\", \"satellite_ID\": \"${satellite_id}\", \"instrument\": \"${instrument}\", \"data_type\": \"${data_type}\"}" + echo -e "[[\"$amqpsend_topic\", $json_info]]" | python /work/amqpfind/amqpsend.py ${AMQPSEND_ARGS} +} + +export -f run_tile_gen +echo "Listening to AMQP messages with topic \"$AMQPFIND_TOPIC\"" +python amqpfind/amqpfind.py ${AMQPFIND_ARGS} -C "${AMQPFIND_TOPIC}" -j "{satellite_family} {satellite_ID} {instrument} {data_type} \'{path}\'" | xargs -I{} -P3 -n1 bash -c "run_tile_gen {}" + diff --git a/tile_gen/tile_index.py b/tile_gen/tile_index.py new file mode 100644 index 0000000..6ff1769 --- /dev/null +++ b/tile_gen/tile_index.py @@ -0,0 +1,160 @@ +import sys +import fiona +from fiona.crs import from_epsg +import rasterio +from shapely.geometry import mapping, box + +import argparse +import datetime +import glob +import logging +import os +import re +import shutil + +# Remap geostationary to EPSG 4326 +# gdalwarp -t_srs EPSG:4326 in.tif out.tif + + +# Note if using docker tiledb-geospatial image then requires shapely - `pip3 install shapely` +# export CPL_DEBUG=ON - GDAL +# export MS_DEBUGLEVEL=6 - MAPSERVER +# python3 tile_index.py -dir data +# sample mapserver queries +# mapserv -nh "QUERY_STRING=map=goes.map&request=GetCapabilities&service=WMS&version=1.1.1" +# mapserv -nh "QUERY_STRING=map=goes.map&request=GetMap&service=WMS&version=1.1.1&layers=goes_abi&srs=EPSG:4326&bbox=-180,-90,180,90&format=image/jpeg&WIDTH=1000&HEIGHT=1000&TIME=2019-12-12T19:10:18" > out.jpg +# TODO add overview example to mapserver + +logger = logging.getLogger(__name__) + +temporal_schema = { + 'geometry': 'Polygon', + 'properties': { + 'location': 'str', + 'time': 'str:19' + } +} + +possible_time_regex = ( + (re.compile(r'\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}'), '%Y%m%d_%H%M%S'), + (re.compile(r'\d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}'), '%Y%m%dT%H%M%S'), +) + + +def get_file_time(fn): + for regex, time_fmt in possible_time_regex: + matches = regex.findall(fn) + if matches: + return datetime.datetime.strptime(matches[-1], time_fmt) + else: + raise ValueError("Unknown filename scheme, can't determine file time.") + + +def index(input_files, output_shapefile): + """Create shapefile for location and times of provided geotiffs or tileDB arrays. + + Note: All layers to be included in the shapefile must be provided all at + once. Repeated calls to this function will overwrite existing + shapefile information. + + """ + import tempfile + out_dir, shp_fn = os.path.split(output_shapefile) + tmp_dir = tempfile.mkdtemp("_tile_index") + tmp_shapefile = os.path.join(tmp_dir, shp_fn) + with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile', + schema=temporal_schema) as output: + for f in input_files: + try: + dt = get_file_time(f) + except ValueError: + logger.error(f"Can't time for file {f}") + continue + + logger.info(f"Indexing {f} {dt.isoformat()}") + with rasterio.open(f) as src: + g = box(*src.bounds) + + output.write( + { + 'geometry': mapping(g), + 'properties': { + 'location': f, + 'time': dt.isoformat() + } + }) + + # move the shapefile contents to the final destination + for fn in os.listdir(tmp_dir): + shutil.move(os.path.join(tmp_dir, fn), os.path.join(out_dir, fn)) + # we don't need the temporary directory anymore + shutil.rmtree(tmp_dir, ignore_errors=True) + + +# def index(src_dir, output): +# files = glob.glob(os.path.join(src_dir, '*.tif')) +# folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))] +# +# with fiona.open(output, 'w', driver='ESRI Shapefile', +# schema=temporal_schema) as output: +# # simple toggle between indexing tiff files or tiledb arrays +# if len(files) > 0: +# it = files +# else: +# it = folders +# +# for f in it: +# parts = f.split('_') +# tstamp = parts[5] + parts[6] +# dt = datetime.datetime.strptime(tstamp, '%Y%m%d%H%M%S') +# logger.info(f"Indexing {f} {dt.isoformat()}") +# with rasterio.open(f) as src: +# g = box(*src.bounds) +# +# output.write( +# { +# 'geometry': mapping(g), +# 'properties': { +# 'location': f, +# 'time': dt.isoformat() +# } +# }) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-d', '--directory', + help='Single directory to search for .tif files or TileDB array directories.') + parser.add_argument('-o', '--output', default='img_index.shp') + parser.add_argument('input_files', nargs='*', + help='TileDB directories or GeoTIFF files to ingest.') + args = parser.parse_args() + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s" + ) + + if args.input_files: + logger.info("Using input files...") + it = args.input_files + else: + src_dir = args.directory + files = glob.glob(os.path.join(src_dir, '*.tif')) + folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))] + # simple toggle between indexing tiff files or tiledb arrays + if len(files) > 0: + it = files + else: + it = folders + + if not it: + raise ValueError("No valid inputs provided.") + + if args.directory: + logger.info('Indexer starting') + index(it, args.output) + + +if __name__ == "__main__": + sys.exit(main()) -- GitLab