Skip to content
Snippets Groups Projects
Commit 9592a290 authored by David Hoese's avatar David Hoese
Browse files

Add untested initial attempt at a tile generation script

parent db1233a3
No related branches found
No related tags found
No related merge requests found
FROM ubuntu:eoan FROM tiledb/tiledb-geospatial:latest
RUN apt-get -y update && \ RUN apt-get -y update && \
apt-get -y upgrade && \ apt-get -y upgrade && \
...@@ -32,20 +32,36 @@ RUN mkdir -p /build/gdal && \ ...@@ -32,20 +32,36 @@ RUN mkdir -p /build/gdal && \
make install && \ make install && \
rm -rf /build/gdal rm -rf /build/gdal
# mapserver # mapserver (unstable)
# https://github.com/mapserver/mapserver/archive/master.zip
# 8a8ea9ccb59b0ebc16bac9bbc4f86120f76835b4 (2019-12-06)
RUN mkdir -p /build/mapserver && \ RUN mkdir -p /build/mapserver && \
apt-get -y update && \ apt-get -y update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y libprotobuf17 zlib1g-dev libpng-dev libjpeg-turbo8 libfreetype6 libfribidi-dev libharfbuzz-dev libcairo2-dev libfcgi-dev libgeos++-dev postgresql postgis libxml2-dev libgif-dev libjpeg-turbo8-dev libprotobuf-dev protobuf-compiler libprotobuf-c-dev libprotobuf-c1 libprotobuf-dev protobuf-c-compiler && \ DEBIAN_FRONTEND=noninteractive apt-get install -y libprotobuf17 zlib1g-dev libpng-dev libjpeg-turbo8 libfreetype6 libfribidi-dev libharfbuzz-dev libcairo2-dev libfcgi-dev libgeos++-dev postgresql postgis libxml2-dev libgif-dev libjpeg-turbo8-dev libprotobuf-dev protobuf-compiler libprotobuf-c-dev libprotobuf-c1 libprotobuf-dev protobuf-c-compiler && \
apt-get -y clean && \ apt-get -y clean && \
curl -O http://download.osgeo.org/mapserver/mapserver-7.4.2.tar.gz && \ curl -o mapserver-dev.zip https://github.com/mapserver/mapserver/archive/8a8ea9ccb59b0ebc16bac9bbc4f86120f76835b4.zip && \
tar xf mapserver-7.4.2.tar.gz && \ unzip mapserver-dev.zip && \
cd mapserver-7.4.2 && \ cd mapserver && \
mkdir build && \ mkdir build && \
cd build && \ cd build && \
cmake .. -DWITH_POSTGIS=0 && \ cmake .. -DWITH_POSTGIS=0 && \
make -j$(nproc) && \ make -j$(nproc) && \
make install make install
# mapserver (stable)
#RUN mkdir -p /build/mapserver && \
# apt-get -y update && \
# DEBIAN_FRONTEND=noninteractive apt-get install -y libprotobuf17 zlib1g-dev libpng-dev libjpeg-turbo8 libfreetype6 libfribidi-dev libharfbuzz-dev libcairo2-dev libfcgi-dev libgeos++-dev postgresql postgis libxml2-dev libgif-dev libjpeg-turbo8-dev libprotobuf-dev protobuf-compiler libprotobuf-c-dev libprotobuf-c1 libprotobuf-dev protobuf-c-compiler && \
# apt-get -y clean && \
# curl -O http://download.osgeo.org/mapserver/mapserver-7.4.2.tar.gz && \
# tar xf mapserver-7.4.2.tar.gz && \
# cd mapserver-7.4.2 && \
# mkdir build && \
# cd build && \
# cmake .. -DWITH_POSTGIS=0 && \
# make -j$(nproc) && \
# make install
# postgres # postgres
#service postgresql start #service postgresql start
......
# Based on
# Base of the geospatial stack: Ubuntu 18.04 plus the toolchain for the
# source builds that follow.
FROM ubuntu:18.04
LABEL maintainer="support@tiledb.io"
# NOTE(review): set with ENV, so this value also persists into containers
# started from the image, not only into the build steps.
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=GMT
# Puts /usr/local/lib first on the library search path.
# NOTE(review): if LD_LIBRARY_PATH is not already set this expands to
# "/usr/local/lib:" with a trailing colon - confirm that is acceptable.
ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
# Compilers, build tools, development headers, pip, git and wget used by the
# later steps; the apt package lists are removed in the same layer.
RUN apt-get update && apt-get install -y \
gosu \
pwgen \
tzdata \
gcc \
g++ \
build-essential \
cmake \
sqlite \
libsqlite3-dev \
libxml2-dev \
libjpeg-dev \
libpng-dev \
libfreetype6-dev \
libzstd-dev \
python3-pip \
git \
wget \
&& rm -rf /var/lib/apt/lists/*
# Install tiledb using 1.7.2 release
# Creates /build_deps (the scratch directory shared by all later source
# builds), builds the 1.7.2 tag in Release mode with S3 and serialization
# enabled, and installs under /usr/local.
RUN mkdir -p /build_deps && cd /build_deps \
&& git clone https://github.com/TileDB-Inc/TileDB.git -b 1.7.2 && cd TileDB \
&& mkdir -p build && cd build \
&& cmake -DTILEDB_VERBOSE=ON -DTILEDB_S3=ON -DTILEDB_SERIALIZATION=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local .. \
&& make -j$(nproc) \
&& make -C tiledb install
# Install curl after building tiledb
# NOTE(review): presumably ordered this way so the TileDB build does not pick
# up the system libcurl - confirm before reordering these two steps.
RUN apt-get update && apt-get install -y \
libcurl4 \
libcurl4-openssl-dev \
&& rm -rf /var/lib/apt/lists/*
# Install OpenJPEG
# Built from the v2.2.0 tag with the default CMake configuration.
RUN cd /build_deps \
&& git clone https://github.com/uclouvain/openjpeg.git -b v2.2.0 && cd openjpeg \
&& mkdir -p build && cd build \
&& cmake .. \
&& make -j$(nproc) \
&& make install
# Install libtiff
# Built from the 4.1.0 release tarball.
# NOTE(review): --no-check-certificate disables TLS verification of the
# download and the tarball is not checksum-verified.
RUN cd /build_deps \
&& wget --no-check-certificate https://download.osgeo.org/libtiff/tiff-4.1.0.tar.gz \
&& tar -zxf tiff-4.1.0.tar.gz \
&& cd tiff-4.1.0 \
&& ./configure \
&& make \
&& make install
# Install Proj
# Built from the 6.2.1 tag.
RUN cd /build_deps \
&& git clone https://github.com/OSGeo/PROJ.git -b 6.2.1 && cd PROJ \
&& mkdir -p build && cd build \
&& cmake .. \
&& make -j$(nproc) \
&& make install
# Install libgeotiff
# Built from the 1.5.1 release tarball.
# NOTE(review): same unverified download as libtiff above
# (--no-check-certificate, no checksum).
RUN cd /build_deps \
&& wget --no-check-certificate https://download.osgeo.org/geotiff/libgeotiff/libgeotiff-1.5.1.tar.gz \
&& tar -zxf libgeotiff-1.5.1.tar.gz \
&& cd libgeotiff-1.5.1 \
&& mkdir -p build && cd build \
&& cmake .. \
&& make \
&& make install
# Install GDAL
# Pinned to a specific commit of the repository rather than a release tag and
# configured with crypto and curl support switched off.
RUN cd /build_deps \
&& git clone https://github.com/OSGeo/gdal.git && cd gdal/gdal \
&& git checkout c99a871a7bdedc751c503bb8cf508d9016510fe0 \
&& ./configure --with-crypto=no --with-curl=no \
&& make -j$(nproc) \
&& make install
## Install TileDB-Py
# Built from the 0.5.3 tag; numpy is installed first (unpinned).
RUN cd /build_deps \
&& pip3 install numpy \
&& git clone https://github.com/TileDB-Inc/TileDB-Py.git -b 0.5.3 \
&& cd TileDB-Py && python3 setup.py install
## Install XArray
RUN cd /build_deps && pip3 install xarray
## Install Dask
# Installs dask from a pinned commit of the repository.
# NOTE(review): the following `pip3 install dask distributed --upgrade` asks
# pip to upgrade dask and may replace the commit-pinned build; dask_image is
# also requested twice (dask_image / dask-image).
RUN cd /build_deps \
&& pip3 install toolz && pip3 install dask_image \
&& git clone https://github.com/dask/dask.git && cd dask \
&& git checkout 807f3225cf840f28ce7cf89b88fea63d473889e7 \
&& python3 setup.py install \
&& pip3 install dask distributed --upgrade \
&& pip3 install dask-image
# Install Rasterio
# cython is installed in its own layer, then rasterio is built from the
# 1.1.0 tag.
RUN cd /build_deps && pip3 install cython
RUN cd /build_deps \
&& git clone https://github.com/mapbox/rasterio.git -b 1.1.0 && cd rasterio \
&& python3 setup.py install
# Install Fiona
# NOTE(review): no tag or commit is selected here, so this builds whatever the
# default branch contains at build time.
RUN cd /build_deps \
&& git clone https://github.com/Toblerity/Fiona.git && cd Fiona \
&& python3 setup.py install
# Install TileDB-SAR
# Pinned to a specific commit.
RUN cd /build_deps \
&& git clone https://github.com/TileDB-Inc/TileDB-SAR.git && cd TileDB-SAR \
&& git checkout 888059a15d87ae95fff6dc01c8bd4343ee4eaee1 \
&& python3 setup.py install
# Install Mapserver
# Pinned to a specific commit and configured with GIF, HarfBuzz, protobuf-c,
# FriBidi, PostGIS, GEOS, FastCGI and Cairo support all switched off.
RUN cd /build_deps \
&& git clone https://github.com/mapserver/mapserver.git && cd mapserver \
&& git checkout 0fcc810f0b559c800f950db78a79fa6574799f23 \
&& mkdir -p build && cd build \
&& cmake .. -DWITH_GIF=OFF -DWITH_HARFBUZZ=OFF -DWITH_PROTOBUFC=OFF -DWITH_FRIBIDI=OFF -DWITH_POSTGIS=OFF -DWITH_GEOS=OFF -DWITH_FCGI=OFF -DWITH_CAIRO=OFF \
&& make \
&& make install
# Install LasZIP
# Built from the 3.4.1 source release tarball (download is not
# checksum-verified).
RUN cd /build_deps \
&& wget https://github.com/LASzip/LASzip/releases/download/3.4.1/laszip-src-3.4.1.tar.gz \
&& tar -zxf laszip-src-3.4.1.tar.gz \
&& cd laszip-src-3.4.1 \
&& mkdir -p build && cd build \
&& cmake .. \
&& make \
&& make install
# Install PDAL
# Built from the 2.0.1 tag with the default CMake configuration.
RUN cd /build_deps \
&& git clone https://github.com/PDAL/PDAL.git -b 2.0.1 && cd PDAL \
&& mkdir -p build && cd build \
&& cmake .. \
&& make \
&& make install
# Install PDAL Python
# Clone and build inside /build_deps like every other source build in this
# file; the final clean-up step deletes only /build_deps, so a clone made in
# the default working directory would be left behind in the image.
# NOTE(review): the clone is not pinned to a tag or commit.
RUN cd /build_deps \
    && pip3 install packaging \
    && git clone https://github.com/PDAL/python pdalextension \
    && cd pdalextension \
    && python3 setup.py build \
    && python3 setup.py install
# Clean up
# Removes the shared source/build directory. Because this runs as its own RUN
# instruction, the files disappear from the final filesystem but the earlier
# layers that created them keep their size.
RUN cd /tmp && rm -r /build_deps
# NOTE(review): ":latest" is a moving tag; pin a specific tag or digest once
# one is known so rebuilds are reproducible.
FROM tiledb/tiledb-geospatial:latest

# run.sh calls /work/amqpfind/amqpsend.py by absolute path and uses the
# relative paths amqpfind/amqpfind.py and generate_tiles.py, so the archive
# and the scripts must all live in /work and /work must be the working
# directory at run time.
WORKDIR /work

# unzip extracts the amqpfind archive below; shapely is imported by
# tile_index.py.
RUN apt-get update && \
    apt-get install -y --no-install-recommends unzip && \
    rm -rf /var/lib/apt/lists/* && \
    pip3 install --no-cache-dir shapely

# NOTE(review): assumes the archive unpacks into an "amqpfind/" directory, as
# the paths in run.sh imply - confirm. The download is plain HTTP and is not
# checksum-verified.
RUN wget http://ssec.wisc.edu/~rayg/pub/amqpfind.zip && \
    unzip amqpfind.zip && \
    rm amqpfind.zip

COPY tile_index.py .
COPY generate_tiles.py .
COPY run.sh .
#!/usr/bin/env python3
import os
import sys
import warnings
import logging
import subprocess
import tile_index
LOG = logging.getLogger(__name__)
def group_files(products, input_files):
    """Group input geotiff files by product.

    A file belongs to a product when the product name occurs in the file's
    name (the directory part of the path is ignored, so a directory that
    happens to contain a product name does not pull unrelated files into
    that group).

    Args:
        products: Iterable of product names, or ``None`` for no products.
        input_files: Iterable of geotiff paths.

    Returns:
        dict mapping each product that matched at least one file to the list
        of matching paths, in input order. Products without any file are
        left out and a warning is issued.
    """
    # materialise once: the inputs are scanned once per product
    input_files = list(input_files)
    groups = {}
    missing = False
    for prod in products or ():
        prods_files = [f for f in input_files if prod in os.path.basename(f)]
        if prods_files:
            groups[prod] = prods_files
        else:
            missing = True
    if missing:
        warnings.warn("Not all product geotiffs were provided.")
    return groups
def remap_to_lonlat(itif, otif):
    """Warp one geotiff to EPSG:4326 with the ``gdalwarp`` command.

    Returns ``otif`` when gdalwarp exits successfully. When it exits with a
    non-zero status the failure is logged and ``None`` is returned.
    """
    command = ['gdalwarp', '-t_srs', 'EPSG:4326', itif, otif]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        LOG.error("Could not remap geotiff %s -> %s" % (itif, otif))
        return None
    else:
        return otif
def remap_tifs(input_tifs, out_dir, remap_suffix):
    """Yield the EPSG:4326 copy of every input geotiff that could be remapped.

    Each output is written to ``out_dir`` under the input's file name with
    '.tif' replaced by ``remap_suffix``. Inputs whose remapping fails are
    skipped.
    """
    for source_path in input_tifs:
        remapped_name = os.path.basename(source_path).replace('.tif', remap_suffix)
        result = remap_to_lonlat(source_path, os.path.join(out_dir, remapped_name))
        if result is None:
            continue
        yield result
def main():
    """Command line entry point: group geotiffs by product and index them.

    For every product that has at least one input file this creates the
    product's output directory, optionally remaps the files to EPSG:4326 and
    writes a tile index shapefile for them via ``tile_index.index``.

    ``out_dir`` and ``--shape-file`` are ``str.format`` patterns; both the
    documented ``{product}`` field and a positional ``{}`` field are filled
    with the product name.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Take input geotiffs and generate mapserver compatible tiles.")
    parser.add_argument('--remap', action='store_true',
                        help="Remap input geotiffs to EPSG:4326")
    parser.add_argument('--remap-suffix', default='_LL.tif',
                        help="Replace 'tif' with provided suffix when geotiffs are remapped.")
    # required: without a product list there is nothing to group by
    parser.add_argument('-p', '--products', nargs="*", required=True,
                        help="Product names to group together in each "
                             "'layer'. Product name must be in the filename.")
    parser.add_argument('--shape-file', default='{product}.shp',
                        help="Shapefile filename pattern to use and placed in the output directory. (default: '{product}.shp')")
    parser.add_argument('out_dir',
                        help="Output path to save tile information to (ex. '/data/tiles/{product}')")
    # nargs='+' so this is a list of paths; a bare positional would be a single
    # string and be iterated character by character when grouping
    parser.add_argument('input_files', nargs='+',
                        help="Input geotiffs to generate tiles for (separate from product lists with '--')")
    args = parser.parse_args()

    groups = group_files(args.products, args.input_files)
    for prod, prod_files in groups.items():
        # pass the product both positionally and by name: the named form is
        # what the help text and the --shape-file default use
        out_dir = args.out_dir.format(prod, product=prod)
        os.makedirs(out_dir, exist_ok=True)
        shp_fn = args.shape_file.format(prod, product=prod)
        shp_pathname = os.path.join(out_dir, shp_fn)

        # remap if needed
        if args.remap:
            prod_files = list(remap_tifs(prod_files, out_dir, args.remap_suffix))

        # create shape file
        tile_index.index(prod_files, shp_pathname)
if __name__ == "__main__":
sys.exit(main())
\ No newline at end of file
#!/bin/bash -le
# Usage: run.sh
#
# Listens for AMQP events announcing new geotiffs, generates tile index
# information for each one and publishes a completion event.
#
# Environment variables used for configuration:
#   AMQPFIND_ARGS: Arguments to pass to amqpfind when listening for new input
#       events. Should not include the "-C" topic flag (see AMQPFIND_TOPIC).
#       Default: "-H cspp-geo-rabbit -X satellite -u guest -p guest"
#   AMQPFIND_TOPIC: Topic to use for incoming data events.
#       Default: "data.goes.*.abi.*.l1b.geotiff.complete"
#       The first asterisk (3rd element) can limit processing to a particular
#       satellite (ex. `g16`). The second asterisk (5th element) can be used
#       to limit to a particular sector (choices: radf, radc, radm1, radm2)
#   AMQPSEND_ARGS: Arguments to pass to amqpsend when sending out new data
#       events. Default: "-H cspp-geo-rabbit -X satellite -u guest -p guest"
#   G2G_PRODUCTS: Space separated product names handed to generate_tiles.py
#       with "-p". Default: C01 through C16 plus true_color.
#   TMPDIR: Exported for child processes. Default: "/dst/tmp"
#       (presumably picked up by Python's tempfile in tile_index.py - confirm)

# Verify that the data mount is available
# (the -e flag in the shebang makes a failing test abort the script)
test -d "/data"
# Verify that the output mount is available
test -d "/dst"
export AMQPFIND_ARGS=${AMQPFIND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"}
export AMQPSEND_ARGS=${AMQPSEND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"}
export AMQPFIND_TOPIC=${AMQPFIND_TOPIC:-'data.goes.*.abi.*.l1b.geotiff.complete'}
export G2G_PRODUCTS=${G2G_PRODUCTS:-"C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 C13 C14 C15 C16 true_color"}
export TMPDIR=${TMPDIR:-"/dst/tmp"}
# Generate tile index information for one incoming geotiff and announce it.
#
# Arguments (exactly five; the first four are lower-cased before use):
#   $1 satellite family
#   $2 satellite ID
#   $3 instrument
#   $4 data type
#   $5 path of the geotiff, relative to the /data mount
# Returns 1, without publishing anything, when the argument count is wrong or
# generate_tiles.py fails.
run_tile_gen() {
    if [ $# -ne 5 ]; then
        echo "Unexpected number of arguments (expected 5): $#"
        return 1
    fi
    local satellite_family=${1,,}
    local satellite_id=${2,,}
    local instrument=${3,,}
    local data_type=${4,,}
    local path="$5"
    local out_dir glob_pattern amqpsend_topic json_info
    echo "Starting Geo2Grid processing for ${path}"

    # convert path from a relative path to an absolute path
    path="/data/${path}"

    # update shapefile in a temporary directory
    # and resample geotiff if necessary
    # FUTURE: TileDB will be updated in-place
    # generate_tiles.py will make a temporary directory
    out_dir="/dst/tiles/${satellite_family}/${satellite_id}/${instrument}"
    mkdir -p "${out_dir}"
    # G2G_PRODUCTS is deliberately unquoted so every product becomes its own
    # argument. The literal "{product}" is a placeholder that generate_tiles.py
    # fills in per product (see its out_dir help text), which yields the
    # OUT/<product>/<product>.shp layout the glob below expects.
    if ! python3 generate_tiles.py --remap -p ${G2G_PRODUCTS} -- "${out_dir}/{product}" "${path}"; then
        # do not announce completion for tiles that were never produced
        echo "Tile generation failed for ${path}" >&2
        return 1
    fi

    # OUT/<product>/<product>.shp
    glob_pattern="${out_dir}/*/*.shp"
    amqpsend_topic="data.${satellite_family}.${satellite_id}.${instrument}.${data_type}.l1b.tiledb.complete"
    json_info="{\"path\": \"${glob_pattern}\", \"satellite_family\": \"${satellite_family}\", \"satellite_ID\": \"${satellite_id}\", \"instrument\": \"${instrument}\", \"data_type\": \"${data_type}\"}"
    # AMQPSEND_ARGS is deliberately unquoted: it holds several options
    echo -e "[[\"$amqpsend_topic\", $json_info]]" | python /work/amqpfind/amqpsend.py ${AMQPSEND_ARGS}
}
# Export the function so the "bash -c" child shells started by xargs can call it.
export -f run_tile_gen
echo "Listening to AMQP messages with topic \"$AMQPFIND_TOPIC\""
# The -j template names the five fields run_tile_gen expects, with the path
# wrapped in escaped single quotes. xargs substitutes each line it reads for
# {} and runs run_tile_gen through "bash -c", at most three at a time (-P3).
# NOTE(review): presumably amqpfind prints one line per matching event - confirm.
python amqpfind/amqpfind.py ${AMQPFIND_ARGS} -C "${AMQPFIND_TOPIC}" -j "{satellite_family} {satellite_ID} {instrument} {data_type} \'{path}\'" | xargs -I{} -P3 -n1 bash -c "run_tile_gen {}"
import sys
import fiona
from fiona.crs import from_epsg
import rasterio
from shapely.geometry import mapping, box
import argparse
import datetime
import glob
import logging
import os
import re
import shutil
# Remap geostationary to EPSG 4326
# gdalwarp -t_srs EPSG:4326 in.tif out.tif
# Note if using docker tiledb-geospatial image then requires shapely - `pip3 install shapely`
# export CPL_DEBUG=ON - GDAL
# export MS_DEBUGLEVEL=6 - MAPSERVER
# python3 tile_index.py -d data
# sample mapserver queries
# mapserv -nh "QUERY_STRING=map=goes.map&request=GetCapabilities&service=WMS&version=1.1.1"
# mapserv -nh "QUERY_STRING=map=goes.map&request=GetMap&service=WMS&version=1.1.1&layers=goes_abi&srs=EPSG:4326&bbox=-180,-90,180,90&format=image/jpeg&WIDTH=1000&HEIGHT=1000&TIME=2019-12-12T19:10:18" > out.jpg
# TODO add overview example to mapserver
logger = logging.getLogger(__name__)
# Schema of the tile index shapefile written by index(): one polygon per
# indexed file plus two string attributes.
temporal_schema = {
'geometry': 'Polygon',
'properties': {
# path of the indexed file, exactly as it was passed to index()
'location': 'str',
# ISO formatted timestamp without fractional seconds, e.g.
# 2019-12-12T19:10:18, which is 19 characters long
'time': 'str:19'
}
}
# Timestamp layouts recognised in file names, tried in this order. Each entry
# pairs a compiled pattern with the strptime format that parses its match.
possible_time_regex = (
    (re.compile(r'\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}'), '%Y%m%d_%H%M%S'),
    (re.compile(r'\d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}'), '%Y%m%dT%H%M%S'),
)


def get_file_time(fn):
    """Extract the timestamp embedded in a file name or path.

    The known layouts are tried in order; for the first layout that occurs
    anywhere in ``fn`` its last occurrence is parsed.

    Returns:
        datetime.datetime parsed from the matched text.

    Raises:
        ValueError: if no known layout occurs in ``fn``.
    """
    for pattern, fmt in possible_time_regex:
        found = pattern.findall(fn)
        if not found:
            continue
        return datetime.datetime.strptime(found[-1], fmt)
    raise ValueError("Unknown filename scheme, can't determine file time.")
def index(input_files, output_shapefile):
    """Create shapefile for location and times of provided geotiffs or tileDB arrays.

    The shapefile is written into a fresh temporary directory first and its
    files are then moved into the directory of ``output_shapefile``. The
    temporary directory is removed afterwards, including when writing fails.

    Note: All layers to be included in the shapefile must be provided all at
    once. Repeated calls to this function will overwrite existing
    shapefile information.

    Args:
        input_files: Iterable of paths that rasterio can open. The time of
            each one is taken from its name; entries whose name holds no
            recognisable timestamp are logged and skipped.
        output_shapefile: Path of the ``.shp`` file to produce.
    """
    import tempfile

    out_dir, shp_fn = os.path.split(output_shapefile)
    tmp_dir = tempfile.mkdtemp("_tile_index")
    try:
        tmp_shapefile = os.path.join(tmp_dir, shp_fn)
        with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile',
                        schema=temporal_schema) as output:
            for f in input_files:
                try:
                    dt = get_file_time(f)
                except ValueError:
                    logger.error(f"Can't time for file {f}")
                    continue
                logger.info(f"Indexing {f} {dt.isoformat()}")
                with rasterio.open(f) as src:
                    g = box(*src.bounds)
                output.write(
                    {
                        'geometry': mapping(g),
                        'properties': {
                            'location': f,
                            'time': dt.isoformat()
                        }
                    })
        # the collection is closed at this point, so every sidecar file exists;
        # move the shapefile contents to the final destination
        for fn in os.listdir(tmp_dir):
            shutil.move(os.path.join(tmp_dir, fn), os.path.join(out_dir, fn))
    finally:
        # we don't need the temporary directory anymore, whether or not the
        # index could be written
        shutil.rmtree(tmp_dir, ignore_errors=True)
# def index(src_dir, output):
# files = glob.glob(os.path.join(src_dir, '*.tif'))
# folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))]
#
# with fiona.open(output, 'w', driver='ESRI Shapefile',
# schema=temporal_schema) as output:
# # simple toggle between indexing tiff files or tiledb arrays
# if len(files) > 0:
# it = files
# else:
# it = folders
#
# for f in it:
# parts = f.split('_')
# tstamp = parts[5] + parts[6]
# dt = datetime.datetime.strptime(tstamp, '%Y%m%d%H%M%S')
# logger.info(f"Indexing {f} {dt.isoformat()}")
# with rasterio.open(f) as src:
# g = box(*src.bounds)
#
# output.write(
# {
# 'geometry': mapping(g),
# 'properties': {
# 'location': f,
# 'time': dt.isoformat()
# }
# })
def main():
    """Command line entry point: build a tile index shapefile.

    Inputs are either the paths given on the command line or, when none are
    given, the contents of ``--directory``: its ``*.tif`` files if there are
    any, otherwise its sub-directories. Exits with a usage error when neither
    input files nor a directory are supplied.

    Raises:
        ValueError: if the selected source yields no inputs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory',
                        help='Single directory to search for .tif files or TileDB array directories.')
    parser.add_argument('-o', '--output', default='img_index.shp')
    parser.add_argument('input_files', nargs='*',
                        help='TileDB directories or GeoTIFF files to ingest.')
    args = parser.parse_args()

    # without this check a missing directory reaches os.path.join as None
    if not args.input_files and not args.directory:
        parser.error("provide input files or a directory to search (-d/--directory)")

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s"
    )

    if args.input_files:
        logger.info("Using input files...")
        it = args.input_files
    else:
        src_dir = args.directory
        files = glob.glob(os.path.join(src_dir, '*.tif'))
        folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir)
                   if os.path.isdir(os.path.join(src_dir, o))]
        # simple toggle between indexing tiff files or tiledb arrays
        it = files if len(files) > 0 else folders

    if not it:
        raise ValueError("No valid inputs provided.")

    # index whichever inputs were selected, not only those found via --directory
    logger.info('Indexer starting')
    index(it, args.output)
if __name__ == "__main__":
sys.exit(main())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment