Skip to content
Snippets Groups Projects
tile_index.py 5.22 KiB
import sys
import fiona
from fiona.crs import from_epsg
import rasterio
from shapely.geometry import mapping, box

import argparse
import datetime
import glob
import logging
import os
import re
import shutil

# Remap geostationary to EPSG 4326
# gdalwarp -t_srs EPSG:4326 in.tif out.tif


# Note if using docker tiledb-geospatial image then requires shapely - `pip3 install shapely`
# export CPL_DEBUG=ON - GDAL
# export MS_DEBUGLEVEL=6 - MAPSERVER
# python3 tile_index.py -dir data
# sample mapserver queries
# mapserv -nh "QUERY_STRING=map=goes.map&request=GetCapabilities&service=WMS&version=1.1.1"
# mapserv -nh "QUERY_STRING=map=goes.map&request=GetMap&service=WMS&version=1.1.1&layers=goes_abi&srs=EPSG:4326&bbox=-180,-90,180,90&format=image/jpeg&WIDTH=1000&HEIGHT=1000&TIME=2019-12-12T19:10:18" > out.jpg
# TODO add overview example to mapserver

logger = logging.getLogger(__name__)

temporal_schema = {
    'geometry': 'Polygon',
    'properties': {
        'location': 'str',
        'time': 'str:19'
    }
}

possible_time_regex = (
    (re.compile(r'\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}'), '%Y%m%d_%H%M%S'),
    (re.compile(r'\d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}'), '%Y%m%dT%H%M%S'),
)


def get_file_time(fn):
    for regex, time_fmt in possible_time_regex:
        matches = regex.findall(fn)
        if matches:
            return datetime.datetime.strptime(matches[-1], time_fmt)
    else:
        raise ValueError("Unknown filename scheme, can't determine file time.")


def index(input_files, output_shapefile):
    """Create shapefile for location and times of provided geotiffs or tileDB arrays.

    Note: All layers to be included in the shapefile must be provided all at
          once. Repeated calls to this function will overwrite existing
          shapefile information.

    """
    import tempfile
    out_dir, shp_fn = os.path.split(output_shapefile)
    tmp_dir = tempfile.mkdtemp("_tile_index")
    tmp_shapefile = os.path.join(tmp_dir, shp_fn)
    with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile',
                    schema=temporal_schema) as output:
        for f in input_files:
            try:
                dt = get_file_time(f)
            except ValueError:
                logger.error(f"Can't time for file {f}")
                continue

            logger.info(f"Indexing {f} {dt.isoformat()}")
            with rasterio.open(f) as src:
                g = box(*src.bounds)

                output.write(
                    {
                        'geometry': mapping(g),
                        'properties': {
                            'location': f,
                            'time': dt.isoformat()
                        }
                    })

    # move the shapefile contents to the final destination
    for fn in os.listdir(tmp_dir):
        shutil.move(os.path.join(tmp_dir, fn), os.path.join(out_dir, fn))
    # we don't need the temporary directory anymore
    shutil.rmtree(tmp_dir, ignore_errors=True)


# def index(src_dir, output):
#   files = glob.glob(os.path.join(src_dir, '*.tif'))
#   folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))]
#
#     with fiona.open(output, 'w', driver='ESRI Shapefile',
#                     schema=temporal_schema) as output:
#         # simple toggle between indexing tiff files or tiledb arrays
#         if len(files) > 0:
#             it = files
#         else:
#             it = folders
#
#         for f in it:
#             parts = f.split('_')
#             tstamp = parts[5] + parts[6]
#             dt = datetime.datetime.strptime(tstamp, '%Y%m%d%H%M%S')
#             logger.info(f"Indexing {f} {dt.isoformat()}")
#             with rasterio.open(f) as src:
#                 g = box(*src.bounds)
#
#                 output.write(
#                     {
#                         'geometry': mapping(g),
#                         'properties': {
#                             'location': f,
#                             'time': dt.isoformat()
#                         }
#                     })


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory',
                        help='Single directory to search for .tif files or TileDB array directories.')
    parser.add_argument('-o', '--output', default='img_index.shp')
    parser.add_argument('input_files', nargs='*',
                        help='TileDB directories or GeoTIFF files to ingest.')
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s"
    )

    if args.input_files:
        logger.info("Using input files...")
        it = args.input_files
    else:
        src_dir = args.directory
        files = glob.glob(os.path.join(src_dir, '*.tif'))
        folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))]
        # simple toggle between indexing tiff files or tiledb arrays
        if len(files) > 0:
            it = files
        else:
            it = folders

    if not it:
        raise ValueError("No valid inputs provided.")

    if args.directory:
        logger.info('Indexer starting')
        index(it, args.output)


if __name__ == "__main__":
    sys.exit(main())