-
David Hoese authoredDavid Hoese authored
tile_index.py 5.22 KiB
import sys
import fiona
from fiona.crs import from_epsg
import rasterio
from shapely.geometry import mapping, box
import argparse
import datetime
import glob
import logging
import os
import re
import shutil
# Remap geostationary to EPSG 4326
# gdalwarp -t_srs EPSG:4326 in.tif out.tif
# Note if using docker tiledb-geospatial image then requires shapely - `pip3 install shapely`
# export CPL_DEBUG=ON - GDAL
# export MS_DEBUGLEVEL=6 - MAPSERVER
# python3 tile_index.py -dir data
# sample mapserver queries
# mapserv -nh "QUERY_STRING=map=goes.map&request=GetCapabilities&service=WMS&version=1.1.1"
# mapserv -nh "QUERY_STRING=map=goes.map&request=GetMap&service=WMS&version=1.1.1&layers=goes_abi&srs=EPSG:4326&bbox=-180,-90,180,90&format=image/jpeg&WIDTH=1000&HEIGHT=1000&TIME=2019-12-12T19:10:18" > out.jpg
# TODO add overview example to mapserver
logger = logging.getLogger(__name__)
temporal_schema = {
'geometry': 'Polygon',
'properties': {
'location': 'str',
'time': 'str:19'
}
}
possible_time_regex = (
(re.compile(r'\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}'), '%Y%m%d_%H%M%S'),
(re.compile(r'\d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}'), '%Y%m%dT%H%M%S'),
)
def get_file_time(fn):
for regex, time_fmt in possible_time_regex:
matches = regex.findall(fn)
if matches:
return datetime.datetime.strptime(matches[-1], time_fmt)
else:
raise ValueError("Unknown filename scheme, can't determine file time.")
def index(input_files, output_shapefile):
"""Create shapefile for location and times of provided geotiffs or tileDB arrays.
Note: All layers to be included in the shapefile must be provided all at
once. Repeated calls to this function will overwrite existing
shapefile information.
"""
import tempfile
out_dir, shp_fn = os.path.split(output_shapefile)
tmp_dir = tempfile.mkdtemp("_tile_index")
tmp_shapefile = os.path.join(tmp_dir, shp_fn)
with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile',
schema=temporal_schema) as output:
for f in input_files:
try:
dt = get_file_time(f)
except ValueError:
logger.error(f"Can't time for file {f}")
continue
logger.info(f"Indexing {f} {dt.isoformat()}")
with rasterio.open(f) as src:
g = box(*src.bounds)
output.write(
{
'geometry': mapping(g),
'properties': {
'location': f,
'time': dt.isoformat()
}
})
# move the shapefile contents to the final destination
for fn in os.listdir(tmp_dir):
shutil.move(os.path.join(tmp_dir, fn), os.path.join(out_dir, fn))
# we don't need the temporary directory anymore
shutil.rmtree(tmp_dir, ignore_errors=True)
# def index(src_dir, output):
# files = glob.glob(os.path.join(src_dir, '*.tif'))
# folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))]
#
# with fiona.open(output, 'w', driver='ESRI Shapefile',
# schema=temporal_schema) as output:
# # simple toggle between indexing tiff files or tiledb arrays
# if len(files) > 0:
# it = files
# else:
# it = folders
#
# for f in it:
# parts = f.split('_')
# tstamp = parts[5] + parts[6]
# dt = datetime.datetime.strptime(tstamp, '%Y%m%d%H%M%S')
# logger.info(f"Indexing {f} {dt.isoformat()}")
# with rasterio.open(f) as src:
# g = box(*src.bounds)
#
# output.write(
# {
# 'geometry': mapping(g),
# 'properties': {
# 'location': f,
# 'time': dt.isoformat()
# }
# })
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--directory',
help='Single directory to search for .tif files or TileDB array directories.')
parser.add_argument('-o', '--output', default='img_index.shp')
parser.add_argument('input_files', nargs='*',
help='TileDB directories or GeoTIFF files to ingest.')
args = parser.parse_args()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s"
)
if args.input_files:
logger.info("Using input files...")
it = args.input_files
else:
src_dir = args.directory
files = glob.glob(os.path.join(src_dir, '*.tif'))
folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))]
# simple toggle between indexing tiff files or tiledb arrays
if len(files) > 0:
it = files
else:
it = folders
if not it:
raise ValueError("No valid inputs provided.")
if args.directory:
logger.info('Indexer starting')
index(it, args.output)
if __name__ == "__main__":
sys.exit(main())