import sys import fiona from fiona.crs import from_epsg import rasterio from shapely.geometry import mapping, box import argparse import datetime import glob import logging import os import re import shutil # Remap geostationary to EPSG 4326 # gdalwarp -t_srs EPSG:4326 in.tif out.tif # Note if using docker tiledb-geospatial image then requires shapely - `pip3 install shapely` # export CPL_DEBUG=ON - GDAL # export MS_DEBUGLEVEL=6 - MAPSERVER # python3 tile_index.py -dir data # sample mapserver queries # mapserv -nh "QUERY_STRING=map=goes.map&request=GetCapabilities&service=WMS&version=1.1.1" # mapserv -nh "QUERY_STRING=map=goes.map&request=GetMap&service=WMS&version=1.1.1&layers=goes_abi&srs=EPSG:4326&bbox=-180,-90,180,90&format=image/jpeg&WIDTH=1000&HEIGHT=1000&TIME=2019-12-12T19:10:18" > out.jpg # TODO add overview example to mapserver logger = logging.getLogger(__name__) temporal_schema = { 'geometry': 'Polygon', 'properties': { 'location': 'str', 'time': 'str:19' } } possible_time_regex = ( (re.compile(r'\d{4}\d{2}\d{2}_\d{2}\d{2}\d{2}'), '%Y%m%d_%H%M%S'), (re.compile(r'\d{4}\d{2}\d{2}T\d{2}\d{2}\d{2}'), '%Y%m%dT%H%M%S'), ) def get_current_times(shp_filename): """Get currently available times in the shapefile.""" if not os.path.isfile(shp_filename): # no shapefile, no times return with fiona.open(shp_filename, 'r') as shp_file: for time_step in shp_file: t = time_step['properties']['time'] yield datetime.datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') def get_file_time(fn): for regex, time_fmt in possible_time_regex: matches = regex.findall(fn) if matches: return datetime.datetime.strptime(matches[-1], time_fmt) else: raise ValueError("Unknown filename scheme, can't determine file time.") def index(input_files, output_shapefile): """Create shapefile for location and times of provided geotiffs or tileDB arrays. Note: All layers to be included in the shapefile must be provided all at once. Repeated calls to this function will overwrite existing shapefile information. Returns: (times_removed, times_added): Two lists of times removed by this index being updated and times added by this index being updated. """ import tempfile out_dir, shp_fn = os.path.split(output_shapefile) tmp_dir = tempfile.mkdtemp("_tile_index") tmp_shapefile = os.path.join(tmp_dir, shp_fn) current_times = set(get_current_times(output_shapefile)) new_times = set() with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile', schema=temporal_schema) as output: for f in input_files: try: dt = get_file_time(f) new_times.add(dt) except ValueError: logger.error(f"Can't time for file {f}") continue logger.info(f"Indexing {f} {dt.isoformat()}") with rasterio.open(f) as src: g = box(*src.bounds) output.write( { 'geometry': mapping(g), 'properties': { 'location': f, 'time': dt.isoformat() } }) # move the shapefile contents to the final destination for fn in os.listdir(tmp_dir): shutil.move(os.path.join(tmp_dir, fn), os.path.join(out_dir, fn)) # we don't need the temporary directory anymore shutil.rmtree(tmp_dir, ignore_errors=True) times_removed = current_times - new_times times_added = new_times - current_times return sorted(times_removed), sorted(times_added) # def index(src_dir, output): # files = glob.glob(os.path.join(src_dir, '*.tif')) # folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))] # # with fiona.open(output, 'w', driver='ESRI Shapefile', # schema=temporal_schema) as output: # # simple toggle between indexing tiff files or tiledb arrays # if len(files) > 0: # it = files # else: # it = folders # # for f in it: # parts = f.split('_') # tstamp = parts[5] + parts[6] # dt = datetime.datetime.strptime(tstamp, '%Y%m%d%H%M%S') # logger.info(f"Indexing {f} {dt.isoformat()}") # with rasterio.open(f) as src: # g = box(*src.bounds) # # output.write( # { # 'geometry': mapping(g), # 'properties': { # 'location': f, # 'time': dt.isoformat() # } # }) def main(): parser = argparse.ArgumentParser() parser.add_argument('-d', '--directory', help='Single directory to search for .tif files or TileDB array directories.') parser.add_argument('-o', '--output', default='img_index.shp') parser.add_argument('input_files', nargs='*', help='TileDB directories or GeoTIFF files to ingest.') args = parser.parse_args() logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s" ) if args.input_files: logger.info("Using input files...") it = args.input_files else: src_dir = args.directory files = glob.glob(os.path.join(src_dir, '*.tif')) folders = [os.path.join(src_dir, o) for o in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, o))] # simple toggle between indexing tiff files or tiledb arrays if len(files) > 0: it = files else: it = folders if not it: raise ValueError("No valid inputs provided.") if args.directory: logger.info('Indexer starting') index(it, args.output) if __name__ == "__main__": sys.exit(main())