Skip to content
Snippets Groups Projects
Commit c61a0314 authored by David Hoese's avatar David Hoese
Browse files

Add basic semi-working mapcache containers and very basic tests

parent 8cca40fd
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,6 @@ FROM tiledb/tiledb-geospatial:latest ...@@ -2,7 +2,6 @@ FROM tiledb/tiledb-geospatial:latest
WORKDIR /work WORKDIR /work
# TODO may need the unzip command to be installed if not already
RUN apt-get update && apt-get install -y unzip && \ RUN apt-get update && apt-get install -y unzip && \
wget http://ssec.wisc.edu/~rayg/pub/amqpfind.zip && \ wget http://ssec.wisc.edu/~rayg/pub/amqpfind.zip && \
unzip amqpfind.zip && \ unzip amqpfind.zip && \
......
...@@ -6,6 +6,7 @@ import warnings ...@@ -6,6 +6,7 @@ import warnings
import logging import logging
import subprocess import subprocess
import shutil import shutil
import json
from glob import glob from glob import glob
import tile_index import tile_index
...@@ -71,6 +72,11 @@ def main(): ...@@ -71,6 +72,11 @@ def main():
help="Shapefile filename pattern to use and placed " help="Shapefile filename pattern to use and placed "
"in the output directory. " "in the output directory. "
"(default: '{product}.shp')") "(default: '{product}.shp')")
parser.add_argument('--json-times', action='store_true',
help="Print a JSON dict to stdout containing removed "
"and added times (ex. '{\'times_removed\': [], "
"\'times_added\': []}'). Times are in the form: "
"YYYY-MM-DDTHH:MM:SS.")
parser.add_argument('out_dir', parser.add_argument('out_dir',
help="Output path to save tile information to (ex. '/data/tiles/{product}')") help="Output path to save tile information to (ex. '/data/tiles/{product}')")
parser.add_argument('input_files', nargs="+", parser.add_argument('input_files', nargs="+",
...@@ -78,6 +84,8 @@ def main(): ...@@ -78,6 +84,8 @@ def main():
args = parser.parse_args() args = parser.parse_args()
groups = group_files(args.products, args.input_files) groups = group_files(args.products, args.input_files)
all_added = set()
all_removed = set()
for prod, prod_files in groups.items(): for prod, prod_files in groups.items():
out_dir = args.out_dir.format(product=prod) out_dir = args.out_dir.format(product=prod)
os.makedirs(out_dir, exist_ok=True) os.makedirs(out_dir, exist_ok=True)
...@@ -97,8 +105,14 @@ def main(): ...@@ -97,8 +105,14 @@ def main():
# create shape file # create shape file
LOG.info("Rebuilding shapefile index with:\n\t{}".format(", ".join(all_prod_files))) LOG.info("Rebuilding shapefile index with:\n\t{}".format(", ".join(all_prod_files)))
tile_index.index(all_prod_files, shp_pathname) removed_times, added_times = tile_index.index(all_prod_files, shp_pathname)
all_removed.update(removed_times)
all_added.update(added_times)
if args.json_times:
all_removed = [x.isoformat() for x in sorted(all_removed)]
all_added = [x.isoformat() for x in sorted(all_added)]
print(json.dumps({'removed_times': all_removed, 'added_times': all_added}))
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
test -d "/data" test -d "/data"
export AMQPFIND_ARGS=${AMQPFIND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"} export AMQPFIND_ARGS=${AMQPFIND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"}
export AMQPSEND_ARGS=${AMQPSEND_ARGS:-"-H cspp-geo-rabbit -X satellite -u guest -p guest"} export AMQPSEND_ARGS=${AMQPSEND_ARGS:-"-D 15 -H cspp-geo-rabbit -X satellite -u guest -p guest"}
export AMQPFIND_TOPIC=${AMQPFIND_TOPIC:-'data.goes.*.abi.*.l1b.geotiff.complete'} export AMQPFIND_TOPIC=${AMQPFIND_TOPIC:-'data.goes.*.abi.*.l1b.geotiff.complete'}
export G2G_PRODUCTS=${G2G_PRODUCTS:-"C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 C13 C14 C15 C16 true_color"} export G2G_PRODUCTS=${G2G_PRODUCTS:-"C01 C02 C03 C04 C05 C06 C07 C08 C09 C10 C11 C12 C13 C14 C15 C16 true_color"}
export TILE_ARGS=${TILE_ARGS:-""} export TILE_ARGS=${TILE_ARGS:-""}
...@@ -62,16 +62,19 @@ run_tile_gen() { ...@@ -62,16 +62,19 @@ run_tile_gen() {
out_dir="${out_dir}/{product}" out_dir="${out_dir}/{product}"
fi fi
echo "Generating tiles in directory: ${out_dir}" echo "Generating tiles in directory: ${out_dir}"
python3 generate_tiles.py ${TILE_ARGS} -p ${G2G_PRODUCTS} -- ${out_dir} ${path} for product in ${G2G_PRODUCTS}; do
# OUT/<product>/<product>.shp json_times_changed=$(python3 generate_tiles.py ${TILE_ARGS} --json-times -p ${product} -- ${out_dir} ${path})
glob_pattern="${out_dir}/*/*.shp" # OUT/<product>/<product>.shp
# Remove the /data prefix glob_pattern="${out_dir}/${product}/*.shp"
glob_pattern="${glob_pattern/${dst_dir}\//}" # Remove the /data prefix
glob_pattern="${glob_pattern/${dst_dir}\//}"
amqpsend_topic="data.${satellite_family}.${satellite_id}.${instrument}.${data_type}.l1b.tiledb.complete" amqpsend_topic="data.${satellite_family}.${satellite_id}.${instrument}.${data_type}.l1b.tiles.${product}.complete"
# json_info="{path: ${glob_pattern}, satellite_family: ${satellite_family}, satellite_ID: ${satellite_id}, instrument: ${instrument}, data_type: ${data_type}}" json_info="{\"path\": \"${glob_pattern}\", \"satellite_family\": \"${satellite_family}\", \"satellite_ID\": \"${satellite_id}\", \"instrument\": \"${instrument}\", \"data_type\": \"${data_type}\", \"product\": \"${product}\"}"
json_info="{\"path\": \"${glob_pattern}\", \"satellite_family\": \"${satellite_family}\", \"satellite_ID\": \"${satellite_id}\", \"instrument\": \"${instrument}\", \"data_type\": \"${data_type}\"}" # append the JSON returned by the python (remove the curly braces at the ends)
echo -e "[[\"$amqpsend_topic\", $json_info]]" | python3 /work/amqpfind/amqpsend.py ${AMQPSEND_ARGS} json_info="${json_info:0:-1}, ${json_times_changed:1}"
echo -e "[[\"$amqpsend_topic\", $json_info]]" | python3 /work/amqpfind/amqpsend.py ${AMQPSEND_ARGS}
done
echo "Done generating tiles for ${path}" echo "Done generating tiles for ${path}"
} }
......
...@@ -41,6 +41,18 @@ possible_time_regex = ( ...@@ -41,6 +41,18 @@ possible_time_regex = (
) )
def get_current_times(shp_filename):
"""Get currently available times in the shapefile."""
if not os.path.isfile(shp_filename):
# no shapefile, no times
return
with fiona.open(shp_filename, 'r') as shp_file:
for time_step in shp_file:
t = time_step['properties']['time']
yield datetime.datetime.strptime(t, '%Y-%m-%dT%H:%M:%S')
def get_file_time(fn): def get_file_time(fn):
for regex, time_fmt in possible_time_regex: for regex, time_fmt in possible_time_regex:
matches = regex.findall(fn) matches = regex.findall(fn)
...@@ -57,16 +69,23 @@ def index(input_files, output_shapefile): ...@@ -57,16 +69,23 @@ def index(input_files, output_shapefile):
once. Repeated calls to this function will overwrite existing once. Repeated calls to this function will overwrite existing
shapefile information. shapefile information.
Returns:
(times_removed, times_added): Two lists of times removed by this index
being updated and times added by this index being updated.
""" """
import tempfile import tempfile
out_dir, shp_fn = os.path.split(output_shapefile) out_dir, shp_fn = os.path.split(output_shapefile)
tmp_dir = tempfile.mkdtemp("_tile_index") tmp_dir = tempfile.mkdtemp("_tile_index")
tmp_shapefile = os.path.join(tmp_dir, shp_fn) tmp_shapefile = os.path.join(tmp_dir, shp_fn)
current_times = set(get_current_times(output_shapefile))
new_times = set()
with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile', with fiona.open(tmp_shapefile, 'w', driver='ESRI Shapefile',
schema=temporal_schema) as output: schema=temporal_schema) as output:
for f in input_files: for f in input_files:
try: try:
dt = get_file_time(f) dt = get_file_time(f)
new_times.add(dt)
except ValueError: except ValueError:
logger.error(f"Can't time for file {f}") logger.error(f"Can't time for file {f}")
continue continue
...@@ -90,6 +109,10 @@ def index(input_files, output_shapefile): ...@@ -90,6 +109,10 @@ def index(input_files, output_shapefile):
# we don't need the temporary directory anymore # we don't need the temporary directory anymore
shutil.rmtree(tmp_dir, ignore_errors=True) shutil.rmtree(tmp_dir, ignore_errors=True)
times_removed = current_times - new_times
times_added = new_times = current_times
return sorted(times_removed), sorted(times_added)
# def index(src_dir, output): # def index(src_dir, output):
# files = glob.glob(os.path.join(src_dir, '*.tif')) # files = glob.glob(os.path.join(src_dir, '*.tif'))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment