From 2cf618c9a15c152cd81811957cbcd86cf8c84db6 Mon Sep 17 00:00:00 2001
From: nickb <nickb@ssec.wisc.edu>
Date: Wed, 10 Feb 2021 22:50:48 +0000
Subject: [PATCH] add traceability attributes to our output files (fixes #3)

---
Reviewer note: this patch was re-flowed from a whitespace-collapsed copy, so
the indentation of the context lines is a best guess (TODO confirm against the
target file before applying). The two helper functions were revised relative
to the original commit (narrow exception handling, context managers,
docstrings), so the blob ids on the "index" line below no longer describe the
post-image.

 gridded_glm/libexec/_minute_gridder.py | 45 +++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/gridded_glm/libexec/_minute_gridder.py b/gridded_glm/libexec/_minute_gridder.py
index cac2087..c4dec0f 100644
--- a/gridded_glm/libexec/_minute_gridder.py
+++ b/gridded_glm/libexec/_minute_gridder.py
@@ -21,6 +21,8 @@ import tempfile
 import shutil
 import atexit
 from glob import glob
+import socket
+from netCDF4 import Dataset
 #from multiprocessing import freeze_support # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.freeze_support
 from functools import partial
 from lmatools.grid.make_grids import write_cf_netcdf_latlon, write_cf_netcdf_noproj, write_cf_netcdf_fixedgrid
@@ -186,6 +188,43 @@ def grid_setup(args, work_dir=os.getcwd()):
 
     return gridder, args.filenames, start_time, end_time, grid_kwargs
 
+def get_cspp_gglm_version():
+    """Return the CSPP Geo GGLM version string, or "unknown" if unavailable.
+
+    The version is read from ``.VERSION.txt`` in the directory named by the
+    ``CSPP_GEO_GGLM_HOME`` environment variable.
+    """
+    gglm_home = os.getenv('CSPP_GEO_GGLM_HOME')
+    if gglm_home is None:
+        return "unknown"
+    version_filename = os.path.join(gglm_home, ".VERSION.txt")
+    try:
+        with open(version_filename, 'r') as version_file:
+            # drop surrounding whitespace (e.g. the trailing newline)
+            return version_file.read().strip()
+    except (OSError, ValueError):
+        return "unknown"
+
+
+def add_gglm_attrs(netcdf_filename, input_filenames):
+    """Add CSPP Geo GGLM traceability attributes to an existing netCDF file.
+
+    Sets the package version, the production host name and the comma-joined
+    input file names as global attributes.  Failures are logged, not raised.
+
+    :param netcdf_filename: path of the netCDF file to open in append mode
+    :param input_filenames: iterable of input file name strings to record
+    """
+    try:
+        # the context manager closes the file even if an attribute write fails
+        with Dataset(netcdf_filename, 'a') as nc:
+            nc.cspp_geo_gglm_version = get_cspp_gglm_version()
+            nc.cspp_geo_gglm_production_host = socket.gethostname()
+            nc.cspp_geo_gglm_input_files = ",".join(input_filenames) # this probably needs to be added somewhere else?
+    except Exception:
+        log.exception("could not add CSPP Geo GGLM attributes to {}".format(netcdf_filename))
+
+
 if __name__ == '__main__':
 #    freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time
     parser = create_parser()
@@ -222,6 +261,11 @@ if __name__ == '__main__':
     gridded_files = glob(gridded_path)
     log.debug(gridded_files)
 
+    # we need to add attributes here due to an issue where satpy (or its dependencies) are
+    # holding the input gridded file open until the process exits
+    for f in gridded_files:
+        add_gglm_attrs(f, glm_filenames)
+
     # (optionally) do tiling
     if args.create_tiles:
         from satpy import Scene
@@ -252,6 +296,7 @@ if __name__ == '__main__':
     tiled_path = os.path.join(tempdir_path, 'OR_GLM-L2-GLM?-M?_G??_T??_*.nc')
     tiled_files = glob(tiled_path)
     for f in tiled_files:
+        add_gglm_attrs(f, glm_filenames)
         shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
     for f in gridded_files:
         shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
-- 
GitLab