From 2cf618c9a15c152cd81811957cbcd86cf8c84db6 Mon Sep 17 00:00:00 2001
From: nickb <nickb@ssec.wisc.edu>
Date: Wed, 10 Feb 2021 22:50:48 +0000
Subject: [PATCH] add traceability attributes to our output files (fixes #3)

---
 gridded_glm/libexec/_minute_gridder.py | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/gridded_glm/libexec/_minute_gridder.py b/gridded_glm/libexec/_minute_gridder.py
index cac2087..c4dec0f 100644
--- a/gridded_glm/libexec/_minute_gridder.py
+++ b/gridded_glm/libexec/_minute_gridder.py
@@ -21,6 +21,8 @@ import tempfile
 import shutil
 import atexit
 from glob import glob
+import socket
+from netCDF4 import Dataset
 #from multiprocessing import freeze_support # https://docs.python.org/2/library/multiprocessing.html#multiprocessing.freeze_support
 from functools import partial
 from lmatools.grid.make_grids import write_cf_netcdf_latlon, write_cf_netcdf_noproj, write_cf_netcdf_fixedgrid
@@ -186,6 +188,44 @@ def grid_setup(args, work_dir=os.getcwd()):
     return gridder, args.filenames, start_time, end_time, grid_kwargs
 
 
+def get_cspp_gglm_version():
+    """Return the CSPP Geo GGLM version string, or "unknown" if unavailable.
+
+    The version is read from ``.VERSION.txt`` in the directory named by the
+    ``CSPP_GEO_GGLM_HOME`` environment variable. Surrounding whitespace (such
+    as the file's trailing newline) is stripped so the value is clean when
+    stored as a netCDF attribute.
+    """
+    try:
+        version_filename = os.path.join(os.environ['CSPP_GEO_GGLM_HOME'], ".VERSION.txt")
+        # use a context manager so the file handle is closed promptly
+        with open(version_filename, 'r') as version_file:
+            return version_file.read().strip()
+    except (KeyError, OSError, UnicodeDecodeError):
+        # environment variable unset, or version file missing/unreadable
+        return "unknown"
+
+
+def add_gglm_attrs(netcdf_filename, input_filenames):
+    """Add CSPP Geo GGLM traceability attributes to an existing netCDF file.
+
+    Opens ``netcdf_filename`` in append mode and sets global attributes for
+    the package version, the production host name, and the comma-separated
+    ``input_filenames``. This is best effort: a failure is logged with its
+    traceback and is not raised to the caller.
+    """
+    try:
+        # the context manager closes the file even if setting an attribute fails
+        with Dataset(netcdf_filename, 'a') as nc:
+            nc.setncattr('cspp_geo_gglm_version', get_cspp_gglm_version())
+            nc.setncattr('cspp_geo_gglm_production_host', socket.gethostname())
+            # NOTE: this probably needs to be added somewhere else?
+            nc.setncattr('cspp_geo_gglm_input_files', ",".join(input_filenames))
+    except Exception:
+        log.exception("could not add CSPP Geo GGLM attributes to {}".format(netcdf_filename))
+
+
 if __name__ == '__main__':
 #    freeze_support() # nb. I don't think this is needed as we're not making windows execs at this time
     parser = create_parser()
@@ -222,6 +243,11 @@ if __name__ == '__main__':
     gridded_files = glob(gridded_path)
     log.debug(gridded_files)
 
+    # We need to add the attributes here, before tiling, because satpy (or one of its
+    # dependencies) holds the input gridded file open until the process exits.
+    for f in gridded_files:
+        add_gglm_attrs(f, glm_filenames)
+
     # (optionally) do tiling
     if args.create_tiles:
         from satpy import Scene
@@ -252,6 +278,7 @@ if __name__ == '__main__':
     tiled_path = os.path.join(tempdir_path, 'OR_GLM-L2-GLM?-M?_G??_T??_*.nc')
     tiled_files = glob(tiled_path)
     for f in tiled_files:
+        add_gglm_attrs(f, glm_filenames)
         shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
     for f in gridded_files:
         shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
-- 
GitLab