diff --git a/gridded_glm/libexec/gridded_glm/_minute_gridder.py b/gridded_glm/libexec/gridded_glm/_minute_gridder.py index f0969b1d55d07db96e3e627a9b7950d3c1b6d295..4e9cabd8924b4b391712b91057cbc2e0f2b46738 100644 --- a/gridded_glm/libexec/gridded_glm/_minute_gridder.py +++ b/gridded_glm/libexec/gridded_glm/_minute_gridder.py @@ -54,6 +54,7 @@ from lmatools.grid.make_grids import dlonlat_at_grid_center, grid_h5flashfiles from glmtools.grid.make_grids import grid_GLM_flashes from glmtools.io.glm import parse_glm_filename from lmatools.grid.fixed import get_GOESR_grid, get_GOESR_coordsys +from statistics import create_statistics_file import logging log = logging.getLogger(__name__) @@ -387,18 +388,29 @@ def grid_minute(minute, args): check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products environment_prefix=args.system_environment_prefix_tiles, compress=True) + + create_statistics_file(tile_files=glob(f"{tempdir_path}/*_GLM-L2-GLM*-M?_G??_T??_*.nc"), minute=minute + timedelta(minutes=1)) # pick up output files from the tempdir # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc log.debug("files in {}".format(tempdir_path)) log.debug(os.listdir(tempdir_path)) log.debug("moving output to {}".format(args.output_dir)) + tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles)) tiled_files = glob(tiled_path) for f in tiled_files: add_gglm_attrs(f, glm_filenames) shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) + + stats_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_statistics_*.nc'.format(args.system_environment_prefix_tiles)) + stats_files = glob(stats_path) + for f in stats_files: + add_gglm_attrs(f, glm_filenames) + shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) + for f in gridded_files: + # we add gglm 
attributes above shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) # tempdir cleans itself up via atexit, above diff --git a/gridded_glm/libexec/gridded_glm/statistics.py b/gridded_glm/libexec/gridded_glm/statistics.py new file mode 100644 index 0000000000000000000000000000000000000000..2deea5df14e44ccdd00f2e8e4b6a3bdb8cdbb7a4 --- /dev/null +++ b/gridded_glm/libexec/gridded_glm/statistics.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python3 +""" +Copyright (C) 2022 Space Science and Engineering Center (SSEC), University of Wisconsin-Madison. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +This file is part of the CSPP Geo Gridded GLM software package. CSPP Geo +Gridded GLM takes GOES GLM Level 2 LCFA files and grids them to the ABI +fixed grid. It does this using the open source glmtools python package by +Eric Bruning. glmtools can be found in the runtime directory of this package +or at https://github.com/deeplycloudy/glmtools. +""" + +""" +Functions for creating a statistics file from a stack of Gridded GLM tiles. + +Statistics files document potentially missing ("punctured") tiles and are +required by AWIPS for it to perform multi-minute aggregations. 
# Both patterns are applied to the basename only, so a directory component that
# happens to contain "_T12_" or a 14-digit run can never be mistaken for part
# of the tile filename.
_TILE_NUMBER_RE = re.compile(r"_T([0-9]{2})_")
# The dot is escaped and the pattern anchored so that only a genuine ".nc"
# suffix preceded by the 14-digit timestamp matches.
_TILE_MINUTE_RE = re.compile(r"_([0-9]{14})\.nc$")


def create_statistics_filename(tile_filename: str) -> str:
    """Derive the statistics filename from a donor tile filename.

    The convention is produced from a donor tile file by:
      * replacing the ``_T??_`` tile designator with ``_statistics_``
      * adding an ``e`` before the trailing 14-digit end time

    ex: OR_GLM-L2-GLMF-M6_G16_statistics_e20220907012700.nc

    Any directory prefix of ``tile_filename`` is returned unchanged; only the
    basename is rewritten. Parts of the basename that do not match the
    expected patterns are left as they are.
    """
    basename = os.path.basename(tile_filename)
    # Slice instead of os.path.join so the directory prefix is kept verbatim.
    directory_prefix = tile_filename[:len(tile_filename) - len(basename)]
    statistics_basename = _TILE_NUMBER_RE.sub("_statistics_", basename)
    statistics_basename = _TILE_MINUTE_RE.sub(r"_e\g<1>.nc", statistics_basename)
    return directory_prefix + statistics_basename


def get_tile_number_from_tile_file(tile_filename: str) -> int:
    """Return the two-digit tile number embedded in a tile filename.

    Raises:
        RuntimeError: if the basename contains no ``_T??_`` designator.
    """
    match = _TILE_NUMBER_RE.search(os.path.basename(tile_filename))
    if not match:
        raise RuntimeError(
            "no tile number (_T??_) found in tile filename: {!r}".format(tile_filename))
    # match.group(0) is the entire matched string; group(1) is just the digits
    return int(match.group(1))


def get_tile_numbers(tile_files: Iterable[str]) -> list:
    """Return the tile number of every file in ``tile_files``, in input order."""
    return [get_tile_number_from_tile_file(tf) for tf in tile_files]


def get_minute_from_tile_file(tile_filename: str) -> datetime:
    """Return the timestamp encoded at the end of a tile filename.

    The filename must end in ``_YYYYmmddHHMMSS.nc``.

    Raises:
        RuntimeError: if the filename does not end in a 14-digit timestamp
            followed by ``.nc``.
    """
    match = _TILE_MINUTE_RE.search(os.path.basename(tile_filename))
    if not match:
        raise RuntimeError(
            "no timestamp (_YYYYmmddHHMMSS.nc) found in tile filename: {!r}".format(tile_filename))
    # match.group(0) is the entire matched string; group(1) is just the digits
    return datetime.strptime(match.group(1), '%Y%m%d%H%M%S')


# Reference listing of the dimensions and variables of a statistics file:
#
# dimensions:
#     produced = 19 ;
#     defined = 62 ;
# variables:
#     short produced(produced) ;
#         produced:_FillValue = 0s ;
#         produced:standard_name = "Tiles Produced" ;
#         produced:long_name = "Tiles Produced" ;
#         produced:_Unsigned = "true" ;
#         produced:_Storage = "chunked" ;
#         produced:_ChunkSizes = 19 ;
#         produced:_DeflateLevel = 1 ;
#         produced:_Shuffle = "true" ;
#         produced:_Endianness = "little" ;
#     short defined(defined) ;
#         defined:_FillValue = 0s ;
#         defined:standard_name = "Tiles Defined" ;
#         defined:long_name = "Tiles Defined" ;
#         defined:_Unsigned = "true" ;
#         defined:_Storage = "chunked" ;
#         defined:_ChunkSizes = 62 ;
#         defined:_DeflateLevel = 1 ;
#         defined:_Shuffle = "true" ;
#         defined:_Endianness = "little" ;
def create_statistics_file(tile_files: list,
                           minute: datetime,
                           ) -> str:
    """Write the statistics file for one minute of Gridded GLM tiles.

    Only the tiles in ``tile_files`` whose filename timestamp equals ``minute``
    are considered. The first of them is the "donor": its name determines the
    statistics filename (written alongside it) and its global attributes are
    copied into the new file.

    Args:
        tile_files: paths of candidate tile files.
        minute: timestamp the tile filenames must carry to be included.

    Returns:
        The path of the statistics file that was written.

    Raises:
        ValueError: if no tile in ``tile_files`` matches ``minute``.
        FileExistsError: if the statistics file already exists.
    """
    # Number of entries in the 'defined' variable (values 0..61); taken from
    # the reference statistics file layout.
    defined_tile_count = 62

    good_tiles = [tile for tile in tile_files if get_minute_from_tile_file(tile) == minute]

    # Explicit exceptions rather than assert: asserts are stripped under -O.
    if not good_tiles:
        raise ValueError("no tile files found for minute {}".format(minute))

    donor_tile = good_tiles[0]
    statistics_filename = create_statistics_filename(donor_tile)
    if os.path.exists(statistics_filename):
        raise FileExistsError("statistics file already exists: {}".format(statistics_filename))

    # glob order is arbitrary; sort so the 'produced' record is deterministic
    tile_numbers = sorted(get_tile_numbers(good_tiles))

    # Context managers guarantee both datasets are closed even on failure.
    with netCDF4.Dataset(statistics_filename, 'w') as ds:
        ds.createDimension('defined', size=defined_tile_count)
        ds.createDimension('produced', size=len(tile_numbers))

        var_defined = ds.createVariable('defined',
                                        datatype='i2',
                                        dimensions=('defined',),
                                        zlib=True, complevel=1,
                                        endian='little',
                                        fill_value=0,
                                        )
        var_defined.standard_name = "Tiles Defined"
        var_defined.long_name = "Tiles Defined"
        var_defined._Unsigned = "true"

        # short defined
        # defined = _, 1, 2, 3, ..., 60, 61 ;
        # (the leading 0 equals the fill value, hence the "_")
        var_defined[:] = range(defined_tile_count)

        var_produced = ds.createVariable('produced',
                                         datatype='i2',
                                         dimensions=('produced',),
                                         zlib=True, complevel=1,
                                         endian='little',
                                         fill_value=0,
                                         )
        var_produced.standard_name = "Tiles Produced"
        var_produced.long_name = "Tiles Produced"
        var_produced._Unsigned = "true"

        # short produced
        # produced = 5, 7, 11, 12, 14, 17, 18, 19, 22, 23, 24, 25, 27, 31, 32, 35, 44, 49, 50 ;
        var_produced[:] = tile_numbers

        # attributes that should be copied directly from the donor tile file:
        #     :orbital_slot = "GOES-East" ;
        #     :platform_ID = "G16" ;
        #     :production_site = "GCP" ;
        #     :scene_id = "Full Disk" ;
        #     :spatial_resolution = "2km at nadir" ;
        #     :time_coverage_end = "2022-09-07T01:27:00Z" ;
        #     :time_coverage_start = "2022-09-07T01:26:00Z" ;
        with netCDF4.Dataset(donor_tile, 'r') as donor_ds:
            for attr_name in ('orbital_slot',
                              'platform_ID',
                              'production_site',
                              'scene_id',
                              'spatial_resolution',
                              'time_coverage_end',
                              'time_coverage_start',
                              ):
                setattr(ds, attr_name, getattr(donor_ds, attr_name))

        # attributes that need to be generated:
        #     :dataset_name = "OR_GLM-L2-GLMF-M6_G16_statistics_e20220907012700.nc" ;
        #     :title = "GLM Punctured Gridded Tiles Production Record" ;
        # dataset_name is the bare filename, not the working-directory path
        ds.dataset_name = os.path.basename(statistics_filename)
        ds.title = "GLM Punctured Gridded Tiles Production Record"

        # attributes we're skipping unless we learn they're necessary:
        #     :production_data_source = "Realtime" ;
        #     :production_environment = "OE" ;
        #     :project = "GOES" ;
        #     :timeline_id = "ABI Mode 6" ;

    return statistics_filename
# tests for the new statistics file
def test_statistics_file():
    """Grid one full-disk minute with tiling and check tiles plus the statistics file.

    Runs the minute gridder on the sample LCFA input and asserts that the
    gridded file, every expected tile, and exactly one statistics file for
    the minute were written.
    """
    completed_process = subprocess.run(['python',
        MINUTE_GRIDDER,
        "--goes-sector", "full",
        "--create-tiles",
        "--realtime",
        os.path.join(TESTDIR, "test-inputs/OR_GLM-L2-LCFA_G16_s20220740002400_e20220740003000_c20220740003018.nc"),
        ])
    assert completed_process.returncode == 0
    assert check_fileglob("CG_GLM-L2-GLMF-M3_G16_s20220740002000_e20220740003000_c*", 1)

    # Tiles expected for this input; listed once each (the original list
    # asserted T08 twice).
    expected_tiles = (8, 9, 14, 15, 20, 21, 27, 28, 33, 34, 35,
                      38, 40, 41, 42, 46, 47, 53)
    for tile_number in expected_tiles:
        tile_name = "CSPP_OR_GLM-L2-GLMF-M3_G16_T{:02d}_20220315000300.nc".format(tile_number)
        assert check_fileglob(tile_name, 1), tile_name

    assert check_fileglob("CSPP_OR_GLM-L2-GLMF-M3_G16_statistics_e20220315000300.nc", 1)
def test_tile_variable_names():
    """Check our tile variable names against a ground-segment sample.

    The expected names come from a sample metadata listing for a tile the
    ground segment generated, provided by Lee Byerle via email on 2022-09-21
    (DR_GLMFD-020-B16-M0C00-T021_G18_s2022255171000_c2022255171140).
    """
    completed_process = subprocess.run(['python',
        MINUTE_GRIDDER,
        "--goes-sector", "conus",
        "--create-tiles",
        "--realtime",
        os.path.join(TESTDIR, "test-inputs/OR_GLM-L2-LCFA_G16_s20220740002400_e20220740003000_c20220740003018.nc"),
        ])
    assert completed_process.returncode == 0
    assert check_fileglob("CG_GLM-L2-GLMC-M3_G16_s20220740002000_e20220740003000_c*", 1)

    tile_name = "CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc"
    assert check_fileglob(tile_name, 1)

    # Context manager so the file handle is released even when an assert fails.
    with netCDF4.Dataset(tile_name, 'r') as ds:
        for variable_name in ('x',
                              'y',
                              'Flash_extent_density',
                              'Total_Optical_energy',
                              'Minimum_flash_area',
                              'DQF',
                              ):
            assert variable_name in ds.variables, variable_name
def test_create_statistics_filename():
    """The tile designator becomes "statistics" and the end time gains an "e"."""
    donor_tile = "CSPP_OR_GLM-L2-GLMF-M3_G16_T07_20220315000300.nc"
    expected_name = "CSPP_OR_GLM-L2-GLMF-M3_G16_statistics_e20220315000300.nc"
    actual_name = create_statistics_filename(donor_tile)
    assert actual_name == expected_name


def test_get_tile_number_from_tile_file():
    """The two-digit tile designator is returned as an integer."""
    tile_number = get_tile_number_from_tile_file("CSPP_OR_GLM-L2-GLMF-M3_G16_T07_20220315000300.nc")
    assert tile_number == 7