Skip to content
Snippets Groups Projects
Commit 3daa9198 authored by Nick Bearson's avatar Nick Bearson
Browse files

Merge branch 'stats-files' into 'master'

add support for stats files

See merge request !31
parents ca6b7133 f06bae6c
No related branches found
No related tags found
1 merge request: !31 add support for stats files
...@@ -54,6 +54,7 @@ from lmatools.grid.make_grids import dlonlat_at_grid_center, grid_h5flashfiles ...@@ -54,6 +54,7 @@ from lmatools.grid.make_grids import dlonlat_at_grid_center, grid_h5flashfiles
from glmtools.grid.make_grids import grid_GLM_flashes from glmtools.grid.make_grids import grid_GLM_flashes
from glmtools.io.glm import parse_glm_filename from glmtools.io.glm import parse_glm_filename
from lmatools.grid.fixed import get_GOESR_grid, get_GOESR_coordsys from lmatools.grid.fixed import get_GOESR_grid, get_GOESR_coordsys
from statistics import create_statistics_file
import logging import logging
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
...@@ -387,18 +388,29 @@ def grid_minute(minute, args): ...@@ -387,18 +388,29 @@ def grid_minute(minute, args):
check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products check_categories=False, # check_categories is there because of that issue I mentioned where DQF is all valid all the time so there is no way to detect empty tiles unless we ignore the "category" products
environment_prefix=args.system_environment_prefix_tiles, environment_prefix=args.system_environment_prefix_tiles,
compress=True) compress=True)
create_statistics_file(tile_files=glob(f"{tempdir_path}/*_GLM-L2-GLM*-M?_G??_T??_*.nc"), minute=minute + timedelta(minutes=1))
# pick up output files from the tempdir # pick up output files from the tempdir
# output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc # output looks like: CG_GLM-L2-GLMC-M3_G17_T03_20200925160040.nc
log.debug("files in {}".format(tempdir_path)) log.debug("files in {}".format(tempdir_path))
log.debug(os.listdir(tempdir_path)) log.debug(os.listdir(tempdir_path))
log.debug("moving output to {}".format(args.output_dir)) log.debug("moving output to {}".format(args.output_dir))
tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles)) tiled_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_T??_*.nc'.format(args.system_environment_prefix_tiles))
tiled_files = glob(tiled_path) tiled_files = glob(tiled_path)
for f in tiled_files: for f in tiled_files:
add_gglm_attrs(f, glm_filenames) add_gglm_attrs(f, glm_filenames)
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
stats_path = os.path.join(tempdir_path, '{}_GLM-L2-GLM*-M?_G??_statistics_*.nc'.format(args.system_environment_prefix_tiles))
stats_files = glob(stats_path)
for f in stats_files:
add_gglm_attrs(f, glm_filenames)
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
for f in gridded_files: for f in gridded_files:
# we add gglm attributes above
shutil.move(f, os.path.join(args.output_dir, os.path.basename(f))) shutil.move(f, os.path.join(args.output_dir, os.path.basename(f)))
# tempdir cleans itself up via atexit, above # tempdir cleans itself up via atexit, above
......
#!/usr/bin/env python3
"""
Copyright (C) 2022 Space Science and Engineering Center (SSEC), University of Wisconsin-Madison.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
This file is part of the CSPP Geo Gridded GLM software package. CSPP Geo
Gridded GLM takes GOES GLM Level 2 LCFA files and grids them to the ABI
fixed grid. It does this using the open source glmtools python package by
Eric Bruning. glmtools can be found in the runtime directory of this package
or at https://github.com/deeplycloudy/glmtools.
"""
"""
Functions for creating a statistics file from a stack of Gridded GLM tiles.
Statistics files document potentially missing ("punctured") tiles and are
required by AWIPS for it to perform multi-minute aggregations.
"""
import os
from datetime import datetime
import re
from typing import Iterable
import netCDF4
def create_statistics_filename(tile_filename: str) -> str:
    """
    Derive the statistics file name that corresponds to a donor tile file.

    ex: CSPP_OR_GLM-L2-GLMF-M3_G16_T07_20220315000300.nc
     -> CSPP_OR_GLM-L2-GLMF-M3_G16_statistics_e20220315000300.nc

    The convention is produced from the donor tile name by:
    * replacing the T?? tile field with "statistics"
    * adding an "e" before the 14-digit end time

    Only the final path component is rewritten, so a directory that happens
    to contain a "_T??_" or "_<14 digits>.nc" sequence is left untouched.

    :param tile_filename: tile file name, with or without a leading directory
    :return: the statistics file name, in the same directory as the tile
    """
    directory, basename = os.path.split(tile_filename)
    basename = re.sub(r"_T[0-9]{2}_", "_statistics_", basename)
    # the dot is escaped and the pattern anchored so only a real ".nc" suffix matches
    basename = re.sub(r"_([0-9]{14})\.nc$", r"_e\g<1>.nc", basename)
    return os.path.join(directory, basename)
def get_tile_number_from_tile_file(tile_filename: str) -> int:
    """
    Extract the two-digit tile number from the "_T??_" field of a tile file name.

    Only the final path component is searched, so a "_T??_" sequence in a
    parent directory name cannot be mistaken for the tile field.

    :param tile_filename: tile file name, with or without a leading directory
    :return: the tile number as an int (e.g. "_T07_" -> 7)
    :raises RuntimeError: if the file name contains no "_T??_" field
    """
    match = re.search(r"_T([0-9]{2})_", os.path.basename(tile_filename))
    if not match:
        raise RuntimeError(f"no _T??_ tile field found in tile filename: {tile_filename!r}")
    # group(0) would be the entire matched string; group(1) is just the digits
    return int(match.group(1))
def get_tile_numbers(tile_files: list) -> list:
    """Return the tile number of every file in tile_files, in the same order."""
    return list(map(get_tile_number_from_tile_file, tile_files))
def get_minute_from_tile_file(tile_filename: str) -> datetime:
    """
    Parse the trailing 14-digit timestamp of a tile file name.

    ex: CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc -> 2022-03-15 00:03:00

    :param tile_filename: tile file name ending in "_YYYYmmddHHMMSS.nc"
    :return: the timestamp as a naive datetime
    :raises RuntimeError: if the name does not end in "_<14 digits>.nc"
    """
    # the dot is escaped so only a literal ".nc" suffix is accepted
    match = re.search(r"_([0-9]{14})\.nc$", tile_filename)
    if not match:
        raise RuntimeError(f"no _YYYYmmddHHMMSS.nc timestamp found in tile filename: {tile_filename!r}")
    # group(0) would be the entire matched string; group(1) is just the digits
    return datetime.strptime(match.group(1), '%Y%m%d%H%M%S')
"""
variables:
dimensions:
produced = 19 ;
defined = 62 ;
variables:
short produced(produced) ;
produced:_FillValue = 0s ;
produced:standard_name = "Tiles Produced" ;
produced:long_name = "Tiles Produced" ;
produced:_Unsigned = "true" ;
produced:_Storage = "chunked" ;
produced:_ChunkSizes = 19 ;
produced:_DeflateLevel = 1 ;
produced:_Shuffle = "true" ;
produced:_Endianness = "little" ;
short defined(defined) ;
defined:_FillValue = 0s ;
defined:standard_name = "Tiles Defined" ;
defined:long_name = "Tiles Defined" ;
defined:_Unsigned = "true" ;
defined:_Storage = "chunked" ;
defined:_ChunkSizes = 62 ;
defined:_DeflateLevel = 1 ;
defined:_Shuffle = "true" ;
defined:_Endianness = "little" ;
"""
def _create_tile_variable(ds, name: str, label: str, values):
    """Create the compressed 16-bit variable `name` along the dimension `name` and fill it."""
    var = ds.createVariable(name,
                            datatype='i2',
                            # dimensions must be a tuple; refer to the dimension by name
                            dimensions=(name,),
                            zlib=True, complevel=1,
                            endian='little',
                            fill_value=0,
                            )
    var.standard_name = label
    var.long_name = label
    var._Unsigned = "true"
    var[:] = values
    return var


def create_statistics_file(tile_files: list,
                           minute: datetime,
                           ) -> str:
    """
    Write the statistics file for the tiles of one minute.

    The tiles whose file name timestamp equals `minute` are selected; the first
    of them acts as the "donor" that supplies the output file name (see
    create_statistics_filename) and the global attributes that are copied over.

    The file holds two variables:
    * defined:  0..61, with 0 stored as the fill value
    * produced: the tile numbers found for this minute, in ascending order

    :param tile_files: candidate tile file names (other minutes are ignored)
    :param minute: timestamp, as it appears in the tile file names, to summarize
    :return: the name of the statistics file that was written
    :raises ValueError: if no tile in tile_files matches `minute`
    :raises FileExistsError: if the statistics file already exists
    """
    good_tiles = [tile for tile in tile_files if get_minute_from_tile_file(tile) == minute]
    # explicit raises rather than assert, which is stripped when python runs with -O
    if not good_tiles:
        raise ValueError(f"no tile files found for minute {minute}")

    donor_tile = good_tiles[0]
    statistics_filename = create_statistics_filename(donor_tile)
    if os.path.exists(statistics_filename):
        raise FileExistsError(f"statistics file already exists: {statistics_filename}")

    # tile_files may arrive in arbitrary order; sort so the output is deterministic
    tile_numbers = sorted(get_tile_numbers(good_tiles))

    # context managers close both datasets even if something below raises
    with netCDF4.Dataset(statistics_filename, 'w') as ds:
        ds.createDimension('defined', size=62)
        ds.createDimension('produced', size=len(tile_numbers))

        # short defined
        # defined = _, 1, 2, 3, ..., 60, 61 ;
        _create_tile_variable(ds, 'defined', "Tiles Defined", range(62))

        # short produced
        # produced = 5, 7, 11, 12, 14, 17, 18, 19, 22, 23, 24, 25, 27, 31, 32, 35, 44, 49, 50 ;
        _create_tile_variable(ds, 'produced', "Tiles Produced", tile_numbers)

        # attributes that are copied directly from the donor tile file, e.g.:
        #   :orbital_slot = "GOES-East" ;
        #   :platform_ID = "G16" ;
        #   :production_site = "GCP" ;
        #   :scene_id = "Full Disk" ;
        #   :spatial_resolution = "2km at nadir" ;
        #   :time_coverage_end = "2022-09-07T01:27:00Z" ;
        #   :time_coverage_start = "2022-09-07T01:26:00Z" ;
        with netCDF4.Dataset(donor_tile, 'r') as donor_ds:
            for attr_name in ('orbital_slot',
                              'platform_ID',
                              'production_site',
                              'scene_id',
                              'spatial_resolution',
                              'time_coverage_end',
                              'time_coverage_start',
                              ):
                setattr(ds, attr_name, getattr(donor_ds, attr_name))

        # attributes that need to be generated:
        #   :dataset_name = "OR_GLM-L2-GLMF-M6_G16_statistics_e20220907012700.nc" ;
        #   :title = "GLM Punctured Gridded Tiles Production Record" ;
        # dataset_name is the bare file name, without the directory it was written to
        ds.dataset_name = os.path.basename(statistics_filename)
        ds.title = "GLM Punctured Gridded Tiles Production Record"

        # attributes we're skipping unless we learn they're necessary:
        #   :production_data_source = "Realtime" ;
        #   :production_environment = "OE" ;
        #   :project = "GOES" ;
        #   :timeline_id = "ABI Mode 6" ;

    return statistics_filename
...@@ -32,11 +32,44 @@ def test_realtime(): ...@@ -32,11 +32,44 @@ def test_realtime():
]) ])
assert completed_process.returncode == 0 assert completed_process.returncode == 0
assert(check_fileglob("CG_GLM-L2-GLMC-M3_G16_s20220740002000_e20220740003000_c*", 1)) assert(check_fileglob("CG_GLM-L2-GLMC-M3_G16_s20220740002000_e20220740003000_c*", 1))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T*.nc", 4))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc", 1)) assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc", 1))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T05_20220315000300.nc", 1)) assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T05_20220315000300.nc", 1))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T04_20220315000300.nc", 1)) assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T04_20220315000300.nc", 1))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T07_20220315000300.nc", 1)) assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T07_20220315000300.nc", 1))
# tests for the new statistics file
def test_statistics_file():
    """Grid one full-disk sample in realtime mode and check the tiles and statistics file it produces."""
    glm_input = os.path.join(TESTDIR, "test-inputs/OR_GLM-L2-LCFA_G16_s20220740002400_e20220740003000_c20220740003018.nc")
    command = ['python',
               MINUTE_GRIDDER,
               "--goes-sector", "full",
               "--create-tiles",
               "--realtime",
               glm_input,
               ]
    completed_process = subprocess.run(command)
    assert completed_process.returncode == 0

    assert check_fileglob("CG_GLM-L2-GLMF-M3_G16_s20220740002000_e20220740003000_c*", 1)

    # tile numbers in the order they have always been checked (T08 is checked twice)
    expected_tiles = (8, 33, 34, 28, 53, 35, 14, 8, 46, 20,
                      38, 42, 15, 47, 9, 21, 41, 27, 40)
    for tile_number in expected_tiles:
        tile_name = f"CSPP_OR_GLM-L2-GLMF-M3_G16_T{tile_number:02d}_20220315000300.nc"
        assert check_fileglob(tile_name, 1)

    assert check_fileglob("CSPP_OR_GLM-L2-GLMF-M3_G16_statistics_e20220315000300.nc", 1)
def test_g16(): def test_g16():
completed_process = subprocess.run(['python', completed_process = subprocess.run(['python',
MINUTE_GRIDDER, MINUTE_GRIDDER,
......
...@@ -74,6 +74,7 @@ def test_issue5_3minutes(): ...@@ -74,6 +74,7 @@ def test_issue5_3minutes():
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000100.nc")) assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000100.nc"))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000200.nc")) assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000200.nc"))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc")) assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc"))
assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_statistics_*.nc", 3))
def test_issue5_crossminutes_fail(): def test_issue5_crossminutes_fail():
completed_process = subprocess.run(['python', completed_process = subprocess.run(['python',
......
#!/usr/bin/env python3
"""
Gridded GLM tile testing
"""
import subprocess
import os
import warnings
import netCDF4
from testlib.common import TESTDIR, MINUTE_GRIDDER
from testlib.common import check_fileglob
"""
Check our tile variable names against a sample metadata listing for a tile the ground segment generated.
Provided by Lee Byerle via email on 2022-09-21
(DR_GLMFD-020-B16-M0C00-T021_G18_s2022255171000_c2022255171140)
"""
def test_tile_variable_names():
    """Grid one CONUS sample in realtime mode and check the variable names present in tile T08."""
    completed_process = subprocess.run(['python',
                                        MINUTE_GRIDDER,
                                        "--goes-sector", "conus",
                                        "--create-tiles",
                                        "--realtime",
                                        os.path.join(TESTDIR, "test-inputs/OR_GLM-L2-LCFA_G16_s20220740002400_e20220740003000_c20220740003018.nc"),
                                        ])
    assert completed_process.returncode == 0
    assert(check_fileglob("CG_GLM-L2-GLMC-M3_G16_s20220740002000_e20220740003000_c*", 1))
    assert(check_fileglob("CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc", 1))

    # the context manager closes the tile even when an assertion fails
    with netCDF4.Dataset('CSPP_OR_GLM-L2-GLMC-M3_G16_T08_20220315000300.nc', 'r') as ds:
        for variable_name in ('x',
                              'y',
                              'Flash_extent_density',
                              'Total_Optical_energy',
                              'Minimum_flash_area',
                              'DQF',
                              ):
            assert variable_name in ds.variables, variable_name
\ No newline at end of file
#!/usr/bin/env python3
"""
Gridded GLM function tests
"""
import sys
from testlib.common import PYDIR
sys.path.append(PYDIR)
from gridded_glm.statistics import create_statistics_filename, get_tile_number_from_tile_file
def test_create_statistics_filename():
    """A tile name maps to its statistics name: T?? becomes "statistics" and the end time gains an "e"."""
    tile_name = "CSPP_OR_GLM-L2-GLMF-M3_G16_T07_20220315000300.nc"
    expected = "CSPP_OR_GLM-L2-GLMF-M3_G16_statistics_e20220315000300.nc"
    assert create_statistics_filename(tile_name) == expected
def test_get_tile_number_from_tile_file():
    """The T07 field of a tile name is returned as the integer 7."""
    tile_name = "CSPP_OR_GLM-L2-GLMF-M3_G16_T07_20220315000300.nc"
    tile_number = get_tile_number_from_tile_file(tile_name)
    assert tile_number == 7
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment