From 5bf987bcb56d628fcd3aee53b93e7913f2c9d136 Mon Sep 17 00:00:00 2001 From: David Hoese <david.hoese@ssec.wisc.edu> Date: Thu, 25 Jul 2024 15:23:19 -0500 Subject: [PATCH] Cleanup round 5 --- .pre-commit-config.yaml | 18 +- aossceilo/CONFIG.py | 1 + aossceilo/__init__.py | 1 + aossceilo/level_b1/message.py | 1 + aossceilo/level_b1/nc.py | 404 +++++++++++++++++----------------- aossceilo/message.py | 1 + aossceilo/nc.py | 1 + aossceilo/tests/test_nc.py | 2 +- 8 files changed, 223 insertions(+), 206 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba4344a..ccec9c9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,16 +2,21 @@ exclude: '^$' fail_fast: false repos: - repo: https://github.com/psf/black - rev: 23.11.0 + rev: 24.4.2 hooks: - id: black language_version: python3 exclude: versioneer.py args: - --target-version=py38 + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + language_version: python3 - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.5.0' + rev: 'v0.5.5' hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks @@ -22,12 +27,12 @@ repos: - id: check-yaml args: [--unsafe] - repo: https://github.com/PyCQA/bandit - rev: '1.7.5' # Update me! + rev: '1.7.9' # Update me! hooks: - id: bandit args: [--ini, .bandit] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.10.1' # Use the sha / tag you want to point at + rev: 'v1.11.0' # Use the sha / tag you want to point at hooks: - id: mypy additional_dependencies: @@ -35,11 +40,6 @@ repos: - types-pkg-resources - types-PyYAML - types-requests - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - language_version: python3 ci: # To trigger manually, comment on a pull request with "pre-commit.ci autofix" autofix_prs: false diff --git a/aossceilo/CONFIG.py b/aossceilo/CONFIG.py index 82d3f35..86ecb6a 100644 --- a/aossceilo/CONFIG.py +++ b/aossceilo/CONFIG.py @@ -9,6 +9,7 @@ This module will check for environment variables to create constants of PATHS and URL bases. It also has functions for getting file locations for any file for a specified day. """ + import os import re from datetime import datetime, timedelta diff --git a/aossceilo/__init__.py b/aossceilo/__init__.py index 2212fb4..5d2a69c 100644 --- a/aossceilo/__init__.py +++ b/aossceilo/__init__.py @@ -1,4 +1,5 @@ """Processing and archive management code for the AOSS Ceilometer instrument.""" + from aossceilo import CONFIG as c from aossceilo.version import __version__ # noqa diff --git a/aossceilo/level_b1/message.py b/aossceilo/level_b1/message.py index 0a159f1..441b64c 100644 --- a/aossceilo/level_b1/message.py +++ b/aossceilo/level_b1/message.py @@ -1,6 +1,7 @@ """ For processing Ceilometer CT25K Messages """ + import logging from calendar import timegm from datetime import datetime diff --git a/aossceilo/level_b1/nc.py b/aossceilo/level_b1/nc.py index 096bea3..8a294fe 100644 --- a/aossceilo/level_b1/nc.py +++ b/aossceilo/level_b1/nc.py @@ -1,6 +1,7 @@ import argparse import datetime import importlib.resources +import logging import os import sys from calendar import timegm @@ -11,6 +12,62 @@ from netCDF4 import Dataset from aossceilo.level_b1 import message +LOG = logging.getLogger(__name__) + +VAR_TYPE_MAP = { + "int": ( + "i4", + numpy.int32, + ), + "char": ( + "S1", + str, + ), + "float": ( + "f4", + numpy.float32, + ), + "short": ( + "i2", + numpy.int16, + ), + "double": ( + "f8", + numpy.float64, + ), + "String": ( + "S1", + str, + ), +} + +ATTR_TYPE_MAP = { + "int": ( + "i4", + int, + ), + "char": ( + "S1", + str, + ), + "float": ( + "f4", + float, + ), + "short": ( + "i2", + int, + ), + "double": ( + "f8", + float, + ), + "String": ( + "S1", + str, + ), +} + def _get_value(var, value): return ( @@ -30,225 +87,180 @@ def create_nc(input_files, out_files): .read() ) - # comparison object - class compareMessage: - def __init__(self, stamp): - self.stamp = stamp + for f in out_files: + _create_one_nc(messages, ncml, f) + return 0 - def __lt__(self, msg): - return self.stamp < msg.stamp - def __le__(self, msg): - return self.stamp <= msg.stamp +def _create_one_nc(messages, ncml, out_file): + now = datetime.datetime.strptime( + os.path.basename(out_file).split(".")[1], "%Y-%m-%d" + ) + # get bounds of messages + mask = (messages >= _compareMessage(now)) & ( + messages <= _compareMessage(now + datetime.timedelta(days=1)) + ) - def __gt__(self, msg): - return self.stamp > msg.stamp + if not mask.any(): + LOG.info(f"No files found for date range {now}") + return - def __ge__(self, msg): - return self.stamp >= msg.stamp + base = timegm(messages[mask][0].stamp.timetuple()) - def __eq__(self, msg): - return self.stamp == msg.stamp + # SETUP + nc = Dataset(out_file, "w") + _add_dimensions(ncml, nc) + _add_attributes(ncml, nc) + _add_variables(ncml, nc) - def _get_date(fn): - return datetime.datetime.strptime( - os.path.basename(fn).split(".")[1], "%Y-%m-%d" - ) + ssec_loc = ( + 43.08543, + -89.271632, + ) + nc.variables["lat"][:] = ssec_loc[0] + nc.variables["lon"][:] = ssec_loc[1] + nc.sync() + + var = nc.variables["base_time"] + setattr( + var, + "units", + "seconds since 1970-01-01 00:00:00 0:00", + ) + var[:] = base - for f in out_files: - now = _get_date(f) - # get bounds of messages - mask = (messages >= compareMessage(now)) & ( - messages <= compareMessage(now + datetime.timedelta(days=1)) + times = numpy.array( + tuple(numpy.int64(timegm(m.stamp.timetuple())) for m in messages[mask]) + ) + met_data = get_message_met_data(nc, messages[mask]) + hk_data = get_message_hk_data(nc, messages[mask]) + + midnight = timegm(now.timetuple()) + offsets = times - midnight # use scalar array subtraction + if offsets.dtype != numpy.int32: + offsets = numpy.array(offsets, dtype=numpy.int32) + var = nc.variables["time"] + var[: len(offsets)] = offsets + var.units = now.strftime("seconds since %Y-%m-%d 00:00:00 0:00") + + # time offsets from base_time + offsets = times - base + var = nc.variables["time_offset"] + var.units = messages[mask][0].stamp.strftime("seconds since %Y-%m-%d %H:%M:%S 0:00") + if offsets.dtype != numpy.int32: + offsets = numpy.array( + offsets, + numpy.int32, ) + var[: len(offsets)] = offsets + + for val_dict in [ + met_data, + hk_data, + ]: + for var_name in val_dict.keys(): + arr = val_dict[var_name] + var = nc.variables[var_name] + if len(arr.shape) == 1: + var[: arr.shape[0]] = arr + else: + var[ + : arr.shape[0], + : arr.shape[1], + ] = arr + nc.sync() + nc.close() - if not mask.any(): - break - base = timegm(messages[mask][0].stamp.timetuple()) +class _compareMessage: + def __init__(self, stamp): + self.stamp = stamp - # SETUP - nc = Dataset(f, "w") + def __lt__(self, msg): + return self.stamp < msg.stamp - def tag(s): - return f"{{http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2}}{s}" + def __le__(self, msg): + return self.stamp <= msg.stamp - # Dimensions - for e in ncml.findall(tag("dimension")): - name = e.attrib["name"] - if "isUnlimited" in e.attrib: - nc.createDimension( - name, - None, - ) # None for unlimited length - else: - nc.createDimension( - name, - int(e.attrib["length"]), - ) + def __gt__(self, msg): + return self.stamp > msg.stamp - # Attributes - for e in ncml.findall(tag("attribute")): - name = e.attrib["name"] - value = e.attrib["value"] - setattr( - nc, - name, - value, - ) + def __ge__(self, msg): + return self.stamp >= msg.stamp - # Variables - var_map = { - "int": ( - "i4", - numpy.int32, - ), - "char": ( - "S1", - str, - ), - "float": ( - "f4", - numpy.float32, - ), - "short": ( - "i2", - numpy.int16, - ), - "double": ( - "f8", - numpy.float64, - ), - "String": ( - "S1", - str, - ), - } - attr_map = { - "int": ( - "i4", - int, - ), - "char": ( - "S1", - str, - ), - "float": ( - "f4", - float, - ), - "short": ( - "i2", - int, - ), - "double": ( - "f8", - float, - ), - "String": ( - "S1", - str, - ), - } - - for v in ncml.findall(tag("variable")): - v_name = v.attrib["name"] - v_type = var_map[v.attrib["type"]][0] - shape = v.attrib.get("shape") - - attributes = v.findall(tag("attribute")) - try: - fill_value_attr = v.find(tag('attribute[@name="_FillValue"]')) - fill_value = fill_value_attr.attrib["value"] - attributes.remove(fill_value_attr) - except AttributeError: - fill_value = None - - if shape: - var = nc.createVariable( - v_name, - v_type, - dimensions=tuple(shape.split(" ")), - fill_value=fill_value, - ) - else: - var = nc.createVariable( - v_name, - v_type, - fill_value=fill_value, - ) + def __eq__(self, msg): + return self.stamp == msg.stamp - for a in attributes: - a_name = a.attrib["name"] - a_type = attr_map[a.attrib["type"]][1] - value = a.attrib["value"] - setattr( - var, - a_name, - a_type(value), - ) - nc.sync() - ssec_loc = ( - 43.08543, - -89.271632, - ) - nc.variables["lat"][:] = ssec_loc[0] - nc.variables["lon"][:] = ssec_loc[1] - nc.sync() +def _add_dimensions(ncml, nc): + for e in ncml.findall(_tag("dimension")): + name = e.attrib["name"] + if "isUnlimited" in e.attrib: + nc.createDimension( + name, + None, + ) # None for unlimited length + else: + nc.createDimension( + name, + int(e.attrib["length"]), + ) + - var = nc.variables["base_time"] +def _add_attributes(ncml, nc): + for e in ncml.findall(_tag("attribute")): + name = e.attrib["name"] + value = e.attrib["value"] setattr( - var, - "units", - "seconds since 1970-01-01 00:00:00 0:00", + nc, + name, + value, ) - var[:] = base - times = numpy.array( - tuple(numpy.int64(timegm(m.stamp.timetuple())) for m in messages[mask]) - ) - met_data = get_message_met_data(nc, messages[mask]) - hk_data = get_message_hk_data(nc, messages[mask]) - - midnight = timegm(now.timetuple()) - offsets = times - midnight # use scalar array subtraction - if offsets.dtype != numpy.int32: - offsets = numpy.array(offsets, dtype=numpy.int32) - var = nc.variables["time"] - var[: len(offsets)] = offsets - var.units = now.strftime("seconds since %Y-%m-%d 00:00:00 0:00") - - # time offsets from base_time - offsets = times - base - var = nc.variables["time_offset"] - var.units = messages[mask][0].stamp.strftime( - "seconds since %Y-%m-%d %H:%M:%S 0:00" - ) - if offsets.dtype != numpy.int32: - offsets = numpy.array( - offsets, - numpy.int32, + +def _add_variables(ncml, nc): + for v in ncml.findall(_tag("variable")): + v_name = v.attrib["name"] + v_type = VAR_TYPE_MAP[v.attrib["type"]][0] + shape = v.attrib.get("shape") + + attributes = v.findall(_tag("attribute")) + try: + fill_value_attr = v.find(_tag('attribute[@name="_FillValue"]')) + fill_value = fill_value_attr.attrib["value"] + attributes.remove(fill_value_attr) + except AttributeError: + fill_value = None + + if shape: + var = nc.createVariable( + v_name, + v_type, + dimensions=tuple(shape.split(" ")), + fill_value=fill_value, + ) + else: + var = nc.createVariable( + v_name, + v_type, + fill_value=fill_value, + ) + + for a in attributes: + a_name = a.attrib["name"] + a_type = ATTR_TYPE_MAP[a.attrib["type"]][1] + value = a.attrib["value"] + setattr( + var, + a_name, + a_type(value), ) - var[: len(offsets)] = offsets - - for val_dict in [ - met_data, - hk_data, - ]: - for var_name in val_dict.keys(): - arr = val_dict[var_name] - var = nc.variables[var_name] - if len(arr.shape) == 1: - var[: arr.shape[0]] = arr - else: - var[ - : arr.shape[0], - : arr.shape[1], - ] = arr + nc.sync() - nc.close() - return 0 + + +def _tag(s): + return f"{{http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2}}{s}" def get_message_hk_data(nc, messages): diff --git a/aossceilo/message.py b/aossceilo/message.py index 2c6fe5d..9560acb 100644 --- a/aossceilo/message.py +++ b/aossceilo/message.py @@ -1,6 +1,7 @@ """ For processing Ceilometer CT25K Messages """ + # ruff: noqa import logging from calendar import timegm diff --git a/aossceilo/nc.py b/aossceilo/nc.py index ceb43b4..7565d6d 100644 --- a/aossceilo/nc.py +++ b/aossceilo/nc.py @@ -1,6 +1,7 @@ """ Library for creating and manipulating Viasala CT25K Ceilometer NetCDF files. """ + # ruff: noqa __author__ = "Bruce Flynn, SSEC" __version__ = "$Revision: 1.15 $" diff --git a/aossceilo/tests/test_nc.py b/aossceilo/tests/test_nc.py index 6e23adb..13d8dfb 100644 --- a/aossceilo/tests/test_nc.py +++ b/aossceilo/tests/test_nc.py @@ -19,7 +19,7 @@ def test_load_messages(): # Normal Operation messages = message.load_messages(ascii_test_files + ("does not exist",)) for m in messages: - assert type(m) == message.Message2 + assert isinstance(m, message.Message2) assert hasattr(m, "stamp") messages = message.load_messages(("does not exist",)) assert messages.shape == (0,) -- GitLab