diff --git a/aosstower/level_b1/nc.py b/aosstower/level_b1/nc.py
index 14fee4aa22c9a2f91d29f325d064c6d1cc2d4596..701945b00fd2ced93d9d90f4436c54a581c75dd5 100644
--- a/aosstower/level_b1/nc.py
+++ b/aosstower/level_b1/nc.py
@@ -1,10 +1,10 @@
-#!/usr/bin/env python
 """Generate AOSS Tower NetCDF4 files from Level 00 ASCII files."""
 import logging
 import os
 import platform
 import sys
 from datetime import datetime
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -37,7 +37,8 @@ def _get_data(input_files):
             bad_files += 1
             continue
     if bad_files == len(input_files):
-        raise ValueError("No valid input data files found")
+        msg = "No valid input data files found"
+        raise ValueError(msg)
 
 
 def get_data(input_files):
@@ -75,14 +76,14 @@ def write_global_attributes(nc_file, input_sources, interval=None, datastream=No
     nc_file.command_line = " ".join(sys.argv)
 
     # generate history
-    nc_file.history = " ".join(platform.uname()) + " " + os.path.basename(__file__)
+    nc_file.history = " ".join(platform.uname()) + " " + Path(__file__).name
     nc_file.input_source = input_sources[0]
     nc_file.input_sources = ", ".join(input_sources)
 
 
 def create_giant_netcdf(
-    input_files,
-    output_fn,
+    input_files: list[Path],
+    output_fn: Path,
     zlib,
     chunk_size,
     start=None,
@@ -94,13 +95,15 @@ def create_giant_netcdf(
 ):
     frame = get_data(input_files)
     if frame.empty:
-        raise ValueError("No data found from input files: {}".format(", ".join(input_files)))
+        in_files_str = ", ".join(str(in_file) for in_file in input_files)
+        msg = f"No data found from input files: {in_files_str}"
+        raise ValueError(msg)
 
     # Add wind direction components so we can average wind direction properly
     frame["wind_east"], frame["wind_north"], _ = calc.wind_vector_components(frame["wind_speed"], frame["wind_dir"])
 
     if "air_temp" in frame and "rh" in frame and ("dewpoint" in database or "dewpoint_mean" in database):
-        LOG.info("'dewpoint' is missing from the input file, will calculate " "it from air temp and relative humidity")
+        LOG.info("'dewpoint' is missing from the input file, will calculate it from air temp and relative humidity")
         frame["dewpoint"] = calc.dewpoint(frame["air_temp"], frame["rh"])
 
     # round up each 1 minute group so data at time T is the average of data
@@ -128,7 +131,7 @@ def create_giant_netcdf(
     if start and end:
         frame = frame[start.strftime("%Y-%m-%d %H:%M:%S") : end.strftime("%Y-%m-%d %H:%M:%S")]
 
-    chunk_sizes = [chunk_size] if chunk_size and not isinstance(chunk_size, (list, tuple)) else [frame.shape[0]]
+    chunk_sizes = [chunk_size] if chunk_size and not isinstance(chunk_size, list | tuple) else [frame.shape[0]]
 
     first_stamp = datetime.strptime(str(frame.index[0]), "%Y-%m-%d %H:%M:%S")
     # NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See:
@@ -147,7 +150,7 @@ def create_giant_netcdf(
 
     write_global_attributes(
         nc_file,
-        [os.path.basename(x) for x in input_files],
+        [x.name for x in input_files],
         interval=interval_width,
         datastream=datastream,
     )
@@ -195,13 +198,13 @@ def main():
         "--start-time",
         type=_dt_convert,
         help="Start time of massive netcdf file, if only -s is given, a netcdf file for only that day is given"
-        + ". Formats allowed: 'YYYY-MM-DDTHH:MM:SS', 'YYYY-MM-DD'",
+        ". Formats allowed: 'YYYY-MM-DDTHH:MM:SS', 'YYYY-MM-DD'",
     )
     parser.add_argument(
         "-e",
         "--end-time",
         type=_dt_convert,
-        help="End time of massive netcdf file. Formats allowed:" + "'YYYY-MM-DDTHH:MM:SS', 'YYYY-MM-DD'",
+        help="End time of massive netcdf file. Formats allowed: 'YYYY-MM-DDTHH:MM:SS', 'YYYY-MM-DD'",
     )
     parser.add_argument(
         "-n",
@@ -257,7 +260,8 @@ each input file is mapped to the corresponding output file.
     if args.start_time and not args.end_time:
         args.end_time = args.start_time.replace(hour=23, minute=59, second=59)
     elif not args.start_time and args.end_time:
-        raise ValueError("start time must be specified when end time is specified")
+        msg = "start time must be specified when end time is specified"
+        raise ValueError(msg)
 
     mini_database = {k: schema.database_dict[k] for k in args.fields}
     if args.summary:
@@ -266,18 +270,19 @@ each input file is mapped to the corresponding output file.
     # Case 1: All inputs to 1 output file
     # Case 2: Each input in to a separate output file
     if args.output_files and len(args.output_files) not in [1, len(args.input_files)]:
-        raise ValueError("Output filenames must be 1 or the same length as input files")
-    elif args.output_files and len(args.output_files) == len(args.input_files):
+        msg = "Output filenames must be 1 or the same length as input files"
+        raise ValueError(msg)
+    if args.output_files and len(args.output_files) == len(args.input_files):
         args.input_files = [[i] for i in args.input_files]
     else:
         args.input_files = [args.input_files]
 
     success = False
-    for in_files, out_fn in zip(args.input_files, args.output_files):
+    for in_files, out_fn in zip(args.input_files, args.output_files, strict=True):
         try:
             create_giant_netcdf(
-                in_files,
-                out_fn,
+                [Path(in_file) for in_file in in_files],
+                Path(out_fn),
                 args.zlib,
                 args.chunk_size,
                 args.start_time,
@@ -291,7 +296,8 @@ each input file is mapped to the corresponding output file.
         except (ValueError, TypeError):
             LOG.error(f"Could not generate NetCDF file for {in_files}", exc_info=True)
     if not success:
-        raise OSError("All ASCII files were empty or could not be read")
+        msg = "All ASCII files were empty or could not be read"
+        raise OSError(msg)
 
 
 if __name__ == "__main__":
diff --git a/pyproject.toml b/pyproject.toml
index 20dc9efb3f89f5edc13032a71193025c220d2d0c..e8dff0b77f433f53ea892584582940d6e3dd2bbc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ ignore = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107", "D203"
 [tool.ruff.per-file-ignores]
 "aosstower/tests/*" = ["S", "PLR2004"]
 "aosstower/level_b1/quicklook.py" = ["PLR0913"]
+"aosstower/level_b1/nc.py" = ["PLR0913"]
 
 [tool.ruff.pydocstyle]
 convention = "google"