From d18de49d91f1a6584fd85d488a584c9c5034d67b Mon Sep 17 00:00:00 2001 From: David Hoese <david.hoese@ssec.wisc.edu> Date: Mon, 6 Mar 2023 06:00:30 -0600 Subject: [PATCH] Fix most ruff identified issues --- .bandit | 3 - .pre-commit-config.yaml | 25 +- metobsapi/__init__.py | 4 +- metobsapi/common_config.py | 6 +- metobsapi/data_api.py | 93 +++--- metobsapi/files_api.py | 45 ++- metobsapi/server.py | 44 +-- metobsapi/tests/test_data_api.py | 508 ++++++++++++++++-------------- metobsapi/tests/test_files_api.py | 15 +- metobsapi/tests/test_misc.py | 21 +- metobsapi/util/__init__.py | 48 ++- metobsapi/util/data_responses.py | 4 +- metobsapi/util/file_responses.py | 32 +- metobsapi/util/query_influx.py | 57 ++-- pyproject.toml | 17 + setup.cfg | 8 - 16 files changed, 466 insertions(+), 464 deletions(-) delete mode 100644 .bandit delete mode 100644 setup.cfg diff --git a/.bandit b/.bandit deleted file mode 100644 index 9ccd928..0000000 --- a/.bandit +++ /dev/null @@ -1,3 +0,0 @@ -[bandit] -skips: B506 -exclude: metobsapi/tests diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index faf43a6..847932a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,12 +13,17 @@ repos: hooks: - id: isort language_version: python3 - - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + - repo: https://github.com/charliermarsh/ruff-pre-commit + # Ruff version. + rev: 'v0.0.254' hooks: - - id: flake8 - additional_dependencies: [flake8-docstrings, flake8-debugger, flake8-bugbear, mccabe] - args: [--max-complexity, "10"] + - id: ruff +# - repo: https://github.com/PyCQA/flake8 +# rev: 6.0.0 +# hooks: +# - id: flake8 +# additional_dependencies: [flake8-docstrings, flake8-debugger, flake8-bugbear, mccabe] +# args: [--max-complexity, "10"] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: @@ -33,11 +38,11 @@ repos: args: ["-i", "4"] #- id: shfmt-docker # Docker image (requires Docker to run) # - - repo: https://github.com/PyCQA/bandit - rev: '1.7.4' - hooks: - - id: bandit - args: [--ini, .bandit] +# - repo: https://github.com/PyCQA/bandit +# rev: '1.7.4' +# hooks: +# - id: bandit +# args: [--ini, .bandit] - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v1.0.1' # Use the sha / tag you want to point at hooks: diff --git a/metobsapi/__init__.py b/metobsapi/__init__.py index 4175874..bb623f7 100644 --- a/metobsapi/__init__.py +++ b/metobsapi/__init__.py @@ -1,3 +1,3 @@ -from metobsapi.server import app # noqa +from metobsapi.server import app # noqa: F401 -from .version import __version__ # noqa +from .version import __version__ # noqa: F401 diff --git a/metobsapi/common_config.py b/metobsapi/common_config.py index acf0156..eaed3dc 100644 --- a/metobsapi/common_config.py +++ b/metobsapi/common_config.py @@ -3,7 +3,7 @@ JSONIFY_PRETTYPRINT_REGULAR = False if "SECRET_KEY" not in globals(): # we don't do anything with cookies or sessions, set this somewhere secret in the future # Security: This is expected to be overwritten either via environment variable or sub-configuration - SECRET_KEY = "secret!" # nosec B105 + SECRET_KEY = "secret!" # noqa: S105 ARCHIVE_ROOT = "/data1/cache" ARCHIVE_URL = "http://metobs.ssec.wisc.edu/pub/cache" @@ -13,9 +13,9 @@ INFLUXDB_HOST = "rain01" INFLUXDB_PORT = 8086 INFLUXDB_USER = "root" # Security: This is expected to be overwritten either via environment variable or sub-configuration -INFLUXDB_PASS = "root" # nosec B105 +INFLUXDB_PASS = "root" # noqa: S105 # If token is provided, user and password are ignored # Security: This is expected to be overwritten either via environment variable or sub-configuration -INFLUXDB_TOKEN = "" # nosec B105 +INFLUXDB_TOKEN = "" INFLUXDB_ORG = "metobs" INFLUXDB_DB = "metobs" diff --git a/metobsapi/data_api.py b/metobsapi/data_api.py index 0a0358b..8e5b09f 100644 --- a/metobsapi/data_api.py +++ b/metobsapi/data_api.py @@ -1,9 +1,9 @@ import logging +from collections.abc import Iterable from datetime import datetime, timedelta -from typing import Iterable # Security: Document is only used for creating an XML document, not parsing one -from xml.dom.minidom import Document # nosec B408 +from xml.dom.minidom import Document import numpy as np import pandas as pd @@ -15,6 +15,7 @@ from metobsapi.util.query_influx import query LOG = logging.getLogger(__name__) +LENGTH_DATE_STR = 10 # NOTE: These use the influxdb column name (default API name) # In most cases these names are equivalent @@ -29,16 +30,14 @@ ROUNDING = { } -def handle_date(date): +def handle_date(date: str) -> datetime: + datetime_fmt = "%Y-%m-%d" if len(date) == LENGTH_DATE_STR else "%Y-%m-%dT%H:%M:%S" try: - date_len = len(date) - if date_len == 10: - return datetime.strptime(date, "%Y-%m-%d") - else: - return datetime.strptime(date, "%Y-%m-%dT%H:%M:%S") - except ValueError: - LOG.warning("Malformed date string '%s'", date) - raise ValueError("Malformed date string '%s'" % (date,)) + return datetime.strptime(date, datetime_fmt) + except ValueError as e: + msg = f"Malformed date string '{date}'" + LOG.warning(msg) + raise ValueError(msg) from e def handle_time_string(date_string): @@ -59,13 +58,15 @@ def handle_symbols(symbols: list[str]): add_winds = set() for symbol in symbols: if symbol in handled_symbols: - raise ValueError("Symbol listed multiple times: {}".format(symbol)) + msg = f"Symbol listed multiple times: {symbol}" + raise ValueError(msg) handled_symbols.add(symbol) try: site, inst, s = symbol.split(".") - except ValueError: - raise ValueError("Symbols must have 3 period-separated parts: {}".format(symbol)) + except ValueError as e: + msg = f"Symbols must have 3 period-separated parts: {symbol}" + raise ValueError(msg) from e influx_name = _get_column_name_in_influxdb(site, inst, s) # this assignment must happen before we continue the loop after @@ -89,13 +90,14 @@ def handle_symbols(symbols: list[str]): def _get_column_name_in_influxdb(site: str, inst: str, api_symbol: str) -> str: api_to_influx_rename_map = data_responses.SYMBOL_TRANSLATIONS.get((site, inst)) if not api_to_influx_rename_map: - raise ValueError("Unknown site/instrument: {},{}".format(site, inst)) + msg = f"Unknown site/instrument: {site},{inst}" + raise ValueError(msg) influx_name = api_to_influx_rename_map.get(api_symbol) if api_symbol != "wind_direction" and influx_name is None: - raise ValueError(f"Unknown symbol: {site}.{inst}.{api_symbol}") - influx_name = influx_name or api_symbol - return influx_name + msg = f"Unknown symbol: {site}.{inst}.{api_symbol}" + raise ValueError(msg) + return influx_name or api_symbol def handle_influxdb_result(data_frame, influx_to_requested_symbols): @@ -113,18 +115,18 @@ def handle_influxdb_result(data_frame, influx_to_requested_symbols): def _create_empty_dataframe_for_columns(columns: list[str]) -> pd.DataFrame: - columns = ["time"] + columns + columns = ["time", *columns] data_frame = pd.DataFrame(columns=columns) - data_frame.set_index("time", inplace=True) - data_frame.fillna(value=np.nan, inplace=True) - return data_frame + data_frame = data_frame.set_index("time") + return data_frame.fillna(value=np.nan) def _convert_wind_vectors_to_direction_if_needed(data_frame: pd.DataFrame) -> pd.DataFrame: for weast_col_name, wnorth_col_name, wdir_col_name in _wind_vector_column_groups(data_frame.columns): data_frame[wdir_col_name] = np.rad2deg(np.arctan2(data_frame[weast_col_name], data_frame[wnorth_col_name])) data_frame[wdir_col_name] = data_frame[wdir_col_name].where( - data_frame[wdir_col_name] > 0, data_frame[wdir_col_name] + 360.0 + data_frame[wdir_col_name] > 0, + data_frame[wdir_col_name] + 360.0, ) data_frame = data_frame.drop(columns=[weast_col_name, wnorth_col_name]) return data_frame @@ -161,6 +163,7 @@ def calc_num_records(begin, end, interval): def handle_csv(frame, epoch, sep=",", message="", code=200, status="success", **kwargs): + del kwargs # other kwargs used by other handlers output = """# status: {status} # code: {code:d} # message: {message} @@ -172,9 +175,9 @@ def handle_csv(frame, epoch, sep=",", message="", code=200, status="success", ** data_lines = [] line_format = sep.join(["{time}", "{symbols}"]) if frame is not None and not frame.empty: - for t, row in frame.iterrows(): - t = _time_column_to_epoch_str(t, epoch) - line = line_format.format(time=t, symbols=sep.join(str(x) for x in row.values)) + for index_time, row in frame.iterrows(): + time_str = _time_column_to_epoch_str(index_time, epoch) + line = line_format.format(time=time_str, symbols=sep.join(str(x) for x in row.to_list())) data_lines.append(line) if not epoch: @@ -200,27 +203,27 @@ def handle_csv(frame, epoch, sep=",", message="", code=200, status="success", ** @as_json_p(optional=True) def handle_json(frame, epoch, order="columns", message="", code=200, status="success", **kwargs): + del kwargs # other kwargs used by other handlers package = {} if frame is not None and not frame.empty: # force conversion to float types so they can be json'd - for column, data_type in zip(frame.columns, frame.dtypes.values): + for column, data_type in zip(frame.columns, frame.dtypes.values, strict=True): if issubclass(data_type.type, np.integer): frame[column] = frame[column].astype(float) # replace NaNs with None - frame = frame.where(pd.notnull(frame), None) + frame = frame.where(pd.notna(frame), None) - newStamps = [] - for stamp in frame.index.values: - newStamps.append(_time_column_to_epoch_str(stamp, epoch)) - package["timestamps"] = newStamps + new_stamps = [] + for stamp in frame.index.to_list(): + new_stamps.append(_time_column_to_epoch_str(stamp, epoch)) + package["timestamps"] = new_stamps if order == "column": package["data"] = dict(frame) else: package["symbols"] = frame.columns - package["data"] = [frame.iloc[i].values for i in range(frame.shape[0])] - # package['data'] = frame.values + package["data"] = list(frame.to_dict(orient="split")["data"]) else: package["timestamps"] = [] if order == "column": @@ -240,6 +243,7 @@ def handle_json(frame, epoch, order="columns", message="", code=200, status="suc def handle_xml(frame, epoch, sep=",", message="", code=200, status="success", **kwargs): + del kwargs # other kwargs used by other handlers doc = Document() header = "metobs" @@ -293,15 +297,17 @@ def _time_column_to_epoch_str(time_val: datetime | np.datetime64 | pd.Timestamp if isinstance(time_val, str): return time_val if isinstance(time_val, np.datetime64): - time_val = time_val.astype("datetime64[us]").astype(datetime) - if isinstance(time_val, pd.Timestamp): - time_val = time_val.to_pydatetime() + time_val_dt = time_val.astype("datetime64[us]").astype(datetime) + elif isinstance(time_val, pd.Timestamp): + time_val_dt = time_val.to_pydatetime() + else: + time_val_dt = time_val if not epoch: - return time_val.strftime("%Y-%m-%dT%H:%M:%SZ") + return time_val_dt.strftime("%Y-%m-%dT%H:%M:%SZ") # NOTE: Assumes host system is UTC - time_seconds = time_val.timestamp() + time_seconds = time_val_dt.timestamp() time_in_units = data_responses.seconds_to_epoch[epoch](time_seconds) time_in_units = round(time_in_units) # round to nearest integer return f"{time_in_units:d}" @@ -310,8 +316,7 @@ def _time_column_to_epoch_str(time_val: datetime | np.datetime64 | pd.Timestamp def handle_error(fmt, error_str): handler = RESPONSE_HANDLERS[fmt] err_code, err_msg = data_responses.ERROR_MESSAGES.get(error_str, (400, error_str)) - res = handler(None, None, message=err_msg, code=err_code, status="error") - return res + return handler(None, None, message=err_msg, code=err_code, status="error") RESPONSE_HANDLERS = { @@ -347,8 +352,8 @@ def _convert_begin_and_end(begin, end) -> tuple[datetime | timedelta, datetime | # these will be either datetime or timedelta objects begin = handle_time_string(begin) end = handle_time_string(end) - except (TypeError, ValueError): - raise ValueError("malformed_timestamp") + except (TypeError, ValueError) as e: + raise ValueError("malformed_timestamp") from e return begin, end @@ -367,7 +372,7 @@ def _parse_symbol_names(site: str, inst: str, symbols: str) -> tuple[list[str] | if site and inst: # shorthand for symbols that all use the same site and inst short_symbols = symbols.split(":") - fully_qualified_symbols = ["{}.{}.{}".format(site, inst, s) for s in short_symbols] + fully_qualified_symbols = [f"{site}.{inst}.{s}" for s in short_symbols] elif not site and not inst: # each symbol is fully qualified with site.inst.symbol short_symbols = None diff --git a/metobsapi/files_api.py b/metobsapi/files_api.py index 879f1be..7c0988d 100644 --- a/metobsapi/files_api.py +++ b/metobsapi/files_api.py @@ -1,7 +1,7 @@ import logging -import os from datetime import datetime from datetime import timedelta as delta +from pathlib import Path import pandas as pd from flask import Response, render_template @@ -49,13 +49,15 @@ def get_data(dates, streams, frame=True): data = [] + root = Path(app.config["ARCHIVE_ROOT"]) + url_root = app.config["ARCHIVE_URL"] for stream_id in streams: stream_info = file_responses.ARCHIVE_STREAMS.get(stream_id) if stream_info is None: # an error occurred return stream_id, "unknown_stream" - if isinstance(stream_info, (list, tuple)): + if isinstance(stream_info, list | tuple): # special wildcard stream_id res = get_data(dates, stream_info, frame=False) data.extend(res) @@ -64,24 +66,23 @@ def get_data(dates, streams, frame=True): relpath = stream_info["relpath"] f_set = set() for dt in dates: - path = dt.strftime(relpath) - pathname = os.path.join(app.config["ARCHIVE_ROOT"], path) - if os.path.exists(pathname) and pathname not in f_set: + path = dt.strftime(str(relpath)) + pathname = root / path + if pathname.exists() and pathname not in f_set: f_set.add(pathname) file_info = stream_info.copy() - file_info["filename"] = os.path.basename(pathname) - file_info["url"] = os.path.join(app.config["ARCHIVE_URL"], path) - file_info["size"] = os.stat(pathname).st_size + file_info["filename"] = pathname.name + file_info["url"] = f"{url_root}/{path}" + file_info["size"] = pathname.stat().st_size file_info["relpath"] = path if file_info["thumbnail"] is not None: - file_info["thumbnail"] = dt.strftime(file_info["thumbnail"]) - file_info["thumbnail"] = os.path.join(app.config["ARCHIVE_URL"], file_info["thumbnail"]) + tn_filename = dt.strftime(str(file_info["thumbnail"])) + file_info["thumbnail"] = f"{url_root}/{tn_filename}" data.append(file_info) if frame: return pd.DataFrame(data) - else: - return data + return data def handle_csv(frame, message="", code=200, status="success"): @@ -98,8 +99,8 @@ def handle_csv(frame, message="", code=200, status="success"): if frame is not None and not frame.empty: frame = frame[columns] data_lines = [] - for row in frame.values: - data_lines.append(",".join(str(x) for x in row)) + for row_dict in frame.to_dict(orient="records"): + data_lines.append(",".join(str(x) for x in row_dict.values())) else: data_lines = [] rows = "\n".join(data_lines) @@ -175,15 +176,14 @@ bitsadmin /resume myDownloadJob bitsadmin /SetNotifyCmdLine myDownloadJob \"%SystemRoot%\\system32\\bitsadmin.exe\" \"%SystemRoot%\\syste,32\\bitsadmin.exe /complete myDownloadJob\" bitsadmin /monitor -""" # noqa +""" # noqa: E501 if frame is not None and not frame.empty: urls = frame["url"] directories = [] commands = [] for relpath in frame["relpath"]: - directory = os.path.dirname(relpath).replace("/", "\\") - + directory = str(Path(relpath).parent).replace("/", "\\") if directory not in directories: directories.append(directory) dir_str = """ @@ -191,7 +191,7 @@ if not exist %cd%\\data\\{directory} ( mkdir %cd%\\{directory} ) """.format( - directory=directory + directory=directory, ) commands.append(dir_str) commands.append("bitsadmin /create myDownloadJob\n") @@ -226,10 +226,7 @@ def handle_json(frame, message="", code=200, status="success"): output["num_results"] = (len(list(frame.index))) if frame is not None else 0 if frame is not None and not frame.empty: - body = [] - for row in frame.values: - new_row = dict((k, row[idx]) for idx, k in enumerate(frame.columns)) - body.append(new_row) + body = list(frame.to_dict(orient="records")) output["data"] = body else: output["data"] = [] @@ -248,7 +245,7 @@ def handle_error(fmt, error_str, stream_id=None): handler = RESPONSE_HANDLERS[fmt] err_code, err_msg = file_responses.ERROR_MESSAGES[error_str] if stream_id is not None: - err_msg += ": '{}'".format(stream_id) + err_msg += f": '{stream_id}'" res = handler(None, message=err_msg, code=err_code, status="error") return res, err_code @@ -259,7 +256,7 @@ def find_stream_files(fmt, begin_time, end_time, dates, streams): if not streams: return handle_error(fmt, "missing_streams") - elif isinstance(streams, str): + if isinstance(streams, str): streams = streams.split(":") if isinstance(dates, str): diff --git a/metobsapi/server.py b/metobsapi/server.py index 08752e1..bd62603 100644 --- a/metobsapi/server.py +++ b/metobsapi/server.py @@ -4,6 +4,7 @@ import os import sys from datetime import datetime from enum import Enum +from pathlib import Path from urllib.error import URLError from urllib.request import urlopen @@ -33,6 +34,7 @@ json = FlaskJSON(app) def enum_encoder(o): if isinstance(o, Enum): return o.value + return None # Allow for cross-domain access to the API using CORS @@ -62,12 +64,12 @@ def data_index(): @app.errorhandler(404) -def page_not_found(e): +def page_not_found(_): return render_template("404.html"), 404 @app.errorhandler(500) -def internal_server(e): +def internal_server(_): return render_template("500.html"), 500 @@ -89,9 +91,7 @@ def get_data(fmt): order = request.args.get("order", "row") epoch = request.args.get("epoch") - result = data_api.modify_data(fmt, begin_time, end_time, site, inst, symbols, interval, sep, order, epoch) - - return result + return data_api.modify_data(fmt, begin_time, end_time, site, inst, symbols, interval, sep, order, epoch) @app.route("/api/files.<fmt>", methods=["GET"]) @@ -110,7 +110,7 @@ def get_archive_info(): "code": 200, "message": "", "sites": file_responses.ARCHIVE_INFO, - } + }, ) @@ -120,21 +120,20 @@ def status_index(): def _status_dict_to_html(response): - items = "<br>\n".join("{}: {}".format(k, v) for k, v in sorted(response.items())) + items = "<br>\n".join(f"{k}: {v}" for k, v in sorted(response.items())) return """<html> <body> {} </body> </html>""".format( - items + items, ) def _status_render(response, fmt): if fmt == "json": return jsonify(response) - else: - return _status_dict_to_html(response) + return _status_dict_to_html(response) @app.route("/api/status/<site>/<inst>.<fmt>", methods=["GET"]) @@ -158,30 +157,31 @@ def get_instrument_status(site, inst=None, fmt=None): if fmt not in ["html", "json"]: return render_template("400.html", format=fmt), 400 - if inst is None: - json_subpath = os.path.join(site, "status.json") - else: - json_subpath = os.path.join(site, inst, "status.json") + site_path = Path(site) + json_subpath = site_path / "status.json" if inst is None else site_path / inst / "status.json" # try to load the JSON file from the archive - if not os.path.isfile(app.config.get("ARCHIVE_ROOT")) and app.config.get("ARCHIVE_ROOT").startswith("http"): + archive_root = app.config.get("ARCHIVE_ROOT") + if archive_root.startswith("http"): LOG.warning("Using URL request for status JSON, not meant for operational use") # we aren't on a system with the archive available, fall back to URL # loads directly to the archive base_url = app.config.get("ARCHIVE_URL") - json_url = os.path.join(base_url, json_subpath) + json_url = f"{base_url}/{json_subpath}" try: # Security: We check to ensure this is an HTTP URL as a base URL. # The server configuration is also the one setting what the root URL is. - json_str = urlopen(json_url).read() # nosec B310 + with urlopen(json_url) as json_file: + json_str = json_file.read() except URLError: - response["status_message"] = "Could not retrieve configured status: {}".format(json_url) + response["status_message"] = f"Could not retrieve configured status: {json_url}" json_str = None else: - base_path = app.config.get("ARCHIVE_ROOT") - json_path = os.path.join(base_path, json_subpath) + base_path = Path(archive_root) + json_path = base_path / json_subpath try: - json_str = open(json_path, "r").read() + with json_path.open("r") as json_file: + json_str = json_file.read() mod_time = datetime.fromtimestamp(os.path.getmtime(json_path)) except FileNotFoundError: response["status_message"] = "No status information found." @@ -199,5 +199,5 @@ def get_instrument_status(site, inst=None, fmt=None): if __name__ == "__main__": app.debug = True - bind_addr = "0.0.0.0" if len(sys.argv) <= 1 else sys.argv[0] # nosec B104 + bind_addr = "0.0.0.0" if len(sys.argv) <= 1 else sys.argv[0] # noqa: S104 app.run(bind_addr, threaded=True) diff --git a/metobsapi/tests/test_data_api.py b/metobsapi/tests/test_data_api.py index d6710a7..fb668cb 100644 --- a/metobsapi/tests/test_data_api.py +++ b/metobsapi/tests/test_data_api.py @@ -1,8 +1,10 @@ +from __future__ import annotations + import contextlib import json import random from datetime import datetime, timedelta -from typing import Any, Callable, ContextManager, Iterable, Iterator +from typing import TYPE_CHECKING, Any, ContextManager from unittest import mock import numpy as np @@ -19,7 +21,10 @@ try: except ImportError: ResultSet = None -from metobsapi.util.query_influx import QueryResult +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Iterator + + from metobsapi.util.query_influx import QueryResult @pytest.fixture(params=["1", "2"], ids=["influxdbv1", "influxdbv2"]) @@ -27,7 +32,7 @@ def influxdb_version(request): return request.param -@pytest.fixture +@pytest.fixture() def mock_influxdb_query(influxdb_version) -> Callable[[list[dict]], ContextManager[mock.Mock]]: @contextlib.contextmanager def _mock_influxdb_query_with_fake_data(fake_result_data: list[dict]) -> Iterator[mock.Mock]: @@ -45,7 +50,8 @@ def mock_influxdb_query(influxdb_version) -> Callable[[list[dict]], ContextManag @contextlib.contextmanager def _mock_influxdb_library_availability(v1_available: bool, v2_available: bool) -> Iterator[None]: with _mock_if(not v1_available, "metobsapi.util.query_influx.InfluxDBClientv1"), _mock_if( - not v2_available, "metobsapi.util.query_influx.InfluxDBClientv2" + not v2_available, + "metobsapi.util.query_influx.InfluxDBClientv2", ): yield None @@ -79,17 +85,16 @@ def _patch_v2_query(fake_result_data: list[dict]) -> Iterator[mock.Mock]: data_row_dicts = [] for row_values in data_dict["values"]: data_row = {} - for col_name, col_value in zip(data_dict["columns"], row_values): + for col_name, col_value in zip(data_dict["columns"], row_values, strict=True): data_row[col_name] = np.nan if col_value is None else col_value data_row.update(data_dict["tags"]) data_row_dicts.append(data_row) single_df = pd.DataFrame(data_row_dicts, columns=["site", "inst"] + data_dict["columns"]) - single_df.set_index("time", inplace=True) + single_df = single_df.set_index("time") single_df = single_df.dropna(axis=1, how="all") site_data_frames.append(single_df) - # merged_df = pd.concat(site_data_frames, axis=1, copy=False) with mock.patch("metobsapi.util.query_influx._query_influxdbv2") as query_func: query_func.return_value = site_data_frames yield query_func @@ -106,27 +111,29 @@ def _fake_data_to_v1_resultset(fake_data: list[dict]) -> QueryResult: return [ResultSet({"series": [single_result], "statement_id": 0}) for single_result in result_sets] -@pytest.fixture +@pytest.fixture() def influxdb_air_temp_9_values(mock_influxdb_query) -> Iterable[None]: fake_result = _fake_data("1m", {("aoss", "tower"): ["time", "air_temp"]}, 9) - with mock_influxdb_query(fake_result): - yield + with mock_influxdb_query(fake_result) as query_func: + yield query_func -@pytest.fixture +@pytest.fixture() def influxdb_3_symbols_9_values(mock_influxdb_query) -> Iterable[None]: fake_result = _fake_data("1m", {("aoss", "tower"): ["time", "air_temp", "rel_hum", "wind_speed"]}, 9) - with mock_influxdb_query(fake_result): - yield + with mock_influxdb_query(fake_result) as query_func: + yield query_func -@pytest.fixture +@pytest.fixture() def influxdb_wind_fields_9_values(mock_influxdb_query) -> Iterable[None]: fake_result = _fake_data( - "1m", {("aoss", "tower"): ["time", "wind_speed", "wind_direction", "wind_east", "wind_north"]}, 9 + "1m", + {("aoss", "tower"): ["time", "wind_speed", "wind_direction", "wind_east", "wind_north"]}, + 9, ) - with mock_influxdb_query(fake_result): - yield + with mock_influxdb_query(fake_result) as query_func: + yield query_func def _fake_data(interval: str, symbols: dict[tuple[str, str], list[str]], num_vals: int) -> list[dict[str, Any]]: @@ -137,7 +144,7 @@ def _fake_data(interval: str, symbols: dict[tuple[str, str], list[str]], num_val tags = {"site": site, "inst": inst} vals: list[list[datetime | float | None]] = [] for i in range(num_vals): - vals.append([(now + timedelta(minutes=i))] + _generate_fake_column_values(columns[1:])) + vals.append([now + timedelta(minutes=i), *_generate_fake_column_values(columns[1:])]) # make up some Nones/nulls (but not all the time) r_idx = int(random.random() * len(columns) * 3) # don't include time (index 0) @@ -162,266 +169,279 @@ def _generate_fake_column_values(columns: list[str]) -> list[float | None]: return rand_data -@pytest.fixture +@pytest.fixture() def app(influxdb_version): from metobsapi import app as real_app real_app.config.update({"TESTING": True, "DEBUG": True}) if influxdb_version == "2": real_app.config.update({"INFLUXDB_TOKEN": "1234"}) - yield real_app + return real_app -@pytest.fixture +@pytest.fixture() def client(app): return app.test_client() -class TestDataAPI: - def test_doc(self, client): - res = client.get("/api/data") - assert b"Data Request Application" in res.data +def test_doc(client): + res = client.get("/api/data") + assert b"Data Request Application" in res.data - def test_bad_format(self, client): - res = client.get("/api/data.fake") - assert b"No data file format" in res.data - def test_bad_begin_json(self, client): - res = client.get("/api/data.json?symbols=air_temp&begin=blah") - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "timestamp" in res["message"] +def test_bad_format(client): + res = client.get("/api/data.fake") + assert b"No data file format" in res.data - def test_bad_order(self, client): - res = client.get("/api/data.json?order=blah&symbols=air_temp") - res = json.loads(res.data.decode()) - assert "column" in res["message"] - assert "row" in res["message"] - def test_bad_epoch(self, client): - res = client.get("/api/data.json?epoch=blah&symbols=air_temp") - res = json.loads(res.data.decode()) - assert "'h'" in res["message"] - assert "'m'" in res["message"] - assert "'s'" in res["message"] - assert "'u'" in res["message"] +def test_bad_begin_json(client): + res = client.get("/api/data.json?symbols=air_temp&begin=blah") + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "timestamp" in res["message"] - def test_bad_interval(self, client): - res = client.get("/api/data.json?interval=blah&symbols=air_temp") - res = json.loads(res.data.decode()) - assert "'1m'" in res["message"] - assert "'5m'" in res["message"] - assert "'1h'" in res["message"] - def test_missing_inst(self, client): - res = client.get("/api/data.json?site=X&symbols=air_temp&begin=-05:00:00") - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "'site'" in res["message"] - assert "'inst'" in res["message"] +def test_bad_order(client): + res = client.get("/api/data.json?order=blah&symbols=air_temp") + res = json.loads(res.data.decode()) + assert "column" in res["message"] + assert "row" in res["message"] - def test_missing_site(self, client): - res = client.get("/api/data.json?inst=X&symbols=air_temp&begin=-05:00:00") - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "'site'" in res["message"] - assert "'inst'" in res["message"] - def test_missing_symbols(self, client): - res = client.get("/api/data.json?begin=-05:00:00") - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "'symbols'" in res["message"] +def test_bad_epoch(client): + res = client.get("/api/data.json?epoch=blah&symbols=air_temp") + res = json.loads(res.data.decode()) + assert "'h'" in res["message"] + assert "'m'" in res["message"] + assert "'s'" in res["message"] + assert "'u'" in res["message"] - def test_nonexistent_symbol(self, client): - res = client.get("/api/data.json?begin=-05:00:00&symbols=aoss.tower.air_temp:aoss.tower.noexist") - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "Unknown symbol" in res["message"] - def test_bad_symbol_site_inst(self, client): - res = client.get("/api/data.json?begin=-05:00:00&symbols=aoss.tower.something.air_temp") - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "3 period-separated parts" in res["message"] +def test_bad_interval(client): + res = client.get("/api/data.json?interval=blah&symbols=air_temp") + res = json.loads(res.data.decode()) + assert "'1m'" in res["message"] + assert "'5m'" in res["message"] + assert "'1h'" in res["message"] - def test_symbol_unknown_site_inst(self, client): - res = client.get("/api/data.json?begin=-05:00:00&symbols=aoss2.tower.air_temp") - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "Unknown site/instrument" in res["message"] - def test_multiple_symbol(self, client): - res = client.get( - "/api/data.json?begin=-05:00:00&symbols=aoss.tower.air_temp:aoss.tower.wind_speed:aoss.tower.air_temp" - ) - res = json.loads(res.data.decode()) - assert res["code"] == 400 - assert res["status"] == "error" - assert "multiple times" in res["message"] +def test_missing_inst(client): + res = client.get("/api/data.json?site=X&symbols=air_temp&begin=-05:00:00") + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "'site'" in res["message"] + assert "'inst'" in res["message"] - def test_too_many_points(self, client): - res = client.get("/api/data.json?symbols=aoss.tower.air_temp&begin=1970-01-01T00:00:00") - assert res.status_code == 413 - res = json.loads(res.data.decode()) - assert "too many values" in res["message"] - assert res["code"] == 413 - assert res["status"] == "fail" - def test_shorthand_one_symbol_json_row(self, client, influxdb_air_temp_9_values): - # row should be the default - res = client.get("/api/data.json?site=aoss&inst=tower&symbols=air_temp&begin=-00:10:00") - res = json.loads(res.data.decode()) - assert res["code"] == 200 - assert res["num_results"] == 9 - assert res["results"]["symbols"] == ["air_temp"] - assert len(res["results"]["timestamps"]) == 9 - assert len(res["results"]["data"]) == 9 - assert len(res["results"]["data"][0]) == 1 +def test_missing_site(client): + res = client.get("/api/data.json?inst=X&symbols=air_temp&begin=-05:00:00") + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "'site'" in res["message"] + assert "'inst'" in res["message"] - def test_shorthand_one_symbol_json_column(self, client, influxdb_air_temp_9_values): - res = client.get("/api/data.json?site=aoss&inst=tower&symbols=air_temp&begin=-00:10:00&order=column") - res = json.loads(res.data.decode()) - assert res["code"] == 200 - assert res["num_results"] == 9 - assert "air_temp" in res["results"]["data"] - assert len(res["results"]["data"]["air_temp"]) == 9 - assert len(res["results"]["timestamps"]) == 9 - @pytest.mark.parametrize( - "symbols", - [ - ["aoss.tower.wind_speed", "aoss.tower.wind_direction"], - ["wind_speed", "wind_direction"], - ], +def test_missing_symbols(client): + res = client.get("/api/data.json?begin=-05:00:00") + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "'symbols'" in res["message"] + + +def test_nonexistent_symbol(client): + res = client.get("/api/data.json?begin=-05:00:00&symbols=aoss.tower.air_temp:aoss.tower.noexist") + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "Unknown symbol" in res["message"] + + +def test_bad_symbol_site_inst(client): + res = client.get("/api/data.json?begin=-05:00:00&symbols=aoss.tower.something.air_temp") + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "3 period-separated parts" in res["message"] + + +def test_symbol_unknown_site_inst(client): + res = client.get("/api/data.json?begin=-05:00:00&symbols=aoss2.tower.air_temp") + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "Unknown site/instrument" in res["message"] + + +def test_multiple_symbol(client): + res = client.get( + "/api/data.json?begin=-05:00:00&symbols=aoss.tower.air_temp:aoss.tower.wind_speed:aoss.tower.air_temp", ) - def test_wind_speed_direction_json(self, symbols, client, influxdb_wind_fields_9_values): - symbol_param = ":".join(symbols) - site_inst_params = "&site=aoss&inst=tower" if "." not in symbols[0] else "" + res = json.loads(res.data.decode()) + assert res["code"] == 400 + assert res["status"] == "error" + assert "multiple times" in res["message"] + + +def test_too_many_points(client): + res = client.get("/api/data.json?symbols=aoss.tower.air_temp&begin=1970-01-01T00:00:00") + assert res.status_code == 413 + res = json.loads(res.data.decode()) + assert "too many values" in res["message"] + assert res["code"] == 413 + assert res["status"] == "fail" + + +def test_shorthand_one_symbol_json_row(client, influxdb_air_temp_9_values): + # row should be the default + res = client.get("/api/data.json?site=aoss&inst=tower&symbols=air_temp&begin=-00:10:00") + res = json.loads(res.data.decode()) + assert res["code"] == 200 + assert res["num_results"] == 9 + assert res["results"]["symbols"] == ["air_temp"] + assert len(res["results"]["timestamps"]) == 9 + assert len(res["results"]["data"]) == 9 + assert len(res["results"]["data"][0]) == 1 + + +def test_shorthand_one_symbol_json_column(client, influxdb_air_temp_9_values): + res = client.get("/api/data.json?site=aoss&inst=tower&symbols=air_temp&begin=-00:10:00&order=column") + res = json.loads(res.data.decode()) + assert res["code"] == 200 + assert res["num_results"] == 9 + assert "air_temp" in res["results"]["data"] + assert len(res["results"]["data"]["air_temp"]) == 9 + assert len(res["results"]["timestamps"]) == 9 + + +@pytest.mark.parametrize( + "symbols", + [ + ["aoss.tower.wind_speed", "aoss.tower.wind_direction"], + ["wind_speed", "wind_direction"], + ], +) +def test_wind_speed_direction_json(symbols, client, influxdb_wind_fields_9_values): + symbol_param = ":".join(symbols) + site_inst_params = "&site=aoss&inst=tower" if "." not in symbols[0] else "" + + res = client.get(f"/api/data.json?symbols={symbol_param}{site_inst_params}&begin=-00:10:00&order=column") + res = json.loads(res.data.decode()) + assert res["code"] == 200 + assert res["num_results"] == 9 + for symbol_name in symbols: + assert symbol_name in res["results"]["data"] + assert not np.isnan(res["results"]["data"][symbol_name]).all() + assert len(list(res["results"]["data"].keys())) == len(symbols) + + +def test_one_symbol_two_insts_json_row(client, mock_influxdb_query): + fake_result = _fake_data( + "1m", + { + ("aoss", "tower"): ["time", "air_temp"], + ("mendota", "buoy"): ["time", "air_temp"], + }, + 9, + ) + with mock_influxdb_query(fake_result): + # row should be the default + res = client.get("/api/data.json?symbols=aoss.tower.air_temp:mendota.buoy.air_temp&begin=-00:10:00") + res = json.loads(res.data.decode()) + assert res["code"] == 200 + assert res["num_results"] == 9 + assert res["results"]["symbols"] == ["aoss.tower.air_temp", "mendota.buoy.air_temp"] + assert len(res["results"]["timestamps"]) == 9 + assert len(res["results"]["data"]) == 9 + assert len(res["results"]["data"][0]) == 2 - res = client.get(f"/api/data.json?symbols={symbol_param}{site_inst_params}&begin=-00:10:00&order=column") - res = json.loads(res.data.decode()) - assert res["code"] == 200 - assert res["num_results"] == 9 - for symbol_name in symbols: - assert symbol_name in res["results"]["data"] - assert not np.isnan(res["results"]["data"][symbol_name]).all() - assert len(list(res["results"]["data"].keys())) == len(symbols) - - def test_one_symbol_two_insts_json_row(self, client, mock_influxdb_query): - fake_result = _fake_data( - "1m", - { - ("aoss", "tower"): ["time", "air_temp"], - ("mendota", "buoy"): ["time", "air_temp"], - }, - 9, + +def test_one_symbol_three_insts_json_row(client, mock_influxdb_query): + fake_result = _fake_data( + "1m", + { + ("site1", "inst1"): ["time", "air_temp"], + ("site2", "inst2"): ["time", "air_temp"], + ("site3", "inst3"): ["time", "air_temp"], + }, + 9, + ) + # row should be the default + from metobsapi.util.data_responses import SYMBOL_TRANSLATIONS + + st = SYMBOL_TRANSLATIONS.copy() + st[("site1", "inst1")] = st[("aoss", "tower")] + st[("site2", "inst2")] = st[("aoss", "tower")] + st[("site3", "inst3")] = st[("aoss", "tower")] + with mock.patch("metobsapi.util.data_responses.SYMBOL_TRANSLATIONS", st), mock_influxdb_query(fake_result): + res = client.get( + "/api/data.json?symbols=site1.inst1.air_temp:site2.inst2.air_temp:site3.inst3.air_temp&begin=-00:10:00", ) - with mock_influxdb_query(fake_result): - # row should be the default - res = client.get("/api/data.json?symbols=aoss.tower.air_temp:mendota.buoy.air_temp&begin=-00:10:00") res = json.loads(res.data.decode()) assert res["code"] == 200 assert res["num_results"] == 9 - assert res["results"]["symbols"] == ["aoss.tower.air_temp", "mendota.buoy.air_temp"] + assert res["results"]["symbols"] == ["site1.inst1.air_temp", "site2.inst2.air_temp", "site3.inst3.air_temp"] assert len(res["results"]["timestamps"]) == 9 assert len(res["results"]["data"]) == 9 - assert len(res["results"]["data"][0]) == 2 - - def test_one_symbol_three_insts_json_row(self, client, mock_influxdb_query): - fake_result = _fake_data( - "1m", - { - ("site1", "inst1"): ["time", "air_temp"], - ("site2", "inst2"): ["time", "air_temp"], - ("site3", "inst3"): ["time", "air_temp"], - }, - 9, - ) - # row should be the default - from metobsapi.util.data_responses import SYMBOL_TRANSLATIONS as st - - st = st.copy() - st[("site1", "inst1")] = st[("aoss", "tower")] - st[("site2", "inst2")] = st[("aoss", "tower")] - st[("site3", "inst3")] = st[("aoss", "tower")] - with mock.patch("metobsapi.util.data_responses.SYMBOL_TRANSLATIONS", st), mock_influxdb_query(fake_result): - res = client.get( - "/api/data.json?symbols=site1.inst1.air_temp:site2.inst2.air_temp:site3.inst3.air_temp&begin=-00:10:00" - ) - res = json.loads(res.data.decode()) - assert res["code"] == 200 - assert res["num_results"] == 9 - assert res["results"]["symbols"] == ["site1.inst1.air_temp", "site2.inst2.air_temp", "site3.inst3.air_temp"] - assert len(res["results"]["timestamps"]) == 9 - assert len(res["results"]["data"]) == 9 - assert len(res["results"]["data"][0]) == 3 - - def test_one_symbol_csv(self, client, influxdb_air_temp_9_values): - # row should be the default - res = client.get("/api/data.csv?symbols=aoss.tower.air_temp&begin=-00:10:00") - res = res.data.decode() - # header, data, newline at end - lines = res.split("\n") - assert len(lines) == 5 + 9 + 1 - # time + 1 channel - assert len(lines[5].split(",")) == 2 - assert "# code: 200" in res - - def test_one_symbol_xml(self, client, influxdb_air_temp_9_values): - from xml.dom.minidom import parseString + assert len(res["results"]["data"][0]) == 3 - # row should be the default - res = client.get("/api/data.xml?symbols=aoss.tower.air_temp&begin=-00:10:00") - res = parseString(res.data.decode()) - # symbols: time and air_temp - assert len(res.childNodes[0].childNodes[0].childNodes) == 2 - # data rows - assert len(res.childNodes[0].childNodes[1].childNodes) == 9 - - def test_three_symbol_csv(self, client, influxdb_3_symbols_9_values): - """Test that multiple channels in a CSV file are structured properly.""" - # row should be the default - res = client.get( - "/api/data.csv?symbols=aoss.tower.air_temp:" "aoss.tower.rel_hum:aoss.tower.wind_speed&begin=-00:10:00" - ) - res = res.data.decode() - # header, data, newline at end - lines = res.split("\n") - assert len(lines) == 5 + 9 + 1 - # time + 3 channels - assert len(lines[5].split(",")) == 4 - assert "# code: 200" in res - - def test_three_symbol_csv_repeat(self, client, influxdb_3_symbols_9_values): - """Test that multiple channels in a CSV file are structured properly.""" - # row should be the default - res = client.get( - "/api/data.csv?symbols=aoss.tower.air_temp:" "aoss.tower.air_temp:aoss.tower.air_temp&begin=-00:10:00" - ) - res = res.data.decode() - # header, data, newline at end - lines = res.split("\n") - # header, data (one empty line), newline at end - assert len(lines) == 5 + 1 + 1 - # time + 1 channel - assert len(lines[5].split(",")) == 1 - assert "# code: 400" in res - - # @mock.patch('metobsapi.data_api.query') - # def test_jsonp_bad_symbol_400(self, query_func, client): - # XXX: Not currently possible with flask-json - # r = _fake_data('1m', {('aoss', 'tower'): ['time', 'air_temp']}, 9) - # query_func.return_value = r - # # row should be the default - # res = client.get('/api/data.json?site=aoss&inst=tower&symbols=bad&begin=-00:10:00&callback=test') - # assert res.status_code == 400 - # res = res.data.decode() - # assert res['code'] == 400 + +def test_one_symbol_csv(client, influxdb_air_temp_9_values): + # row should be the default + res = client.get("/api/data.csv?symbols=aoss.tower.air_temp&begin=-00:10:00") + res = res.data.decode() + # header, data, newline at end + lines = res.split("\n") + assert len(lines) == 5 + 9 + 1 + # time + 1 channel + assert len(lines[5].split(",")) == 2 + assert "# code: 200" in res + + +def test_one_symbol_xml(client, influxdb_air_temp_9_values): + from xml.dom.minidom import parseString + + # row should be the default + res = client.get("/api/data.xml?symbols=aoss.tower.air_temp&begin=-00:10:00") + res = parseString(res.data.decode()) + assert len(res.childNodes[0].childNodes[0].childNodes) == 2 + # data rows + assert len(res.childNodes[0].childNodes[1].childNodes) == 9 + + +def test_three_symbol_csv(client, influxdb_3_symbols_9_values): + """Test that multiple channels in a CSV file are structured properly.""" + # row should be the default + res = client.get( + "/api/data.csv?symbols=aoss.tower.air_temp:aoss.tower.rel_hum:aoss.tower.wind_speed&begin=-00:10:00", + ) + res = res.data.decode() + # header, data, newline at end + lines = res.split("\n") + assert len(lines) == 5 + 9 + 1 + # time + 3 channels + assert len(lines[5].split(",")) == 4 + assert "# code: 200" in res + + +def test_three_symbol_csv_repeat(client, influxdb_3_symbols_9_values): + """Test that multiple channels in a CSV file are structured properly.""" + # row should be the default + res = client.get( + "/api/data.csv?symbols=aoss.tower.air_temp:aoss.tower.air_temp:aoss.tower.air_temp&begin=-00:10:00", + ) + res = res.data.decode() + # header, data, newline at end + lines = res.split("\n") + # header, data (one empty line), newline at end + assert len(lines) == 5 + 1 + 1 + # time + 1 channel + assert len(lines[5].split(",")) == 1 + assert "# code: 400" in res + + +def test_temporary(tmp_path): + assert True diff --git a/metobsapi/tests/test_files_api.py b/metobsapi/tests/test_files_api.py index 0d9f930..32fe14b 100644 --- a/metobsapi/tests/test_files_api.py +++ b/metobsapi/tests/test_files_api.py @@ -18,8 +18,6 @@ class TestFilesAPI(unittest.TestCase): now = datetime.utcnow() cls._datetimes = [now, now - timedelta(days=1), now - timedelta(days=2)] create_fake_archive(file_responses.ARCHIVE_INFO, root=cls.archive_dir, datetimes=cls._datetimes) - # import subprocess - # subprocess.check_call(['/opt/local/bin/tree', cls.archive_dir]) @classmethod def tearDownClass(cls): @@ -40,19 +38,19 @@ class TestFilesAPI(unittest.TestCase): def test_bad_format(self): res = self.app.get("/api/files.fake") - self.assertIn(b"No data file format", res.data) + assert b"No data file format" in res.data def test_missing_streams(self): res = self.app.get("/api/files.json") - self.assertIn(b"stream", res.data) + assert b"stream" in res.data def test_bad_begin(self): res = self.app.get("/api/files.json?streams=test&begin=bad") - self.assertIn(b"timestamp", res.data) + assert b"timestamp" in res.data def test_bad_symbol(self): res = self.app.get("/api/files.json?streams=test") - self.assertIn(b"stream", res.data) + assert b"stream" in res.data def test_tower_daily_ascii_csv(self): res = self.app.get("/api/files.csv?streams=aoss.tower.ascii.l00.*") @@ -77,7 +75,7 @@ class TestFilesAPI(unittest.TestCase): def test_tower_daily_ascii_dated_json(self): dt = self._datetimes[1] begin = dt.strftime("%Y-%m-%d") - res = self.app.get("/api/files.json?streams=aoss.tower.ascii.l00.*&begin={}".format(begin)) + res = self.app.get(f"/api/files.json?streams=aoss.tower.ascii.l00.*&begin={begin}") fn = bytes(dt.strftime("aoss_tower.%Y-%m-%d.ascii"), encoding="utf-8") assert fn in res.data @@ -90,7 +88,7 @@ class TestFilesAPI(unittest.TestCase): """ dt = self._datetimes[1] begin = dt.strftime("%Y-%m-%d") - res = self.app.get("/api/files.json?streams=aoss.tower.ascii.l00.*&begin=-2&end={}".format(begin)) + res = self.app.get(f"/api/files.json?streams=aoss.tower.ascii.l00.*&begin=-2&end={begin}") for dt in self._datetimes[1:]: fn = bytes(dt.strftime("aoss_tower.%Y-%m-%d.ascii"), encoding="utf-8") assert fn in res.data @@ -115,7 +113,6 @@ class TestFilesAPI(unittest.TestCase): for dt in self._datetimes: fn = bytes(dt.strftime("aoss_tower.%Y-%m-%d.ascii"), encoding="utf-8") assert fn in res.data - # fn = bytes(self._datetimes[0].strftime('aoss_tower.%Y-%m-%d.ascii'), encoding='utf-8') # assert fn not in res.data def test_tower_all_patterns(self): diff --git a/metobsapi/tests/test_misc.py b/metobsapi/tests/test_misc.py index 5b0b275..ccfe00a 100644 --- a/metobsapi/tests/test_misc.py +++ b/metobsapi/tests/test_misc.py @@ -3,18 +3,7 @@ import shutil import tempfile import unittest -# class TestErrorHandlers(unittest.TestCase): -# def setUp(self): -# import metobsapi -# metobsapi.app.config['TESTING'] = True -# metobsapi.app.config['DEBUG'] = True -# self.app = metobsapi.app.test_client() -# -# def test_internal_server(self): -# with unittest.mock.patch('metobsapi.data_api.modify_data') as m: -# m.side_effect = ValueError("Random Test Error") -# res = self.app.get('/api/data.json') -# pass +SUCCESS_CODE = 200 class TestIndex(unittest.TestCase): @@ -30,7 +19,7 @@ class TestIndex(unittest.TestCase): def test_index(self): res = self.app.get("/api/") - self.assertIn(b"Metobs API", res.data) + assert b"Metobs API" in res.data class TestArchiveInfo(unittest.TestCase): @@ -45,8 +34,6 @@ class TestArchiveInfo(unittest.TestCase): now = datetime.utcnow() cls._datetimes = [now, now - timedelta(days=1), now - timedelta(days=2)] create_fake_archive(file_responses.ARCHIVE_INFO, root=cls.archive_dir, datetimes=cls._datetimes) - # import subprocess - # subprocess.check_call(['/opt/local/bin/tree', cls.archive_dir]) @classmethod def tearDownClass(cls): @@ -66,8 +53,8 @@ class TestArchiveInfo(unittest.TestCase): def test_archive_info(self): res = self.app.get("/api/archive/info") res = json.loads(res.data.decode()) - self.assertEqual(res["code"], 200) - self.assertIn("sites", res) + assert res["code"] == SUCCESS_CODE + assert "sites" in res if __name__ == "__main__": diff --git a/metobsapi/util/__init__.py b/metobsapi/util/__init__.py index dc57c9b..a889cf5 100644 --- a/metobsapi/util/__init__.py +++ b/metobsapi/util/__init__.py @@ -1,12 +1,11 @@ -#!/usr/bin/env python """Utility functions to help with testing and running hte MetObs API.""" -import os from datetime import datetime from enum import Enum +from pathlib import Path -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) -FAKE_ARCHIVE_PATH = os.path.join(SCRIPT_DIR, "..", "..", "fake_archive") +SCRIPT_DIR = Path(__file__).resolve().parent +FAKE_ARCHIVE_PATH = SCRIPT_DIR / ".." / ".." / "fake_archive" class ProductFrequency(Enum): @@ -22,10 +21,10 @@ class ProductFrequency(Enum): # Directory format for the type of data file frequency FREQUENCY_DIR_FMT = { - ProductFrequency.DAILY_DIR: os.path.join("%Y", "%m", "%d"), - ProductFrequency.DAILY_FILE: os.path.join("%Y", "%m", "%d"), - ProductFrequency.MONTHLY_DIR: os.path.join("%Y", "%m"), - ProductFrequency.MONTHLY_FILE: os.path.join("%Y", "%m"), + ProductFrequency.DAILY_DIR: Path("%Y", "%m", "%d"), + ProductFrequency.DAILY_FILE: Path("%Y", "%m", "%d"), + ProductFrequency.MONTHLY_DIR: Path("%Y", "%m"), + ProductFrequency.MONTHLY_FILE: Path("%Y", "%m"), } @@ -33,34 +32,23 @@ def create_fake_archive(archive_info, root=FAKE_ARCHIVE_PATH, datetimes=None): if datetimes is None: datetimes = [datetime.utcnow()] - curr_dir = os.getcwd() - os.makedirs(root, exist_ok=True) - os.chdir(root) + _generate_archive_paths(Path(root), archive_info, datetimes) + + +def _generate_archive_paths(root: Path, archive_info: dict, datetimes: list[datetime]) -> None: for site, site_info in archive_info.items(): - os.makedirs(site, exist_ok=True) - os.chdir(site) for inst, inst_info in site_info["instruments"].items(): - os.makedirs(inst, exist_ok=True) - os.chdir(inst) for level_name, level_info in inst_info["levels"].items(): - os.makedirs(level_name, exist_ok=True) - os.chdir(level_name) for version_name in level_info["versions"]: - os.makedirs(version_name, exist_ok=True) - os.chdir(version_name) for dt in datetimes: for pattern_info in level_info["products"].values(): if pattern_info["frequency"] not in FREQUENCY_DIR_FMT: - raise RuntimeError("Unknown frequency '%s'", pattern_info["frequency"]) + msg = f"Unknown frequency {pattern_info['frequency']!r}" + raise RuntimeError(msg) fmt = FREQUENCY_DIR_FMT[pattern_info["frequency"]] - dated_dir = dt.strftime(fmt) - os.makedirs(dated_dir, exist_ok=True) - os.chdir(dated_dir) - open(dt.strftime(pattern_info["pattern"]), "a").close() - os.chdir("../" * (fmt.count(os.sep) + 1)) - os.chdir("..") - os.chdir("..") - os.chdir("..") - os.chdir("..") - os.chdir(curr_dir) + dated_dir = dt.strftime(str(fmt)) + file_parent_dir = root / site / inst / level_name / version_name / dated_dir + file_parent_dir.mkdir(parents=True, exist_ok=True) + fake_file = file_parent_dir / dt.strftime(str(pattern_info["pattern"])) + fake_file.touch() diff --git a/metobsapi/util/data_responses.py b/metobsapi/util/data_responses.py index 849e8db..9be6a71 100644 --- a/metobsapi/util/data_responses.py +++ b/metobsapi/util/data_responses.py @@ -101,11 +101,11 @@ ERROR_MESSAGES = { "bad_order": (400, "'order' can only be 'column' or 'row' (default)"), "bad_epoch": ( 400, - "'epoch' can only be unspecified or {}".format(", ".join(["'{}'".format(x) for x in epoch_keys])), + "'epoch' can only be unspecified or {}".format(", ".join([f"'{x}'" for x in epoch_keys])), ), "bad_interval": ( 400, - "'interval' can only be unspecified or {}".format(", ".join(["'{}'".format(x) for x in INTERVALS.keys()])), + "'interval' can only be unspecified or {}".format(", ".join([f"'{x}'" for x in INTERVALS])), ), "malformed_timestamp": (400, "could not parse timestamp parameters 'begin' or 'end', check format"), "missing_symbols": (400, "'symbols' must be specified"), diff --git a/metobsapi/util/file_responses.py b/metobsapi/util/file_responses.py index e0b3137..2fcef97 100644 --- a/metobsapi/util/file_responses.py +++ b/metobsapi/util/file_responses.py @@ -1,12 +1,12 @@ """Configuration for file-based API responses.""" -import os from collections import defaultdict +from pathlib import Path from typing import TypedDict from metobsapi.util import FREQUENCY_DIR_FMT, ProductFrequency # ARM Data Levels: https://www.arm.gov/policies/datapolicies/formatting-and-file-naming-protocols -L00_DESCRIPTION = "raw data – primary raw data stream collected directly from instrument" +L00_DESCRIPTION = "raw data - primary raw data stream collected directly from instrument" LA0_DESCRIPTION = "converted to netCDF" L11_DESCRIPTION = "calibration factors applied and converted to geophysical units" LB1_DESCRIPTION = "QC checks applied to measurements" @@ -46,9 +46,6 @@ ARCHIVE_INFO: dict[str, SiteInfo] = { "versions": ("version_00",), "products": { # 'nc-monthly': { - # 'frequency': ProductFrequency.MONTHLY_DIR, - # 'pattern': 'aoss_tower.%Y-%m.nc', - # 'display_name': 'Monthly NetCDF file (aoss_tower.YYYY-MM.nc)', # }, "nc-daily": { "frequency": ProductFrequency.DAILY_FILE, @@ -164,7 +161,7 @@ for file_suffix in ( product_id = parts[1].lower() + "-" + parts[0].lower() nfo = { "frequency": ProductFrequency.DAILY_DIR, - "pattern": "%y%m%d{}".format(file_suffix), + "pattern": f"%y%m%d{file_suffix}", } ARCHIVE_INFO["aoss"]["instruments"]["aeri"]["levels"]["level_00"]["products"][product_id] = nfo @@ -189,13 +186,13 @@ for site, site_info in ARCHIVE_INFO.items(): ) all_products.append(stream_id) - path = os.path.join( - site, - inst, - level, - version, - FREQUENCY_DIR_FMT[pattern_info["frequency"]], - pattern_info["pattern"], + path = ( + Path(site) + / inst + / level + / version + / FREQUENCY_DIR_FMT[pattern_info["frequency"]] + / pattern_info["pattern"] ) stream_info = { @@ -207,8 +204,13 @@ for site, site_info in ARCHIVE_INFO.items(): "file_pattern": pattern_info["pattern"], } if "thumbnail_pattern" in pattern_info: - stream_info["thumbnail"] = path.replace( - pattern_info["pattern"], pattern_info["thumbnail_pattern"] + stream_info["thumbnail"] = ( + Path(site) + / inst + / level + / version + / FREQUENCY_DIR_FMT[pattern_info["frequency"]] + / pattern_info["thumbnail_pattern"] ) else: stream_info["thumbnail"] = None diff --git a/metobsapi/util/query_influx.py b/metobsapi/util/query_influx.py index a1fbb80..317fbd2 100644 --- a/metobsapi/util/query_influx.py +++ b/metobsapi/util/query_influx.py @@ -27,8 +27,7 @@ QueryResult: TypeAlias = list def query(symbols, begin, end, interval, epoch): handler = QueryHandler(symbols, begin, end, interval) - result = handler.query(epoch) - return result + return handler.query(epoch) class QueryHandler: @@ -48,11 +47,11 @@ class QueryHandler: def query(self, epoch): if not self.is_v2_db and InfluxDBClientv1 is None: - raise ImportError("Missing 'influxdb' dependency to communicate with InfluxDB v1 database") + msg = "Missing 'influxdb' dependency to communicate with InfluxDB v1 database" + raise ImportError(msg) if InfluxDBClientv1 is None and InfluxDBClientv2 is None: - raise ImportError( - "Missing 'influxdb-client' and legacy 'influxdb' dependencies to communicate " "with InfluxDB database" - ) + msg = "Missing 'influxdb-client' and legacy 'influxdb' dependencies to communicate with InfluxDB database" + raise ImportError(msg) if InfluxDBClientv2 is None: # fallback to v1 library @@ -62,7 +61,7 @@ class QueryHandler: # version 2 library query_str = _build_queries_v2(self._symbols, self._begin, self._end, self._interval) - result = _query_influxdbv2(query_str, epoch) + result = _query_influxdbv2(query_str) return self._convert_v2_result_to_api_dataframe(result) def _convert_v1_result_to_dataframe(self, result: QueryResult) -> pd.DataFrame: @@ -71,11 +70,11 @@ class QueryHandler: # we expect one result for each site/inst combination res = result[idx] data_points = res.get_points("metobs_" + self._interval, tags={"site": site, "inst": inst}) - frame = pd.DataFrame(data_points, columns=["time"] + influx_symbols) + frame = pd.DataFrame(data_points, columns=["time", *influx_symbols]) # rename channels to be site-based (time will be the index) frame.columns = ["time"] + [f"{site}.{inst}.{col_name}" for col_name in frame.columns[1:]] - frame.set_index("time", inplace=True) - frame.fillna(value=np.nan, inplace=True) + frame = frame.set_index("time") + frame = frame.fillna(value=np.nan) frames.append(frame) frame = pd.concat(frames, axis=1, copy=False) return frame @@ -85,7 +84,7 @@ class QueryHandler: for (site, inst), symbol_names in self._symbols.items(): frames_for_inst = [df for df in result if df["site"][0] == site and df["inst"][0] == inst] if not frames_for_inst: - data_frame = pd.DataFrame(columns=["_time"] + symbol_names) + data_frame = pd.DataFrame(columns=["_time", *symbol_names]) data_frame = data_frame.set_index("_time") else: data_frame = frames_for_inst[0] @@ -102,14 +101,14 @@ def _build_queries_v1(symbols, begin, end, interval): end = parse_dt_v1(end) where_clause = [] - where_clause.append("time >= {}".format(begin)) - where_clause.append("time <= {}".format(end)) + where_clause.append(f"time >= {begin}") + where_clause.append(f"time <= {end}") queries = [] for (site, inst), symbol_names in symbols.items(): wc = where_clause.copy() - wc.append("site='{}'".format(site)) - wc.append("inst='{}'".format(inst)) + wc.append(f"site='{site}'") + wc.append(f"inst='{inst}'") query = QUERY_FORMAT.format( symbol_list=", ".join(symbol_names), interval=interval, @@ -123,19 +122,19 @@ def _build_queries_v1(symbols, begin, end, interval): def parse_dt_v1(d): if d is None: return "now()" - elif isinstance(d, timedelta): - return "now() - {:d}s".format(int(d.total_seconds())) - else: - return d.strftime("'%Y-%m-%dT%H:%M:%SZ'") + if isinstance(d, timedelta): + total_seconds = int(d.total_seconds()) + return f"now() - {total_seconds:d}s" + return d.strftime("'%Y-%m-%dT%H:%M:%SZ'") def parse_dt_v2(d): if d is None: return "now()" - elif isinstance(d, timedelta): - return "-{:d}s".format(int(d.total_seconds())) - else: - return d.strftime("'%Y-%m-%dT%H:%M:%SZ'") + if isinstance(d, timedelta): + total_seconds = int(d.total_seconds()) + return f"-{total_seconds:d}s" + return d.strftime("'%Y-%m-%dT%H:%M:%SZ'") def _query_influxdbv1(query_str, epoch) -> QueryResult: @@ -157,7 +156,7 @@ def _query_influxdbv1(query_str, epoch) -> QueryResult: ) res = client.query(query_str, epoch=epoch) if not isinstance(res, list): - res = [res] + return [res] return res @@ -191,19 +190,15 @@ from(bucket:"metobs/forever") stop=range_stop, ) queries.append(this_query) - query_str = "".join(queries) - return query_str + return "".join(queries) -def _query_influxdbv2(query_str, epoch) -> QueryResult: +def _query_influxdbv2(query_str) -> QueryResult: client_kwargs = { "url": "http://{}:{}".format(current_app.config["INFLUXDB_HOST"], current_app.config["INFLUXDB_PORT"]), "org": current_app.config["INFLUXDB_ORG"], } - client_kwargs["token"] = current_app.config[ - "INFLUXDB_TOKEN" - ] # "HSHUoHr1MHIIvQrh7Wq-MrWorg1r5FqQegiuUKjGVDtf355eynUo8mNdTtpc2eG7oOw5p6xjlh-isRDQUuXWyA==" - print(client_kwargs) + client_kwargs["token"] = current_app.config["INFLUXDB_TOKEN"] with InfluxDBClientv2(**client_kwargs) as client: query_api = client.query_api() data_frame = query_api.query_data_frame(query_str, data_frame_index="_time") diff --git a/pyproject.toml b/pyproject.toml index d3c8c56..b4e7b00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,9 +26,26 @@ exclude = ''' ''' +[tool.ruff] +line-length = 120 +select = [ + "E", "F", "W", "C90", "I", "N", "D", "UP", "YTT", "S", "BLE", "B", "A", + "COM", "C4", "T10", "EXE", "ISC", "ICN", "INP", "PIE", "T20", "PYI", "PT", + "Q", "RSE", "RET", "SLF", "SIM", "TID", "TCH", "ARG", "PTH", "ERA", "PD", + "PGH", "PL", "TRY", "NPY", "RUF", +] +ignore = ["D100", "D101", "D102", "D103", "D104", "D106", "D107", "D203", "D213", "B008"] + +[tool.ruff.per-file-ignores] +"metobsapi/tests/*" = ["S", "ARG001", "ARG002", "PLR2004"] + [tool.mypy] python_version = "3.10" +[tool.coverage] +relative_files = true +omit = ["metobsapi/version.py"] + [project] name = "metobsapi" authors = [ diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 207d834..0000000 --- a/setup.cfg +++ /dev/null @@ -1,8 +0,0 @@ -[flake8] -max-line-length = 120 -ignore = D100,D101,D102,D103,D104,D106,D107,W503,E203,B008 - -[coverage:run] -relative_files = True -omit = - metobsapi/version.py -- GitLab