David Hoese · fd07d99b
--- a/mendotabuoy/level_b1/nc.py

+ 40

− 0
+++ b/mendotabuoy/level_b1/nc.py

+ 40

− 0
 @@ -8,6 +8,7 @@ import logging
 @@ -8,6 +8,7 @@ import logging
 import platform
 import pandas as pd
 from datetime import datetime
+from copy import deepcopy
 from mendotabuoy.meta import station as station_info, database_dict, db_rename, created_vars
 from mendotabuoy.level_00.metdata import MetdataNarrator, Metdata2MinNarrator
 from mendotabuoy.level_00.limnodata import LimnoDataNarrator
 @@ -93,6 +94,8 @@ def filter_unused_columns(frame, database):
 @@ -93,6 +94,8 @@ def filter_unused_columns(frame, database):
            # this key is created later
            continue
        elif isinstance(frame_key, (list, tuple)):
+            # the NetCDF variable could be multiple inputs (water temp)
+            # or we could have multiple options for which variable to use
            useful_vars.extend(frame_key)
        else:
            useful_vars.append(frame_key)
 @@ -175,12 +178,49 @@ def _get_data_frame(input_files):
 @@ -175,12 +178,49 @@ def _get_data_frame(input_files):
    return joined_frame
+def choose_database_variables(frame, database):
+    """Choose which one of multiple historical variables to use.
+
+    An entry of ``database`` may give a list/tuple of candidate field
+    names instead of a single one. For each such entry, the first
+    candidate that is contained in ``frame`` is selected.
+
+    ``database`` is modified in place and nothing is returned:
+
+    * The candidates are read from ``var_info['_var']`` when present,
+      otherwise from ``var_info['name']``. ``var_info['name']`` is looked
+      up unconditionally, so every entry must provide a ``'name'`` key.
+    * Entries with a truthy ``'_multi_var'`` flag, and entries whose
+      candidate value is ``None`` or not a list/tuple, are left untouched.
+    * When a candidate is found, the entry is replaced by a shallow copy
+      whose ``'_var'`` is the chosen name. If ``'units'`` is itself a
+      list/tuple it is reduced to the element at the chosen candidate's
+      index.
+    * When no candidate is found, the entry is deleted from ``database``.
+
+    The loop runs over ``list(database.items())``, a snapshot, which is
+    what allows entries to be reassigned or deleted while iterating.
+
+    :param frame: container tested with ``single_key in frame``
+        (presumably a pandas DataFrame, where that tests column labels;
+        confirm against ``_get_data_frame``).
+    :param database: dict mapping NetCDF variable names to metadata dicts.
+    """
+    for netcdf_var_name, var_info in list(database.items()):
+        # prefer an explicit '_var'; otherwise fall back to 'name' (original note:
+        # 'name' is for the summary file, else it should equal the database key)
+        frame_key = var_info.get('_var', var_info['name'])
+        is_multi_var = var_info.get('_multi_var', False)
+        if is_multi_var:
+            continue
+        if frame_key is None or not isinstance(frame_key, (list, tuple)):
+            continue
+        # figure out which of the candidate names is present in the frame
+        for idx, single_key in enumerate(frame_key):
+            if single_key in frame:
+                # first match wins; copy so the entry is replaced, not edited
+                var_info = var_info.copy()
+                var_info['_var'] = single_key
+                # narrow per-candidate metadata (units) to match this choice
+                if isinstance(var_info.get('units'), (list, tuple)):
+                    var_info['units'] = var_info['units'][idx]
+                LOG.info("Choosing level 00 field {} for netcdf variable "
+                         "{}".format(single_key, netcdf_var_name))
+                database[netcdf_var_name] = var_info
+                break
+        else:
+            # loop ended without break: no candidate found (may not currently exist)
+            LOG.info("Removing {} from possible netcdf variables since it "
+                     "doesn't exist in the data frame".format(netcdf_var_name))
+            del database[netcdf_var_name]
 def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
                        start=None, end=None, interval_width=None,
                        summary=False,
                        database=database_dict, datastream=None):
+    database = deepcopy(database)
    depths = pd.Series(database['depth']['_values'], name='depth')
    frame = _get_data_frame(input_files)
+    # rewrite database with available variables
+    choose_database_variables(frame, database)
    # drop unused variables
    frame = filter_unused_columns(frame, database)
    # rename columns to netcdf friendly names