Skip to content
Snippets Groups Projects

Add ability for netcdf generation to find the best historical variable

Closed David Hoese requested to merge davidh/MendotaBuoy:bugfix-nc-gen into master
2 files
+ 45
4
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 40
0
@@ -8,6 +8,7 @@ import logging
@@ -8,6 +8,7 @@ import logging
import platform
import platform
import pandas as pd
import pandas as pd
from datetime import datetime
from datetime import datetime
 
from copy import deepcopy
from mendotabuoy.meta import station as station_info, database_dict, db_rename, created_vars
from mendotabuoy.meta import station as station_info, database_dict, db_rename, created_vars
from mendotabuoy.level_00.metdata import MetdataNarrator, Metdata2MinNarrator
from mendotabuoy.level_00.metdata import MetdataNarrator, Metdata2MinNarrator
from mendotabuoy.level_00.limnodata import LimnoDataNarrator
from mendotabuoy.level_00.limnodata import LimnoDataNarrator
@@ -93,6 +94,8 @@ def filter_unused_columns(frame, database):
@@ -93,6 +94,8 @@ def filter_unused_columns(frame, database):
# this key is created later
# this key is created later
continue
continue
elif isinstance(frame_key, (list, tuple)):
elif isinstance(frame_key, (list, tuple)):
 
# the NetCDF variable could be multiple inputs (water temp)
 
# or we could have multiple options for which variable to use
useful_vars.extend(frame_key)
useful_vars.extend(frame_key)
else:
else:
useful_vars.append(frame_key)
useful_vars.append(frame_key)
@@ -175,12 +178,49 @@ def _get_data_frame(input_files):
@@ -175,12 +178,49 @@ def _get_data_frame(input_files):
return joined_frame
return joined_frame
 
def choose_database_variables(frame, database):
    """Choose which one of multiple historical variables to use.

    A ``database`` entry may list several candidate level 00 field names
    (a list or tuple stored under ``'_var'``, or under ``'name'`` when
    ``'_var'`` is absent).  For each such entry the first candidate that
    is present in ``frame`` is selected.

    ``database`` is modified in place:

    * when a candidate is found, the entry is replaced by a shallow copy
      whose ``'_var'`` is the chosen field name; if ``'units'`` is also a
      list or tuple it is reduced to the element at the chosen index
      (``'units'`` is assumed to be parallel to the candidate list);
    * when no candidate is found, the entry is deleted.

    Entries flagged with a truthy ``'_multi_var'`` and entries whose field
    name is not a list or tuple are left untouched.

    :param frame: container supporting ``key in frame`` for level 00 field
        names (presumably the joined pandas DataFrame -- confirm with caller)
    :param database: dict mapping NetCDF variable name to its info dict
    :raises KeyError: if an entry that is not ``'_multi_var'`` has neither
        ``'_var'`` nor ``'name'``
    :returns: None
    """
    # iterate over a snapshot because entries are replaced/deleted below
    for netcdf_var_name, var_info in list(database.items()):
        if var_info.get('_multi_var', False):
            # the listed fields are not alternatives to choose between
            continue

        # '_var' overrides 'name' (used for the summary file); only look up
        # 'name' when '_var' is missing so entries without a 'name' key
        # don't raise needlessly
        if '_var' in var_info:
            frame_key = var_info['_var']
        else:
            frame_key = var_info['name']

        # a single name (or None) means there is nothing to choose
        if not isinstance(frame_key, (list, tuple)):
            continue

        # figure out which variable of our choices is in the frame
        for idx, single_key in enumerate(frame_key):
            if single_key not in frame:
                continue

            # this is the one we'll use; copy so the caller's original
            # info dict is not mutated
            chosen_info = var_info.copy()
            chosen_info['_var'] = single_key

            # other metadata to filter based on this choice
            units = chosen_info.get('units')
            if isinstance(units, (list, tuple)):
                chosen_info['units'] = units[idx]

            LOG.info("Choosing level 00 field %s for netcdf variable %s",
                     single_key, netcdf_var_name)
            database[netcdf_var_name] = chosen_info
            break
        else:
            # none of these variables were found (may not currently exist)
            LOG.info("Removing %s from possible netcdf variables since it "
                     "doesn't exist in the data frame", netcdf_var_name)
            del database[netcdf_var_name]
 
 
def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
start=None, end=None, interval_width=None,
start=None, end=None, interval_width=None,
summary=False,
summary=False,
database=database_dict, datastream=None):
database=database_dict, datastream=None):
 
database = deepcopy(database)
depths = pd.Series(database['depth']['_values'], name='depth')
depths = pd.Series(database['depth']['_values'], name='depth')
frame = _get_data_frame(input_files)
frame = _get_data_frame(input_files)
 
# rewrite database with available variables
 
choose_database_variables(frame, database)
# drop unused variables
# drop unused variables
frame = filter_unused_columns(frame, database)
frame = filter_unused_columns(frame, database)
# rename columns to netcdf friendly names
# rename columns to netcdf friendly names
Loading