Unverified Commit 18835498 authored by David Hoese

Update netcdf generation to use 5s data in summary/monthly generation

parent cfc38e61
@@ -20,9 +20,10 @@ KNOTS_9 = calc.knots_to_mps(9.)
 KNOTS_5 = calc.knots_to_mps(5.)
 KNOTS_3 = calc.knots_to_mps(3.)
 KNOTS_2 = calc.knots_to_mps(2.)
+DEFAULT_FLOAT_FILL = -9999.
-def make_mean_dict(source_dict):
+def make_summary_dict(source_dict):
     """Create the '_mean','_low','_high' file structure."""
     dest_dict = {}
     for key in source_dict:
@@ -32,9 +33,6 @@ def make_mean_dict(source_dict):
     return dest_dict
-MEAN_DATABASE = make_mean_dict(schema.database)
 def filter_array(arr, valid_min, valid_max, valid_delta):
     """Create QC field array.
@@ -93,15 +91,15 @@ def create_variables(nc_file, first_stamp, database, chunk_sizes=None, zlib=Fals
     coordinates = {
         # fields: type, dimension, fill, valid_min, std_name, longname, units, valid_max, cf_role, axis
-        'time': [np.float64, ('time',), -999., None, None, "Hour offset from midnight",
+        'time': [np.float64, ('time',), DEFAULT_FLOAT_FILL, None, None, "Hour offset from midnight",
                  t_u, None, None, None],
-        'lon': [np.float32, tuple(), -999., -180., 'longitude', None, 'degrees_east', 180., None],
-        'lat': [np.float32, tuple(), -999., -90., 'latitude', None, 'degrees_north', 90., None],
-        'alt': [np.float32, tuple(), -999., None, 'height', 'vertical distance', 'm', None, None],
+        'lon': [np.float32, tuple(), DEFAULT_FLOAT_FILL, -180., 'longitude', None, 'degrees_east', 180., None],
+        'lat': [np.float32, tuple(), DEFAULT_FLOAT_FILL, -90., 'latitude', None, 'degrees_north', 90., None],
+        'alt': [np.float32, tuple(), DEFAULT_FLOAT_FILL, None, 'height', 'vertical distance', 'm', None, None],
         # int64 for base_time would be best, but NetCDF4 Classic does not support it
         # NetCDF4 Classic mode was chosen so users can use MFDatasets (multi-file datasets)
-        'base_time': [np.int32, tuple(), -999., None, 'time', btln, btu, None, None],
-        'time_offset': [np.float64, ('time',), -999., None, 'time', to_ln, to_u, None, None],
+        'base_time': [np.int32, tuple(), DEFAULT_FLOAT_FILL, None, 'time', btln, btu, None, None],
+        'time_offset': [np.float64, ('time',), DEFAULT_FLOAT_FILL, None, 'time', to_ln, to_u, None, None],
         'station_name': ['c', ('max_len_station_name',), '\0', None, None, 'station name', None, None, 'timeseries_id'],
     }
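The tuple layout named in the comment above (type, dimension, fill, valid_min, std_name, longname, units, valid_max, cf_role, axis) is what drives variable creation further down. A rough sketch of how the 'time' row could be unpacked, with nc_file assumed to be an open netCDF4.Dataset; attribute handling in the real code may differ:

    # Hedged sketch only; unpack the 'time' schema row into a variable.
    dtype, dims, fill, vmin, std_name, long_name, units, vmax, cf_role, axis = coordinates['time']
    time_var = nc_file.createVariable('time', dtype, dimensions=dims, fill_value=fill, zlib=zlib)
    time_var.long_name = long_name
    time_var.units = units  # this is the t_u string referenced in the table above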
@@ -154,7 +152,7 @@ def create_variables(nc_file, first_stamp, database, chunk_sizes=None, zlib=Fals
         varTup = database[entry]
         variable = nc_file.createVariable(entry, np.float32,
-                                          dimensions=('time',), fill_value=float(-999), zlib=zlib,
+                                          dimensions=('time',), fill_value=DEFAULT_FLOAT_FILL, zlib=zlib,
                                           chunksizes=chunk_sizes)
         variable.standard_name = varTup[1]
@@ -250,7 +248,7 @@ def minute_averages(frame):
     return new_frame.fillna(np.nan)
-def average_over_interval(frame, interval_width):
+def summary_over_interval(frame, interval_width):
    """takes a frame and an interval to average it over, and returns a minimum,
    maximum, and average dataframe for that interval
    """
@@ -309,7 +307,7 @@ def write_vars(nc_file, frame, database):
         if varName not in fileVar:
             LOG.debug('Unused input variable: %s', varName)
             continue
-        fileVar[varName][:] = frame[varName].fillna(-999.).values
+        fileVar[varName][:] = frame[varName].fillna(DEFAULT_FLOAT_FILL).values
         valid_min = database[varName][5]
         valid_max = database[varName][6]
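write_vars reads valid_min and valid_max from the schema right after filling the variable. The body of filter_array is outside this diff, so the following range check is only an assumed sketch of the QC masking it presumably performs:

    import numpy as np

    def in_valid_range(arr, valid_min, valid_max):
        # Hedged sketch of a valid-range mask; the real QC flag logic may differ.
        mask = np.isfinite(arr)
        if valid_min is not None:
            mask &= arr >= valid_min
        if valid_max is not None:
            mask &= arr <= valid_max
        return mask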
@@ -354,14 +352,40 @@ def write_global_attributes(nc_file, input_sources):
 def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
                         start=None, end=None, interval_width=None,
+                        summary=False,
                         database=schema.database):
     frame = get_data(input_files)
     if frame.empty:
         raise ValueError("No data found from input files: {}".format(", ".join(input_files)))
-    frame = minute_averages(frame)
-    if interval_width:
-        frame = average_over_interval(frame, interval_width)
+    # Add wind direction components so we can average wind direction properly
+    frame['wind_east'], frame['wind_north'], _ = calc.wind_vector_components(frame['wind_speed'], frame['wind_dir'])
+    # round up each 1 minute group so data at time T is the average of data
+    # from T - 1 (exclusive) to T (inclusive).
+    # new_frame = frame.resample('1T', closed='right', loffset='1T').mean()
+    new_frame = frame.resample('5S', closed='right', loffset='5S').mean()
+    # 2 minute rolling average of 5 second data (5 seconds * 24 = 120 seconds = 2 minutes)
+    winds_frame_5s = new_frame[['wind_speed', 'wind_east', 'wind_north']]
+    # winds_frame_5s = winds_frame_5s.resample('5S', closed='right', loffset='5S').mean()
+    winds_frame_2m = winds_frame_5s.rolling(24, win_type='boxcar').mean()
+    winds_frame_2m['gust'] = calculate_wind_gust(winds_frame_5s['wind_speed'], winds_frame_2m['wind_speed'])
+    # rolling average is used for mean output
+    new_frame.update(winds_frame_2m)  # adds wind_speed, wind_east/north
+    new_frame['gust'] = winds_frame_2m['gust']
+    # average the values
+    if summary:
+        frame = summary_over_interval(frame, interval_width)
+    else:
+        frame = new_frame.resample(interval_width, closed='right', loffset=interval_width).mean()
+        # gust_idx = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
+        # frame['gust'][:] = new_frame['gust'][gust_idx.values]
+        # frame['wind_dir'] = calc.wind_vector_degrees(frame['wind_east'][gust_idx.values], frame['wind_north'][gust_idx.values])
+        frame['wind_dir'] = calc.wind_vector_degrees(frame['wind_east'], frame['wind_north'])
+        frame['gust'] = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).max()
+    frame.fillna(np.nan, inplace=True)
     if start and end:
         frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
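To see the new 5-second scheme in isolation: bin to 5 s labelled on the right edge, take a 24-sample (2-minute) boxcar rolling mean, then aggregate to the output interval. Below is a self-contained demo on synthetic data (names and data are invented; loffset assumes a pandas version that still supports it, and win_type='boxcar' requires scipy):

    import numpy as np
    import pandas as pd

    # One hour of synthetic ~1 Hz wind speed observations.
    index = pd.date_range('2017-01-01', periods=3600, freq='S')
    speeds = pd.Series(5 + np.random.rand(3600), index=index, name='wind_speed')

    # Bin to 5 s so the value at time T averages (T - 5s, T], as in the diff above.
    speeds_5s = speeds.resample('5S', closed='right', loffset='5S').mean()

    # 24 samples * 5 s = 120 s: the 2-minute running mean used for the mean output.
    speeds_2m = speeds_5s.rolling(24, win_type='boxcar').mean()

    # Crude gust-like quantity (NOT the repository's calculate_wind_gust):
    # peak 5 s excess over the 2-minute mean, reduced to one value per minute.
    gust_proxy = (speeds_5s - speeds_2m).resample('1T', closed='right', loffset='1T').max()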
@@ -371,6 +395,7 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
     else:
         chunk_sizes = [frame.shape[0]]
+    import ipdb; ipdb.set_trace()
     first_stamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
     # NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See:
     # http://unidata.github.io/netcdf4-python/#netCDF4.MFDataset
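The NETCDF4_CLASSIC comment refers to netCDF4-python's multi-file reader. A brief usage sketch with invented file and variable names:

    from netCDF4 import MFDataset

    # Hypothetical monthly output files aggregated along the unlimited time dimension.
    nc = MFDataset(['tower.2017-01.nc', 'tower.2017-02.nc'])
    air_temp = nc.variables['air_temp'][:]  # variable name assumed
    nc.close()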
@@ -406,7 +431,7 @@ def main():
     parser.add_argument('-e', '--end-time', type=_dt_convert,
                         help='End time of massive netcdf file. Formats allowed:' +
                              "\'YYYY-MM-DDTHH:MM:SS\', \'YYYY-MM-DD\'")
-    parser.add_argument('-n', '--interval',
+    parser.add_argument('-n', '--interval', default='1T',
                         help="""Width of the interval to average input data
 over in Pandas offset format. If not specified, 1 minute averages are used. If
 specified then '_high', '_mean', and '_low' versions of the data fields are
@@ -414,6 +439,8 @@ written to the output NetCDF.
 Use '1D' for daily or '5T' for 5 minute averages.
 See this page for more details:
 http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases""")
+    parser.add_argument('--summary', action='store_true',
+                        help="Create a file with _low, _mean, _high versions of every variable name")
     parser.add_argument('-f', '--fields', nargs='+', default=schema.met_vars,
                         help="Variable names to include in the NetCDF file (base name, no suffixes)")
     parser.add_argument('--chunk-size', type=int, help='chunk size for the netCDF file')
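As a rough usage illustration of the new default interval and --summary flag (the script name, positional argument layout, and file names below are all invented for this example):

    python nc.py --summary -n 1D -s 2017-01-01 -e 2017-01-31 \
        -f air_temp wind_speed --chunk-size 1440 \
        level_00/*.ascii aoss_tower.summary.201701.nc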
@@ -438,8 +465,9 @@ each input file is mapped to the corresponding output file.
     elif not args.start_time and args.end_time:
         raise ValueError('start time must be specified when end time is specified')
-    database = MEAN_DATABASE if args.interval else schema.database
-    mini_database = {k: database[k] for k in args.fields}
+    mini_database = {k: schema.database[k] for k in args.fields}
+    if args.summary:
+        mini_database = make_summary_dict(mini_database)
     # Case 1: All inputs to 1 output file
     # Case 2: Each input in to a separate output file
@@ -455,7 +483,7 @@ each input file is mapped to the corresponding output file.
         try:
             create_giant_netcdf(in_files, out_fn, args.zlib,
                                 args.chunk_size, args.start_time,
-                                args.end_time, args.interval,
+                                args.end_time, args.interval, args.summary,
                                 mini_database)
             success = True
         except (ValueError, TypeError):