Skip to content
Snippets Groups Projects
Unverified Commit 5a00ce2e authored by David Hoese's avatar David Hoese
Browse files

Update netcdf summary creation to better handle wind fields

parent 18835498
No related branches found
No related tags found
No related merge requests found
......@@ -24,12 +24,20 @@ DEFAULT_FLOAT_FILL = -9999.
def make_summary_dict(source_dict):
    """Create the '_mean'/'_min'/'_max' file structure from a flat variable dict.

    Each key in *source_dict* fans out to three summary keys
    (``<key>_max``, ``<key>_mean``, ``<key>_min``) that all share the
    original value (presumably variable metadata reused for each summary
    column — TODO confirm against callers).

    Wind fields are special-cased:

    * ``wind_dir`` does not get its own min/mean/max; instead its value is
      attached to the wind-speed summary directions and the peak-gust
      direction (``wind_speed_{max,mean,min}_dir``, ``peak_gust_dir``).
    * ``gust`` only produces ``peak_gust`` (min/mean of gusts are not kept).

    :param source_dict: mapping of base variable name -> value
    :returns: new dict with the summary key layout; *source_dict* is not modified
    """
    dest_dict = {}
    for key in source_dict:
        if key == 'wind_dir':
            dest_dict['wind_speed_max_dir'] = source_dict[key]
            dest_dict['wind_speed_mean_dir'] = source_dict[key]
            dest_dict['wind_speed_min_dir'] = source_dict[key]
            dest_dict['peak_gust_dir'] = source_dict[key]
        elif key == 'gust':
            dest_dict['peak_gust'] = source_dict[key]
        else:
            dest_dict[key + '_max'] = source_dict[key]
            dest_dict[key + '_mean'] = source_dict[key]
            dest_dict[key + '_min'] = source_dict[key]
    return dest_dict
def summary_over_interval(frame, interval_width):
    """Resample *frame* to *interval_width* bins with min/max/mean summaries.

    The value at time X summarizes the data in (X - interval_width, X]
    (``closed='right'`` with the timestamp shifted forward by ``loffset``).

    Non-wind columns get plain ``_min``/``_max``/``_mean`` aggregates.
    Wind fields are handled specially:

    * ``wind_speed_min``/``wind_speed_max`` are the actual extreme samples,
      and ``wind_speed_{min,max}_dir`` are the directions recorded at those
      same timestamps (recomputed from the east/north components so speed
      and direction stay paired).
    * ``wind_speed_mean_dir`` is the vector-mean direction of the interval.
    * ``peak_gust``/``peak_gust_dir`` come from the timestamp of the
      maximum gust in each interval.

    :param frame: pandas DataFrame indexed by timestamp; must contain
        'wind_speed', 'wind_east', 'wind_north', and 'gust' columns
    :param interval_width: pandas resample rule (e.g. '1T')
    :returns: DataFrame of summary columns indexed by interval end time
    """
    # One place for the resample configuration used throughout this function.
    resample_kwargs = dict(closed='right', loffset=interval_width)

    # Wind fields are summarized specially below, so keep them out of the
    # generic min/max/mean pass.
    exclude = ['gust', 'wind_east', 'wind_north']
    include = [c for c in frame.columns if c not in exclude]
    gb = frame[include].resample(interval_width, **resample_kwargs)
    low = gb.min()
    low.rename(columns=lambda x: x + "_min", inplace=True)
    high = gb.max()
    high.rename(columns=lambda x: x + "_max", inplace=True)
    mean = gb.mean()
    mean.rename(columns=lambda x: x + "_mean", inplace=True)
    out_frames = pd.concat((low, high, mean), axis=1)

    # wind fields need to be handled specially
    ws_min_idx = frame['wind_speed'].resample(interval_width, **resample_kwargs).apply(lambda arr_like: arr_like.argmin())
    ws_max_idx = frame['wind_speed'].resample(interval_width, **resample_kwargs).apply(lambda arr_like: arr_like.argmax())
    # probably redundant but need to make sure the direction indexes are
    # the same as those used in the wind speed values.
    # must use .values so we don't take data at out_frames' index, but rather
    # fill in the out_frames index values with the min/max values
    out_frames['wind_speed_min'] = frame['wind_speed'][ws_min_idx].values
    out_frames['wind_speed_max'] = frame['wind_speed'][ws_max_idx].values
    out_frames['wind_speed_min_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_min_idx], frame['wind_north'][ws_min_idx]).values
    out_frames['wind_speed_max_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_max_idx], frame['wind_north'][ws_max_idx]).values
    # Mean direction must come from averaged vector components, not from
    # averaging the angle itself (which breaks across the 0/360 boundary).
    we = frame['wind_east'].resample(interval_width, **resample_kwargs).mean()
    wn = frame['wind_north'].resample(interval_width, **resample_kwargs).mean()
    out_frames['wind_speed_mean_dir'] = calc.wind_vector_degrees(we, wn).values

    gust_idx = frame['gust'].resample(interval_width, **resample_kwargs).apply(lambda arr_like: arr_like.argmax())
    # gusts may be NaN so this argmax may yield NaN indexes which don't work
    # great; force a datetime index dtype
    gust_idx = gust_idx.astype('datetime64[ns]', copy=False)
    out_frames['peak_gust'] = frame['gust'][gust_idx].values
    we = frame['wind_east'][gust_idx]
    wn = frame['wind_north'][gust_idx]
    out_frames['peak_gust_dir'] = calc.wind_vector_degrees(we, wn).values
    return out_frames
......@@ -377,7 +412,7 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
# average the values
if summary:
frame = summary_over_interval(frame, interval_width)
frame = summary_over_interval(new_frame, interval_width)
else:
frame = new_frame.resample(interval_width, closed='right', loffset=interval_width).mean()
# gust_idx = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
......@@ -395,7 +430,6 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
else:
chunk_sizes = [frame.shape[0]]
import ipdb; ipdb.set_trace()
first_stamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
# NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See:
# http://unidata.github.io/netcdf4-python/#netCDF4.MFDataset
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment