From 5a00ce2e97a0d1e89a768a133a83d6781865132c Mon Sep 17 00:00:00 2001 From: davidh-ssec <david.hoese@ssec.wisc.edu> Date: Mon, 24 Apr 2017 14:51:17 -0500 Subject: [PATCH] Update netcdf summary creation to better handle wind fields --- aosstower/level_b1/nc.py | 54 ++++++++++++++++++++++++++++++++-------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/aosstower/level_b1/nc.py b/aosstower/level_b1/nc.py index cfad128..60f4e9b 100644 --- a/aosstower/level_b1/nc.py +++ b/aosstower/level_b1/nc.py @@ -24,12 +24,20 @@ DEFAULT_FLOAT_FILL = -9999. def make_summary_dict(source_dict): - """Create the '_mean','_low','_high' file structure.""" + """Create the '_mean','_min','_max' file structure.""" dest_dict = {} for key in source_dict: - dest_dict[key + '_high'] = source_dict[key] - dest_dict[key + '_mean'] = source_dict[key] - dest_dict[key + '_low'] = source_dict[key] + if key == 'wind_dir': + dest_dict['wind_speed_max_dir'] = source_dict[key] + dest_dict['wind_speed_mean_dir'] = source_dict[key] + dest_dict['wind_speed_min_dir'] = source_dict[key] + dest_dict['peak_gust_dir'] = source_dict[key] + elif key == 'gust': + dest_dict['peak_gust'] = source_dict[key] + else: + dest_dict[key + '_max'] = source_dict[key] + dest_dict[key + '_mean'] = source_dict[key] + dest_dict[key + '_min'] = source_dict[key] return dest_dict @@ -254,16 +262,43 @@ def summary_over_interval(frame, interval_width): """ # round each timestamp to the nearest minute # the value at time X is for the data X - interval_width minutes - gb = frame.resample(interval_width, closed='right', loffset=interval_width) + exclude = ['gust', 'wind_east', 'wind_north'] + include = [c for c in frame.columns if c not in exclude] + gb = frame[include].resample(interval_width, closed='right', loffset=interval_width) low = gb.min() - low.columns = [c + "_low" for c in low.columns] + low.rename(columns=lambda x: x + "_min", inplace=True) high = gb.max() - high.columns = [c + "_high" for c in high.columns] + high.rename(columns=lambda x: x + "_max", inplace=True) mean = gb.mean() - mean.columns = [c + "_mean" for c in mean.columns] + mean.rename(columns=lambda x: x + "_mean", inplace=True) out_frames = pd.concat((low, high, mean), axis=1) + + # wind fields need to be handled specially + ws_min_idx = frame['wind_speed'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmin()) + ws_max_idx = frame['wind_speed'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax()) + # probably redundant but need to make sure the direction indexes are + # the same as those used in the wind speed values + # must use .values so we don't take data at out_frames index, but rather + # fill in the out_frames index values with the min/max values + out_frames['wind_speed_min'] = frame['wind_speed'][ws_min_idx].values + out_frames['wind_speed_max'] = frame['wind_speed'][ws_max_idx].values + out_frames['wind_speed_min_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_min_idx], frame['wind_north'][ws_min_idx]).values + out_frames['wind_speed_max_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_max_idx], frame['wind_north'][ws_max_idx]).values + we = frame['wind_east'].resample(interval_width, closed='right', loffset=interval_width).mean() + wn = frame['wind_north'].resample(interval_width, closed='right', loffset=interval_width).mean() + out_frames['wind_speed_mean_dir'] = calc.wind_vector_degrees(we, wn).values + + gust_idx = frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax()) + # gusts may be NaN so this argmax will be NaN indexes which don't work great + gust_idx = gust_idx.astype('datetime64[ns]', copy=False) + peak_gust = frame['gust'][gust_idx] + out_frames['peak_gust'] = peak_gust.values + we = frame['wind_east'][gust_idx] + wn = frame['wind_north'][gust_idx] + out_frames['peak_gust_dir'] = calc.wind_vector_degrees(we, wn).values + return out_frames @@ -377,7 +412,7 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size, # average the values if summary: - frame = summary_over_interval(frame, interval_width) + frame = summary_over_interval(new_frame, interval_width) else: frame = new_frame.resample(interval_width, closed='right', loffset=interval_width).mean() # gust_idx = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax()) @@ -395,7 +430,6 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size, else: chunk_sizes = [frame.shape[0]] - import ipdb; ipdb.set_trace() first_stamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S') # NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See: # http://unidata.github.io/netcdf4-python/#netCDF4.MFDataset -- GitLab