From 5a00ce2e97a0d1e89a768a133a83d6781865132c Mon Sep 17 00:00:00 2001
From: davidh-ssec <david.hoese@ssec.wisc.edu>
Date: Mon, 24 Apr 2017 14:51:17 -0500
Subject: [PATCH] Update netcdf summary creation to better handle wind fields

---
 aosstower/level_b1/nc.py | 54 ++++++++++++++++++++++++++++++++--------
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/aosstower/level_b1/nc.py b/aosstower/level_b1/nc.py
index cfad128..60f4e9b 100644
--- a/aosstower/level_b1/nc.py
+++ b/aosstower/level_b1/nc.py
@@ -24,12 +24,20 @@ DEFAULT_FLOAT_FILL = -9999.
 
 
 def make_summary_dict(source_dict):
-    """Create the '_mean','_low','_high' file structure."""
+    """Create the '_mean','_min','_max' file structure."""
     dest_dict = {}
     for key in source_dict:
-        dest_dict[key + '_high'] = source_dict[key]
-        dest_dict[key + '_mean'] = source_dict[key]
-        dest_dict[key + '_low'] = source_dict[key]
+        if key == 'wind_dir':
+            dest_dict['wind_speed_max_dir'] = source_dict[key]
+            dest_dict['wind_speed_mean_dir'] = source_dict[key]
+            dest_dict['wind_speed_min_dir'] = source_dict[key]
+            dest_dict['peak_gust_dir'] = source_dict[key]
+        elif key == 'gust':
+            dest_dict['peak_gust'] = source_dict[key]
+        else:
+            dest_dict[key + '_max'] = source_dict[key]
+            dest_dict[key + '_mean'] = source_dict[key]
+            dest_dict[key + '_min'] = source_dict[key]
     return dest_dict
 
 
@@ -254,16 +262,43 @@ def summary_over_interval(frame, interval_width):
     """
     # round each timestamp to the nearest minute
     # the value at time X is for the data X - interval_width minutes
-    gb = frame.resample(interval_width, closed='right', loffset=interval_width)
+    exclude = ['gust', 'wind_east', 'wind_north']
+    include = [c for c in frame.columns if c not in exclude]
+    gb = frame[include].resample(interval_width, closed='right', loffset=interval_width)
 
     low = gb.min()
-    low.columns = [c + "_low" for c in low.columns]
+    low.rename(columns=lambda x: x + "_min", inplace=True)
     high = gb.max()
-    high.columns = [c + "_high" for c in high.columns]
+    high.rename(columns=lambda x: x + "_max", inplace=True)
     mean = gb.mean()
-    mean.columns = [c + "_mean" for c in mean.columns]
+    mean.rename(columns=lambda x: x + "_mean", inplace=True)
 
     out_frames = pd.concat((low, high, mean), axis=1)
+
+    # wind fields need to be handled specially
+    ws_min_idx = frame['wind_speed'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmin())
+    ws_max_idx = frame['wind_speed'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
+    # possibly redundant, but sample the direction components at the exact
+    # timestamps where the wind speed min/max occurred so they stay paired
+    # use .values so pandas assigns positionally instead of aligning on the
+    # out_frames index, which would otherwise misplace the min/max values
+    out_frames['wind_speed_min'] = frame['wind_speed'][ws_min_idx].values
+    out_frames['wind_speed_max'] = frame['wind_speed'][ws_max_idx].values
+    out_frames['wind_speed_min_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_min_idx], frame['wind_north'][ws_min_idx]).values
+    out_frames['wind_speed_max_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_max_idx], frame['wind_north'][ws_max_idx]).values
+    we = frame['wind_east'].resample(interval_width, closed='right', loffset=interval_width).mean()
+    wn = frame['wind_north'].resample(interval_width, closed='right', loffset=interval_width).mean()
+    out_frames['wind_speed_mean_dir'] = calc.wind_vector_degrees(we, wn).values
+
+    gust_idx = frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
+    # gusts may be NaN; argmax then yields NaN indexes, which cannot be used for lookup
+    gust_idx = gust_idx.astype('datetime64[ns]', copy=False)
+    peak_gust = frame['gust'][gust_idx]
+    out_frames['peak_gust'] = peak_gust.values
+    we = frame['wind_east'][gust_idx]
+    wn = frame['wind_north'][gust_idx]
+    out_frames['peak_gust_dir'] = calc.wind_vector_degrees(we, wn).values
+
     return out_frames
 
 
@@ -377,7 +412,7 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
 
     # average the values
     if summary:
-        frame = summary_over_interval(frame, interval_width)
+        frame = summary_over_interval(new_frame, interval_width)
     else:
         frame = new_frame.resample(interval_width, closed='right', loffset=interval_width).mean()
         # gust_idx = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
@@ -395,7 +430,6 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
     else:
         chunk_sizes = [frame.shape[0]]
 
-    import ipdb; ipdb.set_trace()
     first_stamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
     # NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See:
     # http://unidata.github.io/netcdf4-python/#netCDF4.MFDataset
-- 
GitLab