Skip to content
Snippets Groups Projects
Unverified Commit 5a00ce2e authored by David Hoese's avatar David Hoese
Browse files

Update netcdf summary creation to better handle wind fields

parent 18835498
No related branches found
No related tags found
No related merge requests found
......@@ -24,12 +24,20 @@ DEFAULT_FLOAT_FILL = -9999.
def make_summary_dict(source_dict):
    """Create the '_mean'/'_min'/'_max' file structure from a flat variable dict.

    Each key in *source_dict* fans out to three summary keys
    (``<key>_max``, ``<key>_mean``, ``<key>_min``) that all share the
    original value (presumably variable metadata reused for each summary
    column — TODO confirm against callers).

    Wind fields are special-cased:

    * ``wind_dir`` does not get its own min/mean/max; instead its value is
      attached to the wind-speed summary directions and the peak-gust
      direction (``wind_speed_{max,mean,min}_dir``, ``peak_gust_dir``).
    * ``gust`` only produces ``peak_gust`` (min/mean of gusts are not kept).

    :param source_dict: mapping of base variable name -> value
    :returns: new dict with the summary key layout; *source_dict* is not modified
    """
    dest_dict = {}
    for key in source_dict:
        if key == 'wind_dir':
            dest_dict['wind_speed_max_dir'] = source_dict[key]
            dest_dict['wind_speed_mean_dir'] = source_dict[key]
            dest_dict['wind_speed_min_dir'] = source_dict[key]
            dest_dict['peak_gust_dir'] = source_dict[key]
        elif key == 'gust':
            dest_dict['peak_gust'] = source_dict[key]
        else:
            dest_dict[key + '_max'] = source_dict[key]
            dest_dict[key + '_mean'] = source_dict[key]
            dest_dict[key + '_min'] = source_dict[key]
    return dest_dict
def summary_over_interval(frame, interval_width):
    """Resample *frame* to *interval_width* bins with min/max/mean summaries.

    The value at time X summarizes the data in (X - interval_width, X]
    (``closed='right'`` with the timestamp shifted forward by ``loffset``).

    Non-wind columns get plain ``_min``/``_max``/``_mean`` aggregates.
    Wind fields are handled specially:

    * ``wind_speed_min``/``wind_speed_max`` are the actual extreme samples,
      and ``wind_speed_{min,max}_dir`` are the directions recorded at those
      same timestamps (recomputed from the east/north components so speed
      and direction stay paired).
    * ``wind_speed_mean_dir`` is the vector-mean direction of the interval.
    * ``peak_gust``/``peak_gust_dir`` come from the timestamp of the
      maximum gust in each interval.

    :param frame: pandas DataFrame indexed by timestamp; must contain
        'wind_speed', 'wind_east', 'wind_north', and 'gust' columns
    :param interval_width: pandas resample rule (e.g. '1T')
    :returns: DataFrame of summary columns indexed by interval end time
    """
    # One place for the resample configuration used throughout this function.
    resample_kwargs = dict(closed='right', loffset=interval_width)

    # Wind fields are summarized specially below, so keep them out of the
    # generic min/max/mean pass.
    exclude = ['gust', 'wind_east', 'wind_north']
    include = [c for c in frame.columns if c not in exclude]
    gb = frame[include].resample(interval_width, **resample_kwargs)
    low = gb.min()
    low.rename(columns=lambda x: x + "_min", inplace=True)
    high = gb.max()
    high.rename(columns=lambda x: x + "_max", inplace=True)
    mean = gb.mean()
    mean.rename(columns=lambda x: x + "_mean", inplace=True)
    out_frames = pd.concat((low, high, mean), axis=1)

    # wind fields need to be handled specially
    ws_min_idx = frame['wind_speed'].resample(interval_width, **resample_kwargs).apply(lambda arr_like: arr_like.argmin())
    ws_max_idx = frame['wind_speed'].resample(interval_width, **resample_kwargs).apply(lambda arr_like: arr_like.argmax())
    # probably redundant but need to make sure the direction indexes are
    # the same as those used in the wind speed values.
    # must use .values so we don't take data at out_frames' index, but rather
    # fill in the out_frames index values with the min/max values
    out_frames['wind_speed_min'] = frame['wind_speed'][ws_min_idx].values
    out_frames['wind_speed_max'] = frame['wind_speed'][ws_max_idx].values
    out_frames['wind_speed_min_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_min_idx], frame['wind_north'][ws_min_idx]).values
    out_frames['wind_speed_max_dir'] = calc.wind_vector_degrees(frame['wind_east'][ws_max_idx], frame['wind_north'][ws_max_idx]).values
    # Mean direction must come from averaged vector components, not from
    # averaging the angle itself (which breaks across the 0/360 boundary).
    we = frame['wind_east'].resample(interval_width, **resample_kwargs).mean()
    wn = frame['wind_north'].resample(interval_width, **resample_kwargs).mean()
    out_frames['wind_speed_mean_dir'] = calc.wind_vector_degrees(we, wn).values

    gust_idx = frame['gust'].resample(interval_width, **resample_kwargs).apply(lambda arr_like: arr_like.argmax())
    # gusts may be NaN so this argmax may yield NaN indexes which don't work
    # great; force a datetime index dtype
    gust_idx = gust_idx.astype('datetime64[ns]', copy=False)
    out_frames['peak_gust'] = frame['gust'][gust_idx].values
    we = frame['wind_east'][gust_idx]
    wn = frame['wind_north'][gust_idx]
    out_frames['peak_gust_dir'] = calc.wind_vector_degrees(we, wn).values
    return out_frames
......@@ -377,7 +412,7 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
# average the values
if summary:
frame = summary_over_interval(frame, interval_width)
frame = summary_over_interval(new_frame, interval_width)
else:
frame = new_frame.resample(interval_width, closed='right', loffset=interval_width).mean()
# gust_idx = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
......@@ -395,7 +430,6 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
else:
chunk_sizes = [frame.shape[0]]
import ipdb; ipdb.set_trace()
first_stamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
# NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See:
# http://unidata.github.io/netcdf4-python/#netCDF4.MFDataset
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment