Skip to content
Snippets Groups Projects
Commit 24452d52 authored by kgao's avatar kgao
Browse files

Fixed rolling averages bug

If timestamps don't line up exactly at the minute mark, the frame gets filled with np.nan values.
Now the frame takes the closest record that is at or before the minute mark and sets
that record into the file.
parent 71b74e7a
No related branches found
No related tags found
No related merge requests found
from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars
from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars, minuteAverages
from datetime import datetime as dt
from datetime import timedelta as delta
import pandas as pd
......@@ -24,7 +24,7 @@ def writeBack(frame, filename, cur_dt):
outputFilename = 'aoss_tower.' + startString.split(" ")[0] + '.day.nc'
#create file
createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None, False)
createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None)
def createDirectory(stamp):
if os.path.exists("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m')):
......@@ -47,7 +47,7 @@ def createNCFile(frame, filename):
createDirectory(firstStamp)
outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.nc'
outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.day.nc'
ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
......@@ -90,11 +90,16 @@ def create_files(start_date, end_date):
if(len(DFList) <= 1):
frame = DFList[0]
frame = minuteAverages(frame)
createNCFile(frame, filename)
elif(len(DFList) == 2):
if(len(list(DFList[0].index)) > len(list(DFList[1].index))):
frame = DFList[0]
frame = minuteAverages(frame)
createNCFile(frame, filename)
#get second frame
......@@ -104,6 +109,9 @@ def create_files(start_date, end_date):
else:
frame = DFList[1]
frame = minuteAverages(frame)
createNCFile(frame, filename)
#get second frame
......@@ -128,4 +136,4 @@ def createYesterdayFile():
create_files(dt.today() - delta(days=1), dt.today() - delta(days=1))
#createYesterdayFile()
create_files(dt(2003,5,28), dt.today() - delta(days=1))
create_files(dt(2009,5,2), dt(2009,5,2))#dt.today() - delta(days=1))
......@@ -20,10 +20,10 @@ def filterArray(array, valid_min, valid_max):
if value == float(-99999):
qcControl.append(np.byte(0b1))
elif value < valid_min:
elif valid_min != '' and value < float(valid_min):
qcControl.append(np.byte(0b10))
elif value > valid_max:
elif valid_max != '' and value > float(valid_max):
qcControl.append(np.byte(0b100))
else:
......@@ -180,6 +180,22 @@ def getGust(rollingAvg, speeds):
return gust
#gets the rolling mean closest to the nearest minute
def getRolling(series, minutes):
    """Sample a 25-point boxcar rolling mean of *series* at each minute mark.

    For every entry of *minutes*, the value taken is the rolling mean at the
    latest index label that does not go past that minute (``Index.asof``).

    Parameters:
        series: pandas Series to average; its index is searched with
            ``Index.asof``, so it is assumed to be sorted -- TODO confirm
            against the caller.
        minutes: iterable of minute-mark labels to sample at.

    Returns:
        A pandas Series keyed by the entries of *minutes*. A minute with no
        sample at or before it maps to NaN instead of raising KeyError.
    """
    rolled = series.rolling(25, win_type='boxcar').mean()
    missing = float('nan')

    data = {}
    for minute in minutes:
        #doesn't go past the minute
        closestStamp = rolled.index.asof(minute)
        if pd.isnull(closestStamp):
            # asof found nothing at or before this minute; keep it missing
            data[minute] = missing
        else:
            data[minute] = rolled[closestStamp]

    return pd.Series(data)
def minuteAverages(frame):
frame['minute'] = [(ts + delta(minutes=1)).replace(second=0) for ts in frame.index]
newFrame = frame.groupby('minute').mean()
......@@ -191,7 +207,7 @@ def minuteAverages(frame):
windSeries = frame['wind_speed']
windSeries = windSeries.rolling(25, win_type='boxcar').mean()
windSeries = getRolling(windSeries, list(newFrame.index))
newFrame['wind_speed'] = windSeries
......@@ -205,8 +221,6 @@ def minuteAverages(frame):
gust = getGust(rollingAvg, maxSpeed)
#gust = pd.DataFrame({'gust': gust, 'minute': list(newFrame.index)}, dtype=np.float64
newFrame['gust'] = gust
if 'wind_direction' in columns:
......@@ -214,7 +228,7 @@ def minuteAverages(frame):
windDirSeries = frame['wind_direction']
windDirSeries = windDirSeries.rolling(25, win_type='boxcar').mean()
windDirSeries = getRolling(windDirSeries, list(newFrame.index))
newFrame['wind_direction'] = windDirSeries
......@@ -293,12 +307,10 @@ def writeVars(ncFile, frame):
dataArray = np.asarray(dataList)
fileVar[varName][:] = dataArray
if parser.database[varName][5] != '':
valid_min = float(parser.database[varName][5])
valid_max = float(parser.database[varName][6])
valid_min = parser.database[varName][5]
valid_max = parser.database[varName][6]
fileVar['qc_' + varName][:] = filterArray(dataArray, valid_min, valid_max)
fileVar['qc_' + varName][:] = filterArray(dataArray, valid_min, valid_max)
coordinates = ['lon', 'lat', 'alt', 'base_time', 'time_offset', 'station_name', 'time']
......@@ -337,13 +349,14 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize):
frame = getData(inputFiles)
frame = minuteAverages(frame)
if(frame.empty):
return False
else:
if(start and end):
frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
print(frame)
if(default):
chunksizes = [len(list(frame.index))]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment