Commit 3dd305e2 authored by Matthew Westphall

added an --interval command-line argument, which brings the monthly script's interval-averaging functionality into the daily converter
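For example, hourly summaries for one day of level_00 data might be produced with an invocation along these lines (script name, paths, and dates are illustrative, not taken from this commit):

    python nc.py -s 2013-06-01 -e 2013-06-01 -i 60 -z -o aoss_tower.2013-06-01.day.nc /mnt/inst-data/aoss-tower/2013/06/rig_tower.2013-06-01.ascii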

parent be2ebfeb
from bundle import createYesterdayFile
createYesterdayFile()
from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars, minuteAverages
from datetime import datetime as dt
from datetime import timedelta as delta
import pandas as pd
import time
from netCDF4 import Dataset
import os

def writeBack(frame, filename, cur_dt):
    #get start
    startString = str(frame.index[0])
    startObj = dt.strptime(startString.split(" ")[0], '%Y-%m-%d')
    createDirectory(startObj)
    #get end
    endObj = startObj.replace(hour=23, minute=59, second=59, microsecond=59)
    #input files
    inputFiles = []
    inputFiles.append(filename)
    inputFiles.append(createFilename(cur_dt - delta(days=1)))
    #output filename
    outputFilename = 'aoss_tower.' + startString.split(" ")[0] + '.day.nc'
    #create file
    createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None)

def createDirectory(stamp):
    if os.path.exists("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m')):
        os.chdir("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m'))
    else:
        os.makedirs("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m'))
        os.chdir("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m'))

def createFilename(date):
    filename = '/mnt/inst-data/aoss-tower/'
    filename += date.strftime('%Y/%m/')
    filename += 'rig_tower.' + date.strftime('%Y-%m-%d') + '.ascii'
    return filename

def createNCFile(frame, filename):
    firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S')
    createDirectory(firstStamp)
    outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.day.nc'
    ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
    ncFile = writeDimensions(ncFile)
    ncFile = createVariables(ncFile, firstStamp, [len(list(frame.index))], True)
    ncFile.inputFiles = ', '.join([filename])
    ncFile = writeVars(ncFile, frame)
    ncFile.close()

def create_files(start_date, end_date):
    # for each file's name based on date - does not rely on what dates are
    # in each file so should be similar code for buoy
    # get file, discover which full dates excluding the start are in it
    # from there we create the netcdf files
    # for the start, we always recreate the netcdf using the previous file
    # ignore the last unfulfilled date
    cur_dt = start_date
    for day in range((end_date - start_date).days + 1):
        filename = createFilename(cur_dt)
        print('grabbing ' + filename)
        df = getData([filename])
        if(df.empty):
            cur_dt += delta(days=1)
            continue
        DFList = []
        for group in df.groupby(df.index.day):
            DFList.append(group[1])
        if(len(DFList) <= 1):
            frame = DFList[0]
            frame = minuteAverages(frame)
            createNCFile(frame, filename)
        elif(len(DFList) == 2):
            if(len(list(DFList[0].index)) > len(list(DFList[1].index))):
                frame = DFList[0]
                frame = minuteAverages(frame)
                createNCFile(frame, filename)
                #get second frame
                frame = DFList[1]
                writeBack(frame, filename, cur_dt)
            else:
                frame = DFList[1]
                frame = minuteAverages(frame)
                createNCFile(frame, filename)
                #get second frame
                frame = DFList[0]
                writeBack(frame, filename, cur_dt)
        else:
            print(len(DFList))
            for frame in DFList:
                print(len(list(frame.index)))
                print(list(frame.index)[0])
            exit(0)
        #increment day
        cur_dt += delta(days=1)

def createYesterdayFile():
    create_files(dt.today() - delta(days=1), dt.today() - delta(days=1))

#createYesterdayFile()
create_files(dt(2009,5,2), dt(2009,5,2))  #dt.today() - delta(days=1))
File deleted
time
lon
station_name
lat
base_time
time_offset
alt
rh_shield_freq_mean
qc_rh_shield_freq_mean
paro_cal_sig_low
qc_paro_cal_sig_low
air_temp_4_low
qc_air_temp_4_low
paro_air_temp_period_low
qc_paro_air_temp_period_low
air_temp_3_high
qc_air_temp_3_high
air_temp_2_low
qc_air_temp_2_low
pressure_mean
qc_pressure_mean
rh_mean
qc_rh_mean
rtd_shield_freq_mean
qc_rtd_shield_freq_mean
box_rh_high
qc_box_rh_high
box_air_temp_high
qc_box_air_temp_high
paro_air_temp_high
qc_paro_air_temp_high
air_temp_5_mean
qc_air_temp_5_mean
box_air_temp_low
qc_box_air_temp_low
precip_mean
qc_precip_mean
rh_shield_freq_high
qc_rh_shield_freq_high
rh_low
qc_rh_low
box_presure_high
qc_box_presure_high
paro_cal_sig_mean
qc_paro_cal_sig_mean
dewpoint_mean
qc_dewpoint_mean
paro_cal_sig_high
qc_paro_cal_sig_high
dewpoint_high
qc_dewpoint_high
paro_air_temp_period_mean
qc_paro_air_temp_period_mean
box_temp_low
qc_box_temp_low
altimeter_low
qc_altimeter_low
precip_low
qc_precip_low
rh_shield_freq_low
qc_rh_shield_freq_low
paro_air_temp_period_high
qc_paro_air_temp_period_high
wind_speed_low
qc_wind_speed_low
paro_pressure_period_low
qc_paro_pressure_period_low
paro_air_temp_low
qc_paro_air_temp_low
box_rh_low
qc_box_rh_low
air_temp_4_mean
qc_air_temp_4_mean
air_temp_6_3m_high
qc_air_temp_6_3m_high
gust_high
qc_gust_high
air_temp_5_low
qc_air_temp_5_low
wind_dir_high
qc_wind_dir_high
box_presure_low
qc_box_presure_low
pressure_low
qc_pressure_low
rh_high
qc_rh_high
rtd_shield_freq_low
qc_rtd_shield_freq_low
wind_speed_mean
qc_wind_speed_mean
air_temp_high
qc_air_temp_high
precip_high
qc_precip_high
accum_precip_high
qc_accum_precip_high
solar_flux_high
qc_solar_flux_high
paro_pressure_period_mean
qc_paro_pressure_period_mean
air_temp_4_high
qc_air_temp_4_high
peak_wind
qc_peak_wind
gust_low
qc_gust_low
rtd_shield_freq_high
qc_rtd_shield_freq_high
wind_dir_mean
qc_wind_dir_mean
box_temp_high
qc_box_temp_high
accum_precip_low
qc_accum_precip_low
pressure_high
qc_pressure_high
air_temp_6_3m_low
qc_air_temp_6_3m_low
wind_dir_low
qc_wind_dir_low
air_temp_3_low
qc_air_temp_3_low
air_temp_mean
qc_air_temp_mean
altimeter_mean
qc_altimeter_mean
air_temp_2_high
qc_air_temp_2_high
altimeter_high
qc_altimeter_high
box_presure_mean
qc_box_presure_mean
air_temp_low
qc_air_temp_low
box_air_temp_mean
qc_box_air_temp_mean
box_temp_mean
qc_box_temp_mean
paro_air_temp_mean
qc_paro_air_temp_mean
solar_flux_low
qc_solar_flux_low
wind_speed_high
qc_wind_speed_high
air_temp_2_mean
qc_air_temp_2_mean
accum_precip_mean
qc_accum_precip_mean
dewpoint_low
qc_dewpoint_low
box_rh_mean
qc_box_rh_mean
air_temp_3_mean
qc_air_temp_3_mean
solar_flux_mean
qc_solar_flux_mean
paro_pressure_period_high
qc_paro_pressure_period_high
air_temp_6_3m_mean
qc_air_temp_6_3m_mean
air_temp_5_high
qc_air_temp_5_high
@@ -4,6 +4,7 @@ import logging
 import pandas as pd
 from datetime import datetime as dt
 from aosstower.l00 import parser
+import avg_database
 from netCDF4 import Dataset
 import numpy as np
 import platform
@@ -43,7 +44,7 @@ def writeDimensions(ncFile):
     return ncFile

-def createVariables(ncFile, firstStamp, chunksizes, zlib):
+def createVariables(ncFile, firstStamp, chunksizes, zlib, database=parser.database):
     #base_time long name
     btln = 'base time as unix timestamp'
@@ -115,11 +116,11 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
         if 'time' in key:
             variable.calendar = 'gregorian'

-    for entry in parser.database:
+    for entry in database:
         if(entry == 'stamp'):
             continue

-        varTup = parser.database[entry]
+        varTup = database[entry]
         variable = ncFile.createVariable(entry, np.float32,
             dimensions=('time'), fill_value=float(-99999), zlib=zlib, chunksizes=chunksizes)
@@ -265,6 +266,24 @@ def minuteAverages(frame):
     return newFrame.fillna(-99999)

+def averageOverInterval(frame, interval_width):
+    """takes a frame and an interval to average it over, and returns a minimum,
+    maximum, and average dataframe for that interval"""
+    ts = frame.index
+    #floor each timestamp to the start of its n-minute interval
+    frame['interval'] = (ts.astype(int) - ts.astype(int) % (interval_width * 60e9)).astype('datetime64[ns]')
+    outFrames = {}
+    outFrames['low'] = frame.groupby('interval').min()
+    outFrames['high'] = frame.groupby('interval').max()
+    outFrames['mean'] = frame.groupby('interval').mean()
+    del frame['interval']
+    for key in outFrames:
+        #append the appropriate suffix to each column
+        columns = outFrames[key].columns
+        outFrames[key].columns = ['_'.join([col, key]) for col in columns]
+    outFrames = pd.concat(outFrames.values(), axis=1)
+    return outFrames
+
 def getData(inputFiles):
     dictData = {}
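The timestamp arithmetic in averageOverInterval above does the real work of the new --interval feature: viewing the DatetimeIndex as int64 nanoseconds and subtracting the remainder modulo the interval width floors every stamp to the start of its interval. A minimal standalone sketch of the same trick, on synthetic data (not from this commit):

import numpy as np
import pandas as pd

# two hours of synthetic one-minute samples
idx = pd.date_range('2013-06-01 00:00', periods=120, freq='1min')
frame = pd.DataFrame({'air_temp': np.linspace(20.0, 22.0, 120)}, index=idx)

interval_width = 60                        # minutes
ns = frame.index.astype(np.int64)          # timestamps as int64 nanoseconds
width_ns = int(interval_width * 60e9)      # 60 min * 60 s/min * 1e9 ns/s
# e.g. 00:59:00 minus (t % width_ns) snaps down to 00:00:00
frame['interval'] = pd.to_datetime(ns - ns % width_ns)

print(frame.groupby('interval').mean())    # one mean row per hour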
@@ -282,9 +301,8 @@ def getData(inputFiles):
     return pd.DataFrame(dictData).transpose().replace(-99999, np.nan)

-def writeVars(ncFile, frame):
+def writeVars(ncFile, frame, database=parser.database):
     stamps = list(frame.index)
     baseDTObj = dt.strptime(str(stamps[0]).split(' ')[0], '%Y-%m-%d')

     #find out how much time elapsed
@@ -308,7 +326,6 @@ def writeVars(ncFile, frame):
         counter += 1

     fileVar = ncFile.variables
-    fileVar['base_time'].assignValue(baseTimeValue)
     fileVar['time_offset'][:] = timeNumpy
     fileVar['time'][:] = timeNumpy + baseTimeValue
@@ -331,13 +348,16 @@ def writeVars(ncFile, frame):
     #writes data into file
     for varName in frame:
+        if varName not in fileVar:
+            logging.warn('Extraneous key: %s in frame' % varName)
+            continue
         dataList = frame[varName].tolist()
         dataArray = np.asarray(dataList)
         fileVar[varName][:] = dataArray

-        valid_min = parser.database[varName][5]
-        valid_max = parser.database[varName][6]
+        valid_min = database[varName][5]
+        valid_max = database[varName][6]

         fileVar['qc_' + varName][:] = filterArray(dataArray, valid_min, valid_max)
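filterArray itself is not visible in this hunk, so only its call pattern is shown: it takes the data array plus the valid_min/valid_max entries pulled from the variable database and returns the contents of the qc_ companion variable. A hypothetical stand-in illustrating a range check of that shape (name and flag values are assumptions, not this project's implementation):

import numpy as np

def filter_array_sketch(data, valid_min, valid_max):
    # hypothetical: 0 = in range, 1 = out of range; the real filterArray
    # may use different flag values or handle fill values specially
    qc = np.zeros(data.shape, dtype=np.int8)
    if valid_min is not None:
        qc[data < valid_min] = 1
    if valid_max is not None:
        qc[data > valid_max] = 1
    return qc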
@@ -366,7 +386,8 @@ def writeVars(ncFile, frame):
 # @param input filenames - list of filenames
 # @param output filename - filename of the netcdf file

-def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize):
+def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize,
+                      interval_width=None, database=parser.database):
     default = False
     if(chunkSize):
@@ -383,7 +404,9 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize):
     else:
         frame = minuteAverages(frame)

+    if interval_width:
+        frame = averageOverInterval(frame, interval_width)
+
     if(start and end):
         frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
@@ -396,11 +419,11 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize):
     ncFile = writeDimensions(ncFile)

-    ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib)
+    ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib, database)

     ncFile.inputFiles = ', '.join(inputFiles)

-    ncFile = writeVars(ncFile, frame)
+    ncFile = writeVars(ncFile, frame, database)

     ncFile.close()
@@ -437,47 +460,53 @@ def _dt_convert(datetime_str):
         return dt.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S')
     except:
         return dt.strptime(datetime_str, '%Y-%m-%d')

 def main():
     import argparse
     #argparse description
-    parser = argparse.ArgumentParser(description="Convert level_00 aoss tower data to level_a0")
+    argparser = argparse.ArgumentParser(description="Convert level_00 aoss tower data to level_a0")

     #argparse verbosity info
-    parser.add_argument('-v', '--verbose', action="count", default=int(os.environ.get("VERBOSITY", 2)),
+    argparser.add_argument('-v', '--verbose', action="count", default=int(os.environ.get("VERBOSITY", 2)),
                         dest='verbosity',
                         help='each occurrence increases verbosity 1 level through ERROR-WARNING-INFO-DEBUG (default INFO)')

     #argparse start and end times
-    parser.add_argument('-s', '--start-time', type=_dt_convert,
+    argparser.add_argument('-s', '--start-time', type=_dt_convert,
                         help="Start time of massive netcdf file, if only -s is given, a netcdf file for only that day is given" +
                              ". Formats allowed: \'YYYY-MM-DDTHH:MM:SS\', \'YYYY-MM-DD\'")
-    parser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file. Formats allowed:' +
+    argparser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file. Formats allowed:' +
                         "\'YYYY-MM-DDTHH:MM:SS\', \'YYYY-MM-DD\'")
-    parser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file')
-    parser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib')
+    argparser.add_argument('-i', '--interval', type=float,
+                        help='Width of the interval to average input data over in minutes.' +
+                             ' If not specified, 1 is assumed. (Use 60 for one hour and 1440 for 1 day)')
+    argparser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file')
+    argparser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib')

-    parser.add_argument("input_files", nargs="+",
+    argparser.add_argument("input_files", nargs="+",
                         help="aoss_tower level_00 paths")
-    parser.add_argument('-o', '--output', required=True, nargs="+", help="filename pattern or filename. " +
+    argparser.add_argument('-o', '--output', required=True, nargs="+", help="filename pattern or filename. " +
                         "Should be along the lines of <filepath>/aoss_tower.YYYY-MM-DD.nc")

-    args = parser.parse_args()
+    args = argparser.parse_args()

     levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
     level = levels[min(3, args.verbosity)]
     logging.basicConfig(level=level)

+    database = avg_database.AOSS_VARS if args.interval else parser.database
+
     if(args.start_time and args.end_time):
-        result = createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size)
+        result = createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size,
+                                   args.interval, database)
         if(result == False):
             raise IOError('An empty ASCII file was found')
     elif(args.start_time):
         end_time = args.start_time.replace(hour=23, minute=59, second=59)
-        result = createGiantNetCDF(args.start_time, end_time, args.input_files, args.output[0], args.zlib, args.chunk_size)
+        result = createGiantNetCDF(args.start_time, end_time, args.input_files, args.output[0], args.zlib, args.chunk_size,
+                                   args.interval, database)
         if(result == False):
             raise IOError('An empty ASCII file was found')
@@ -486,5 +515,6 @@ def main():
     else:
         createMultiple(args.input_files, args.output, args.zlib, args.chunk_size)

 if __name__ == "__main__":
     main()
File deleted
import platform
from netCDF4 import Dataset
from aosstower.l00 import parser
import calendar
import logging
from aosstower.level_b1.daily import nc
from datetime import datetime as dt
from collections import OrderedDict
import glob
import os
# CF Metadata 1.6 compliant structure for NetCDF file. This is essentially
@@ -824,11 +829,58 @@ AOSS_VARS = OrderedDict({
 })

 def _dt_convert(date):
-    return None
+    return dt.strptime(date, '%Y-%m-%d')

+def files_fr_directories(directories, file_ending='.ascii'):
+    """gets a list of all raw data files in the given directories"""
+    files = []
+    for directory in directories:
+        files += glob.glob(os.path.join(directory, '*%s' % file_ending))
+    return files
+def createGiantNetCDF(start, end, directories, output, zlib, chunk_size):
+    default = False
+    if(chunk_size):
+        chunksizes = [chunk_size]
+    else:
+        default = True
+
+    inputFiles = files_fr_directories(directories)
+    if not inputFiles:
+        return False
+
+    frame = getData(inputFiles)
+    if(frame.empty):
+        return False
+    else:
+        frame = minuteAverages(frame)
+
+    if(start and end):
+        frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
+
+    if(default):
+        chunksizes = [len(list(frame.index))]
+
+    firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S')
+    ncFile = Dataset(output, 'w', format='NETCDF4_CLASSIC')
+    ncFile = writeDimensions(ncFile)
+    ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib)
+    ncFile.inputFiles = ', '.join(inputFiles)
+    ncFile = writeVars(ncFile, frame)
+    ncFile.close()
+    return True

 def writeDimensions(ncFile):
     for name, size in BASIC_STATION['dimensions'].items():
         ncFile.createDimension(name, size)
@@ -920,12 +972,13 @@ def main():
     if(args.start_time and args.end_time):
-        result = createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size)
+        result = createGiantNetCDF(args.start_time, args.end_time, args.input_directories, args.output[0], args.zlib, args.chunk_size)
         if(result == False):
             raise IOError('An empty ASCII file was found')
     elif(args.start_time):
-        end_time = args.start_time.replace(hour=23, minute=59, second=59)
+        last_day = calendar.monthrange(args.start_time.year, args.start_time.month)[1]
+        end_time = args.start_time.replace(day=last_day, hour=23, minute=59, second=59)
         result = createGiantNetCDF(args.start_time, end_time, args.input_directories, args.output[0], args.zlib, args.chunk_size)
         if(result == False):
             raise IOError('An empty ASCII file was found')
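The replacement end_time computation above extends a single -s date to the end of its month: calendar.monthrange returns a (first_weekday, days_in_month) pair, so index [1] is the day count. A quick standalone check:

import calendar
from datetime import datetime as dt

start = dt(2013, 6, 1)
last_day = calendar.monthrange(start.year, start.month)[1]   # 30 for June
end_time = start.replace(day=last_day, hour=23, minute=59, second=59)
print(end_time)   # 2013-06-30 23:59:59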
@@ -937,9 +990,4 @@ def main():
         createMultiple(args.input_files, args.output, args.zlib, args.chunk_size)

 if __name__ == "__main__":
-    # main()
-    ncFile = Dataset('aoss_tower.2013-06.SUMMARY.nc', 'w', format='NETCDF4_CLASSIC')
-    ncFile = writeDimensions(ncFile)
-    ncFile = createVariables(ncFile, dt(2003, 6, 1), [30], True)
-    ncFile.close()
+    main()