Skip to content
Snippets Groups Projects
Commit cf6ecdeb authored by kgao's avatar kgao
Browse files

Added netcdf daily regen and fixed globals

Added globals software_version, data_level, data_stream, and input_filenames
Added daily NetCDF regeneration:
creates one NetCDF file for each day by going through all of the ASCII files.
NetCDF files are written to /mnt/inst-data/regen_netcdf/aoss/tower/YYYY/MM.
parent d30a7365
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env bash
# Wrapper that runs the conversion script under an exclusive lock so that
# overlapping invocations of this wrapper never run at the same time.
PYENV=/home/kgao/env/
SCRIPT=/home/kgao/Code/finished/Aoss_Tower_a1_Conversion/convertFromASCIIToNETCDF/autoRun.py
(
    # File descriptor 200 is opened on the lock file by the redirection at the
    # end of this subshell. -x asks for an exclusive lock and -n makes flock
    # fail immediately instead of waiting; in that case leave with its status.
    flock -x -n 200 || exit $?
    "$PYENV/bin/python" "$SCRIPT"
) 200>"/tmp/$(basename "$0").lock"
# Launcher script (presumably the autoRun.py started by the shell wrapper --
# confirm): asks the bundle module to build the file for yesterday.
import bundle

bundle.createYesterdayFile()
from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars
from datetime import datetime as dt
from datetime import timedelta as delta
import pandas as pd
import time
from netCDF4 import Dataset
import os
def createDirectory(stamp, base_dir="/mnt/inst-data/regen_netcdf/aoss/tower/"):
    """Make the ``YYYY/MM`` output directory for ``stamp`` the working directory.

    The directory ``<base_dir>/YYYY/MM`` is created, together with any
    missing parents, when it does not exist yet. The process then changes
    its current working directory to it, so files opened afterwards with a
    relative name end up there.

    :param stamp: object providing ``strftime`` (for example a datetime);
        its year and month select the sub-directory.
    :param base_dir: root of the output tree. Defaults to the location that
        used to be hard-coded in this function.
    :raises OSError: if the directory can neither be created nor entered.
    """
    target = os.path.join(base_dir, stamp.strftime('%Y/%m'))
    try:
        os.makedirs(target)
    except OSError:
        # An already existing directory is fine. Creating first and checking
        # afterwards avoids the window between "does it exist?" and
        # "create it" in which another process could create the directory.
        if not os.path.isdir(target):
            raise
    os.chdir(target)
def createFilename(date):
    """Return the path of the daily ASCII input file belonging to ``date``.

    The result has the form
    ``/mnt/inst-data/aoss-tower/YYYY/MM/rig_tower.YYYY-MM-DD.ascii``.

    :param date: object providing ``strftime`` (a date or datetime).
    :returns: the file path as a string; the file is not checked for
        existence.
    """
    pieces = (
        '/mnt/inst-data/aoss-tower/',
        date.strftime('%Y/%m/'),
        'rig_tower.',
        date.strftime('%Y-%m-%d'),
        '.ascii',
    )
    return ''.join(pieces)
def createNCFile(frame, filename):
    """Write one data frame to ``aoss_tower.YYYY-MM-DD.nc``.

    The date in the output name is taken from the first index entry of
    ``frame``. The file is created inside the year/month directory selected
    by createDirectory, which also changes the current working directory.

    :param frame: non-empty data frame whose first index entry renders as
        ``YYYY-MM-DD HH:MM:SS``.
    :param filename: name of the input file the frame was read from; it is
        stored in the ``inputFiles`` global attribute of the output file.
    """
    firstStamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
    createDirectory(firstStamp)
    outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.nc'
    ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
    try:
        ncFile = writeDimensions(ncFile)
        # A single chunk as long as the frame, with zlib switched on.
        ncFile = createVariables(ncFile, firstStamp, [len(frame.index)], True)
        ncFile.inputFiles = ', '.join([filename])
        ncFile = writeVars(ncFile, frame)
    finally:
        # Close the handle even when one of the write steps fails, so the
        # file is not left open for the rest of a long regeneration run.
        ncFile.close()
def create_files(start_date, end_date):
    """Regenerate the daily NetCDF files for every date from start to end.

    For each date in the inclusive range the matching ASCII file is located
    by name (createFilename) and read with getData. Dates whose frame is
    empty are skipped. The rows are then split by calendar date:

    * one date in the file: that frame is written with createNCFile;
    * two dates: the earlier frame is written with createNCFile only when it
      has more rows than the later one, and the later date is always rebuilt
      with createGiantNetCDF from this file plus the previous day's file;
    * any other count: the frame sizes and first stamps are printed and the
      run is aborted.

    :param start_date: first date to process (datetime).
    :param end_date: last date to process, inclusive (datetime).
    :raises SystemExit: with status 1 when an input file does not contain
        exactly one or two calendar dates.
    """
    # Input files are chosen purely by date, not by the dates found inside
    # them, so similar code should work for the buoy.
    for offset in range((end_date - start_date).days + 1):
        # Deriving the date from the offset keeps the cursor correct on every
        # path, including the early "continue" below.
        cur_dt = start_date + delta(days=offset)
        filename = createFilename(cur_dt)
        print('grabbing ' + filename)
        df = getData([filename], False)[0]
        if df.empty:
            continue

        # Group by full calendar date. Grouping by day-of-month alone sorts a
        # file that crosses a month boundary (e.g. the 31st and the 1st) in
        # reverse, which swaps the "first" and "second" frame used below.
        DFList = [group for _, group in df.groupby(df.index.date)]

        if len(DFList) == 1:
            createNCFile(DFList[0], filename)
        elif len(DFList) == 2:
            first, second = DFList
            if len(first.index) > len(second.index):
                createNCFile(first, filename)

            # The second date is rebuilt for the whole day, from midnight up
            # to the last second of that day.
            dateString = str(second.index[0]).split(" ")[0]
            startObj = dt.strptime(dateString, '%Y-%m-%d')
            createDirectory(startObj)
            endObj = startObj.replace(hour=23, minute=59, second=59, microsecond=59)

            # This file plus the file of the previous day.
            inputFiles = [filename, createFilename(cur_dt - delta(days=1))]
            outputFilename = 'aoss_tower.' + dateString + '.nc'
            createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None, False)
        else:
            # Unexpected layout: print what was found, then stop with a
            # failing status so the caller can tell the run did not finish.
            print(len(DFList))
            for frame in DFList:
                print(len(frame.index))
                print(frame.index[0])
            raise SystemExit(1)
def createYesterdayFile():
    """Regenerate the NetCDF output for yesterday only."""
    # Take the timestamp once: two separate today() calls could land on
    # different sides of midnight and turn the one-day range into two days.
    yesterday = dt.today() - delta(days=1)
    create_files(yesterday, yesterday)
#createYesterdayFile()
#create_files(dt(2003,5,28), dt.today() - delta(days=1))
...@@ -19,7 +19,7 @@ LOG = logging.getLogger(__name__) ...@@ -19,7 +19,7 @@ LOG = logging.getLogger(__name__)
def writeDimensions(ncFile): def writeDimensions(ncFile):
#ncFile.createDimension('time', len(stamps)) #ncFile.createDimension('time', len(stamps))
ncFile.createDimension('time', None) ncFile.createDimension('time', None)
ncFile.createDimension('strlen', 256) ncFile.createDimension('max_len_station_name', 32)
return ncFile return ncFile
...@@ -49,7 +49,7 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): ...@@ -49,7 +49,7 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
'alt': [np.float32, None, float(-999), None, 'height', 'vertical distance', 'm', None, None], 'alt': [np.float32, None, float(-999), None, 'height', 'vertical distance', 'm', None, None],
'base_time': [np.float32, None, float(-999), None, 'time', btln, btu, None, None], 'base_time': [np.float32, None, float(-999), None, 'time', btln, btu, None, None],
'time_offset': [np.float32, 'time', float(-999), None, 'time', tln, tu, None, None], 'time_offset': [np.float32, 'time', float(-999), None, 'time', tln, tu, None, None],
'station_name': ['c', 'strlen', '-', None, None, 'station name', None, None, 'timeseries_id'], 'station_name': ['c', 'max_len_station_name', '-', None, None, 'station name', None, None, 'timeseries_id'],
'time': [np.float32, 'time', float(-999), None, None, "Time offset from epoch", "seconds since 1970-01-01 00:00:00Z", None, None, None] 'time': [np.float32, 'time', float(-999), None, None, "Time offset from epoch", "seconds since 1970-01-01 00:00:00Z", None, None, None]
} }
...@@ -57,9 +57,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): ...@@ -57,9 +57,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
attr = coordinates[key] attr = coordinates[key]
if(attr[1]): if(attr[1]):
if attr[1] == 'strlen': if attr[1] == 'max_len_station_name':
if (chunksizes) and chunksizes[0] > 256: if (chunksizes) and chunksizes[0] > 32:
variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[256]) variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[32])
else: else:
variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes) variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes)
...@@ -110,6 +110,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): ...@@ -110,6 +110,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
ncFile.conventions = 'CF-1.6' ncFile.conventions = 'CF-1.6'
ncFile.institution = 'UW SSEC' ncFile.institution = 'UW SSEC'
ncFile.featureType = 'timeSeries' ncFile.featureType = 'timeSeries'
ncFile.data_level = 'a0'
ncFile.datastream = 'aoss.tower.nc.la0.v00'
ncFile.software_version = '00'
#generate history #generate history
ncFile.history = ' '.join(platform.uname()) + " " + os.path.basename(__file__) ncFile.history = ' '.join(platform.uname()) + " " + os.path.basename(__file__)
...@@ -119,8 +122,6 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): ...@@ -119,8 +122,6 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
def getData(inputFiles, no_empty): def getData(inputFiles, no_empty):
dictData = {} dictData = {}
for filename in inputFiles: for filename in inputFiles:
getFrames = list(parser.read_frames(filename)) getFrames = list(parser.read_frames(filename))
...@@ -202,11 +203,13 @@ def writeVars(ncFile, frame): ...@@ -202,11 +203,13 @@ def writeVars(ncFile, frame):
# @param output filename - filename of the netcdf file # @param output filename - filename of the netcdf file
def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty): def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty):
default = False
if(chunkSize): if(chunkSize):
chunksizes = [chunkSize] chunksizes = [chunkSize]
else: else:
chunksizes = None default = True
frame = getData(inputFiles, no_empty) frame = getData(inputFiles, no_empty)
...@@ -234,6 +237,9 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em ...@@ -234,6 +237,9 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em
if(start and end): if(start and end):
frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')] frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
if(default):
chunksizes = [len(list(frame.index))]
firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S') firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S')
ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC') ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
...@@ -241,6 +247,8 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em ...@@ -241,6 +247,8 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em
ncFile = writeDimensions(ncFile) ncFile = writeDimensions(ncFile)
ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib) ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib)
ncFile.inputFiles = ', '.join(inputFiles)
ncFile = writeVars(ncFile, frame) ncFile = writeVars(ncFile, frame)
......
Will write unit tests later, but for now, here are the commands Will write unit tests later, but for now, here are the commands
time python nc.py /mnt/inst-data/aoss-tower/2015/06/rig_tower.2015-06-06.ascii -o aoss_tower.2015-06-06.nc -z -cs 17280 time python nc.py /mnt/inst-data/aoss-tower/2015/06/rig_tower.2015-06-06.ascii -o aoss_tower.2015-06-06.nc -z --chunk-size 17280
time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc -z -cs 17280 -s 2003-05-28T00:00:00 -e 2014-04-01T23:59:59 time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc -z --chunk-size 17280 -s 2003-05-28T00:00:00 -e 2014-04-01T23:59:59
time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc aoss_tower.2003-07-28.nc aoss_tower.2009-05-28.nc aoss_tower.2014-04-01.nc -z -cs 17280 time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc aoss_tower.2003-07-28.nc aoss_tower.2009-05-28.nc aoss_tower.2014-04-01.nc -z --chunk-size 17280
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment