From cf6ecdeb69fb0775e328b28cc9cd06950dac7c6d Mon Sep 17 00:00:00 2001
From: kgao <kenny.gao@ssec.wisc.edu>
Date: Tue, 9 Aug 2016 14:43:22 +0000
Subject: [PATCH] Added netcdf daily regen and fixed globals. Added globals
software_version, data_level, datastream, and inputFiles. Added netcdf
daily regen: creates a netcdf file for each day, going through all ascii
files. Netcdf files are created at
/mnt/inst-data/regen_netcdf/aoss/tower/YYYY/MM
---
aosstower/level_a0/autoRun.bash | 7 ++
aosstower/level_a0/autoRun.py | 3 +
aosstower/level_a0/bundle.py | 119 ++++++++++++++++++++++++++++++
aosstower/level_a0/nc.py | 24 ++++--
aosstower/level_a0/test_cases.txt | 6 +-
5 files changed, 148 insertions(+), 11 deletions(-)
create mode 100755 aosstower/level_a0/autoRun.bash
create mode 100755 aosstower/level_a0/autoRun.py
create mode 100644 aosstower/level_a0/bundle.py
diff --git a/aosstower/level_a0/autoRun.bash b/aosstower/level_a0/autoRun.bash
new file mode 100755
index 0000000..fc7d48d
--- /dev/null
+++ b/aosstower/level_a0/autoRun.bash
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+PYENV=/home/kgao/env/  # prefix whose bin/python runs $SCRIPT
+SCRIPT=/home/kgao/Code/finished/Aoss_Tower_a1_Conversion/convertFromASCIIToNETCDF/autoRun.py
+(
+    flock -x -n 200 || exit $?  # non-blocking exclusive lock on fd 200; quit if another run holds it
+    "$PYENV/bin/python" "$SCRIPT"
+) 200>"/tmp/$(basename "$0").lock"  # fd 200 is opened on a per-script lock file
diff --git a/aosstower/level_a0/autoRun.py b/aosstower/level_a0/autoRun.py
new file mode 100755
index 0000000..0d15a20
--- /dev/null
+++ b/aosstower/level_a0/autoRun.py
@@ -0,0 +1,3 @@
+from bundle import createYesterdayFile
+if __name__ == '__main__':  # run the regen only when executed as a script, not on import
+    createYesterdayFile()
diff --git a/aosstower/level_a0/bundle.py b/aosstower/level_a0/bundle.py
new file mode 100644
index 0000000..918af3d
--- /dev/null
+++ b/aosstower/level_a0/bundle.py
@@ -0,0 +1,119 @@
+from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars
+from datetime import datetime as dt
+from datetime import timedelta as delta
+import pandas as pd
+import time
+from netCDF4 import Dataset
+import os
+
+def createDirectory(stamp):
+    """Make sure the YYYY/MM output directory for *stamp* exists and chdir into it."""
+    target = "/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m')
+    # Create the directory tree only when it is missing.
+    if not os.path.exists(target):
+        os.makedirs(target)
+    os.chdir(target)
+
+
+def createFilename(date):
+    """Return the path of the daily ASCII tower file for *date*."""
+    # Layout: /mnt/inst-data/aoss-tower/YYYY/MM/rig_tower.YYYY-MM-DD.ascii
+    month_dir = '/mnt/inst-data/aoss-tower/' + date.strftime('%Y/%m/')
+    base_name = 'rig_tower.' + date.strftime('%Y-%m-%d') + '.ascii'
+    return month_dir + base_name
+
+def createNCFile(frame, filename):
+    """Write *frame* to aoss_tower.YYYY-MM-DD.nc in its YYYY/MM output directory.
+
+    @param frame - data whose index holds timestamps; the first one names the file
+    @param filename - source ASCII path, stored in the file's inputFiles attribute
+    """
+    firstStamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
+    createDirectory(firstStamp)  # also chdirs there, so the bare name below lands in it
+    outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.nc'
+    ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
+    try:
+        ncFile = writeDimensions(ncFile)
+        ncFile = createVariables(ncFile, firstStamp, [len(frame.index)], True)
+        ncFile.inputFiles = ', '.join([filename])
+        ncFile = writeVars(ncFile, frame)
+    finally:
+        # Close even when a writer raises, so the file handle is never leaked.
+        ncFile.close()
+
+def create_files(start_date, end_date):
+ # Regenerate daily netCDF files for each day from start_date through
+ # end_date (inclusive). Input names are built from the date alone, not from
+ # the dates found inside each file, so similar code should work for buoy.
+ # A file may hold rows for more than one date; the rows are split by
+ # day-of-month and handled by the branches below.
+ # @param start_date - first day to process (datetime)
+ # @param end_date - last day to process (datetime)
+ cur_dt = start_date
+
+ for day in range((end_date - start_date).days + 1):
+ filename = createFilename(cur_dt)
+ print('grabbing ' + filename)
+
+ df = getData([filename], False)[0]
+ # empty frame for this day: advance the date and try the next file
+ if(df.empty):
+ cur_dt += delta(days = 1)
+ continue
+
+ DFList = []
+ # one sub-frame per distinct day-of-month found in the index
+ for group in df.groupby(df.index.day):
+ DFList.append(group[1])
+ # NOTE(review): grouping is by day-of-month only, so across a month boundary
+ # (e.g. 31 -> 1) the later date presumably comes first in DFList - confirm
+ if(len(DFList) <= 1):
+ frame = DFList[0]
+ # only one date present: write the whole frame as that day's file
+ createNCFile(frame, filename)
+
+ elif(len(DFList) == 2):
+ if(len(list(DFList[0].index)) > len(list(DFList[1].index))):
+ frame = DFList[0]
+ createNCFile(frame, filename)
+
+ #get second frame
+ frame = DFList[1]
+
+ #get start: midnight of the second frame's first timestamp
+ startString = str(frame.index[0])
+ startObj = dt.strptime(startString.split(" ")[0], '%Y-%m-%d')
+ createDirectory(startObj)
+
+ #get end: 23:59:59 on that same date
+ endObj = startObj.replace(hour=23, minute=59, second=59, microsecond=59)
+
+ #input files: this day's file plus the previous day's file
+ inputFiles = []
+ inputFiles.append(filename)
+ inputFiles.append(createFilename(cur_dt - delta(days=1)))
+
+ #output filename
+ outputFilename = 'aoss_tower.' + startString.split(" ")[0] + '.nc'
+
+ #create file from rows between start and end (zlib=True, chunkSize=None, no_empty=False)
+ createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None, False)
+
+ else:
+ print(len(DFList))
+ # more than two groups: print each group's row count and first timestamp
+ for frame in DFList:
+ print(len(list(frame.index)))
+ print(list(frame.index)[0])
+ exit(0)
+ # NOTE(review): exit(0) signals success to the caller on this unexpected path - confirm
+
+
+ #increment day
+ cur_dt += delta(days = 1)
+
+def createYesterdayFile():
+    """Regenerate the netCDF file for yesterday."""
+    # For a full backfill call e.g. create_files(dt(2003,5,28), dt.today() - delta(days=1)) instead.
+    yesterday = dt.today() - delta(days=1)
+    create_files(yesterday, yesterday)
diff --git a/aosstower/level_a0/nc.py b/aosstower/level_a0/nc.py
index fc140d7..55f700a 100644
--- a/aosstower/level_a0/nc.py
+++ b/aosstower/level_a0/nc.py
@@ -19,7 +19,7 @@ LOG = logging.getLogger(__name__)
def writeDimensions(ncFile):
#ncFile.createDimension('time', len(stamps))
ncFile.createDimension('time', None)
- ncFile.createDimension('strlen', 256)
+ ncFile.createDimension('max_len_station_name', 32)
return ncFile
@@ -49,7 +49,7 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
'alt': [np.float32, None, float(-999), None, 'height', 'vertical distance', 'm', None, None],
'base_time': [np.float32, None, float(-999), None, 'time', btln, btu, None, None],
'time_offset': [np.float32, 'time', float(-999), None, 'time', tln, tu, None, None],
- 'station_name': ['c', 'strlen', '-', None, None, 'station name', None, None, 'timeseries_id'],
+ 'station_name': ['c', 'max_len_station_name', '-', None, None, 'station name', None, None, 'timeseries_id'],
'time': [np.float32, 'time', float(-999), None, None, "Time offset from epoch", "seconds since 1970-01-01 00:00:00Z", None, None, None]
}
@@ -57,9 +57,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
attr = coordinates[key]
if(attr[1]):
- if attr[1] == 'strlen':
- if (chunksizes) and chunksizes[0] > 256:
- variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[256])
+ if attr[1] == 'max_len_station_name':
+ if (chunksizes) and chunksizes[0] > 32:
+ variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[32])
else:
variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes)
@@ -110,6 +110,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
ncFile.conventions = 'CF-1.6'
ncFile.institution = 'UW SSEC'
ncFile.featureType = 'timeSeries'
+ ncFile.data_level = 'a0'
+ ncFile.datastream = 'aoss.tower.nc.la0.v00'
+ ncFile.software_version = '00'
#generate history
ncFile.history = ' '.join(platform.uname()) + " " + os.path.basename(__file__)
@@ -119,8 +122,6 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
def getData(inputFiles, no_empty):
dictData = {}
-
-
for filename in inputFiles:
getFrames = list(parser.read_frames(filename))
@@ -202,11 +203,13 @@ def writeVars(ncFile, frame):
# @param output filename - filename of the netcdf file
def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty):
+ default = False
+
if(chunkSize):
chunksizes = [chunkSize]
else:
- chunksizes = None
+ default = True
frame = getData(inputFiles, no_empty)
@@ -234,6 +237,9 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em
if(start and end):
frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
+ if(default):
+ chunksizes = [len(list(frame.index))]
+
firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S')
ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
@@ -241,6 +247,8 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em
ncFile = writeDimensions(ncFile)
ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib)
+
+ ncFile.inputFiles = ', '.join(inputFiles)
ncFile = writeVars(ncFile, frame)
diff --git a/aosstower/level_a0/test_cases.txt b/aosstower/level_a0/test_cases.txt
index 3048809..a78a2cb 100644
--- a/aosstower/level_a0/test_cases.txt
+++ b/aosstower/level_a0/test_cases.txt
@@ -1,6 +1,6 @@
Will write unit tests later, but for now, here are the commands
-time python nc.py /mnt/inst-data/aoss-tower/2015/06/rig_tower.2015-06-06.ascii -o aoss_tower.2015-06-06.nc -z -cs 17280
+time python nc.py /mnt/inst-data/aoss-tower/2015/06/rig_tower.2015-06-06.ascii -o aoss_tower.2015-06-06.nc -z --chunk-size 17280
-time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc -z -cs 17280 -s 2003-05-28T00:00:00 -e 2014-04-01T23:59:59
+time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc -z --chunk-size 17280 -s 2003-05-28T00:00:00 -e 2014-04-01T23:59:59
-time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc aoss_tower.2003-07-28.nc aoss_tower.2009-05-28.nc aoss_tower.2014-04-01.nc -z -cs 17280
+time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc aoss_tower.2003-07-28.nc aoss_tower.2009-05-28.nc aoss_tower.2014-04-01.nc -z --chunk-size 17280
--
GitLab