diff --git a/aosstower/level_a0/autoRun.bash b/aosstower/level_a0/autoRun.bash
new file mode 100755
--- /dev/null
+++ b/aosstower/level_a0/autoRun.bash
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+PYENV=/home/kgao/env/
+SCRIPT=/home/kgao/Code/finished/Aoss_Tower_a1_Conversion/convertFromASCIIToNETCDF/autoRun.py
+(
+    flock -x -n 200 || exit $?
+    "$PYENV/bin/python" "$SCRIPT"
+) 200>"/tmp/$(basename "$0").lock"
diff --git a/aosstower/level_a0/autoRun.py b/aosstower/level_a0/autoRun.py
new file mode 100755
index 0000000000000000000000000000000000000000..0d15a20c65b5a587bb1f81cde94ba340c130db2f
--- /dev/null
+++ b/aosstower/level_a0/autoRun.py
@@ -0,0 +1,3 @@
+from bundle import createYesterdayFile
+
+createYesterdayFile()
diff --git a/aosstower/level_a0/bundle.py b/aosstower/level_a0/bundle.py
new file mode 100644
--- /dev/null
+++ b/aosstower/level_a0/bundle.py
@@ -0,0 +1,134 @@
+from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars
+from datetime import datetime as dt
+from datetime import timedelta as delta
+import pandas as pd
+import time
+from netCDF4 import Dataset
+import os
+import sys
+
+# Root of the generated netCDF tree and of the ASCII input tree.
+OUTPUT_ROOT = "/mnt/inst-data/regen_netcdf/aoss/tower/"
+INPUT_ROOT = '/mnt/inst-data/aoss-tower/'
+
+
+def createDirectory(stamp):
+    """Make the YYYY/MM output directory for stamp the working directory.
+
+    The directory is created first when it is missing. Callers depend on
+    the chdir: they open their output file by bare name afterwards.
+    """
+    path = OUTPUT_ROOT + stamp.strftime('%Y/%m')
+
+    if not os.path.isdir(path):
+        os.makedirs(path)
+
+    os.chdir(path)
+
+
+def createFilename(date):
+    """Return the path of the ASCII input file named after date."""
+    return (INPUT_ROOT + date.strftime('%Y/%m/')
+            + 'rig_tower.' + date.strftime('%Y-%m-%d') + '.ascii')
+
+
+def createNCFile(frame, filename):
+    """Write frame to aoss_tower.YYYY-MM-DD.nc in its YYYY/MM directory.
+
+    frame    -- data whose index holds the record timestamps; the first
+                timestamp selects the directory and the file name
+    filename -- input file the data came from, stored in the inputFiles
+                attribute of the output
+    """
+    firstStamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
+
+    createDirectory(firstStamp)
+
+    outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.nc'
+
+    ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
+
+    # close the dataset even when a write step raises
+    try:
+        ncFile = writeDimensions(ncFile)
+        ncFile = createVariables(ncFile, firstStamp, [len(frame.index)], True)
+        ncFile.inputFiles = ', '.join([filename])
+        ncFile = writeVars(ncFile, frame)
+    finally:
+        ncFile.close()
+
+
+def create_files(start_date, end_date):
+    """Create the daily netCDF files for start_date through end_date.
+
+    Both ends are inclusive. Input files are located by date alone, so
+    this does not depend on which dates each file actually contains.
+    """
+    # for each file's name based on date - does not rely on what dates are
+    # in each file so should be similar code for buoy
+    # get file, discover which full dates excluding the start are in it
+    # from there we create the netcdf files
+    # for the start, we always recreate the netcdf using the previous file
+    # ignore the last unfulfilled date
+
+    for offset in range((end_date - start_date).days + 1):
+        cur_dt = start_date + delta(days=offset)
+        filename = createFilename(cur_dt)
+        print('grabbing ' + filename)
+
+        df = getData([filename], False)[0]
+
+        if df.empty:
+            continue
+
+        # group on the full calendar date: grouping on day-of-month alone
+        # puts the 1st ahead of the 31st when a file spans a month boundary
+        DFList = [group for _, group in df.groupby(df.index.date)]
+
+        if len(DFList) == 1:
+            createNCFile(DFList[0], filename)
+
+        elif len(DFList) == 2:
+            first, second = DFList
+
+            if len(first.index) > len(second.index):
+                createNCFile(first, filename)
+
+            #get start
+            startString = str(second.index[0])
+            startDay = startString.split(" ")[0]
+            startObj = dt.strptime(startDay, '%Y-%m-%d')
+            createDirectory(startObj)
+
+            #get end
+            endObj = startObj.replace(hour=23, minute=59, second=59, microsecond=59)
+
+            #input files: this day's file and the previous day's
+            inputFiles = [filename, createFilename(cur_dt - delta(days=1))]
+
+            #output filename
+            outputFilename = 'aoss_tower.' + startDay + '.nc'
+
+            #create file
+            createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None, False)
+
+        else:
+            # unexpected number of dates in one file: dump what was found
+            # and stop with a failing status
+            print(len(DFList))
+
+            for frame in DFList:
+                print(len(frame.index))
+                print(frame.index[0])
+
+            sys.exit(1)
+
+
+def createYesterdayFile():
+    """Create the netCDF file for yesterday."""
+    # read the clock once so both ends are the same instant
+    yesterday = dt.today() - delta(days=1)
+    create_files(yesterday, yesterday)
+
+#createYesterdayFile()
+#create_files(dt(2003,5,28), dt.today() - delta(days=1))
diff --git a/aosstower/level_a0/nc.py b/aosstower/level_a0/nc.py
index fc140d72117923222595248412b2a7ec53adcfd7..55f700a1464645039476ee405b3551dfd28f1086 100644
--- a/aosstower/level_a0/nc.py
+++ b/aosstower/level_a0/nc.py
@@ -19,7 +19,7 @@ LOG = logging.getLogger(__name__)
 def writeDimensions(ncFile):
     #ncFile.createDimension('time', len(stamps))
     ncFile.createDimension('time', None)
-    ncFile.createDimension('strlen', 256)
+    ncFile.createDimension('max_len_station_name', 32)
 
     return ncFile
 
@@ -49,7 +49,7 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
         'alt': [np.float32, None, float(-999), None, 'height', 'vertical distance', 'm', None, None],
         'base_time': [np.float32, None, float(-999), None, 'time', btln, btu, None, None],
         'time_offset': [np.float32, 'time', float(-999), None, 'time', tln, tu, None, None],
-        'station_name': ['c', 'strlen', '-', None, None, 'station name', None, None, 'timeseries_id'],
+        'station_name': ['c', 'max_len_station_name', '-', None, None, 'station name', None, None, 'timeseries_id'],
         'time': [np.float32, 'time', float(-999), None, None, "Time offset from epoch", "seconds since 1970-01-01 00:00:00Z", None, None, None]
     }
 
@@ -57,9 +57,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
         attr = coordinates[key]
 
         if(attr[1]):
-            if attr[1] == 'strlen':
-                if (chunksizes) and chunksizes[0] > 256:
-                    variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[256])
+            if attr[1] == 'max_len_station_name':
+                if (chunksizes) and chunksizes[0] > 32:
+                    variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[32])
                 else:
                     variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes)
 
@@ -110,6 +110,9 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
     ncFile.conventions = 'CF-1.6'
     ncFile.institution = 'UW SSEC'
     ncFile.featureType = 'timeSeries'
+    ncFile.data_level = 'a0'
+    ncFile.datastream = 'aoss.tower.nc.la0.v00'
+    ncFile.software_version = '00'
 
     #generate history
     ncFile.history = ' '.join(platform.uname()) + " " + os.path.basename(__file__)
@@ -119,8 +122,6 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
 
 def getData(inputFiles, no_empty):
     dictData = {}
-
-
     for filename in inputFiles:
         getFrames = list(parser.read_frames(filename))
 
@@ -202,11 +203,13 @@ def writeVars(ncFile, frame):
 # @param output filename - filename of the netcdf file
 
 def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty):
+    default = False
+
     if(chunkSize):
         chunksizes = [chunkSize]
 
     else:
-        chunksizes = None
+        default = True
 
     frame = getData(inputFiles, no_empty)
 
@@ -234,6 +237,9 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em
     if(start and end):
         frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
 
+    if(default):
+        chunksizes = [len(list(frame.index))]
+
     firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S')
 
     ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
@@ -241,6 +247,8 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_em
     ncFile = writeDimensions(ncFile)
 
     ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib)
+
+    ncFile.inputFiles = ', '.join(inputFiles)
 
     ncFile = writeVars(ncFile, frame)
 
diff --git a/aosstower/level_a0/test_cases.txt b/aosstower/level_a0/test_cases.txt
index 304880947b5ec35ecb311b87495d219e3b722baf..a78a2cbad80017ad6ea7409bd2c3c46e0f41192e 100644
--- a/aosstower/level_a0/test_cases.txt
+++ b/aosstower/level_a0/test_cases.txt
@@ -1,6 +1,6 @@
 Will write unit tests later, but for now, here are the commands
-time python nc.py /mnt/inst-data/aoss-tower/2015/06/rig_tower.2015-06-06.ascii -o aoss_tower.2015-06-06.nc -z -cs 17280
+time python nc.py /mnt/inst-data/aoss-tower/2015/06/rig_tower.2015-06-06.ascii -o aoss_tower.2015-06-06.nc -z --chunk-size 17280
 
-time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc -z -cs 17280 -s 2003-05-28T00:00:00 -e 2014-04-01T23:59:59
+time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc -z --chunk-size 17280 -s 2003-05-28T00:00:00 -e 2014-04-01T23:59:59
 
-time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc aoss_tower.2003-07-28.nc aoss_tower.2009-05-28.nc aoss_tower.2014-04-01.nc -z -cs 17280
+time python nc.py /mnt/inst-data/aoss-tower/2003/05/rig_tower.2003-05-28.ascii /mnt/inst-data/aoss-tower/2003/07/rig_tower.2003-07-27.ascii /mnt/inst-data/aoss-tower/2009/05/rig_tower.2009-05-28.ascii /mnt/inst-data/aoss-tower/2014/04/rig_tower.2014-04-01.ascii -o aoss_tower.2003-05-28.nc aoss_tower.2003-07-28.nc aoss_tower.2009-05-28.nc aoss_tower.2014-04-01.nc -z --chunk-size 17280