Skip to content
Snippets Groups Projects
Commit c0418f94 authored by kgao's avatar kgao
Browse files

Got MFDataset to work

Changed the netCDF4 file format to NETCDF4_CLASSIC
Changed the strlen dimension from unlimited to 256
Added a --no-empty flag
Made the -o flag required
parent 2f9e69d0
No related branches found
No related tags found
No related merge requests found
...@@ -16,10 +16,12 @@ LOG = logging.getLogger(__name__) ...@@ -16,10 +16,12 @@ LOG = logging.getLogger(__name__)
# no parameters # no parameters
# no returns # no returns
def writeDimensions(ncFile, stamps): def writeDimensions(ncFile):
#ncFile.createDimension('time', len(stamps)) #ncFile.createDimension('time', len(stamps))
ncFile.createDimension('time', None) ncFile.createDimension('time', None)
ncFile.createDimension('strlen', None) ncFile.createDimension('strlen', 256)
print(ncFile)
return ncFile return ncFile
...@@ -53,9 +55,16 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): ...@@ -53,9 +55,16 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
for key in coordinates: for key in coordinates:
attr = coordinates[key] attr = coordinates[key]
#create variable
if(attr[1]): if(attr[1]):
variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes) if attr[1] == 'strlen':
if chunksizes[0] > 256:
variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[256])
else:
variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes)
else:
variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes)
else: else:
variable = ncFile.createVariable(key, attr[0], fill_value=attr[1], zlib=zlib, chunksizes=chunksizes) variable = ncFile.createVariable(key, attr[0], fill_value=attr[1], zlib=zlib, chunksizes=chunksizes)
...@@ -106,11 +115,18 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): ...@@ -106,11 +115,18 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib):
return ncFile return ncFile
def getData(inputFiles): def getData(inputFiles, no_empty):
dictData = {} dictData = {}
for filename in inputFiles: for filename in inputFiles:
for frame in parser.read_frames(filename): getFrames = list(parser.read_frames(filename))
if(len(getFrames) == 0 and no_empty):
return [None, False]
for frame in getFrames:
if 'stamp' not in frame: if 'stamp' not in frame:
continue continue
...@@ -119,7 +135,7 @@ def getData(inputFiles): ...@@ -119,7 +135,7 @@ def getData(inputFiles):
dictData[stamp] = frame dictData[stamp] = frame
return pd.DataFrame(dictData).transpose() return [pd.DataFrame(dictData).transpose(), True]
def writeVars(ncFile, frame): def writeVars(ncFile, frame):
stamps = list(frame.index) stamps = list(frame.index)
...@@ -183,41 +199,55 @@ def writeVars(ncFile, frame): ...@@ -183,41 +199,55 @@ def writeVars(ncFile, frame):
# @param input filenames - list of filenames # @param input filenames - list of filenames
# @param output filename - filename of the netcdf file # @param output filename - filename of the netcdf file
def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize): def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty):
chunksizes = [chunkSize] chunksizes = [chunkSize]
frame = getData(inputFiles) frame = getData(inputFiles, no_empty)
if(start and end): if(not frame[1]):
frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')] raise IOError('An empty ASCII file was found')
firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S') if(frame[0].empty):
ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
#ncfile = writeDimensions(ncFile)
#if(start):
# ncFile = createVariables(ncFile, start, chunksizes, zlib)
if not outputName: #else:
outputName = firstStamp.strftime('aoss_tower.%Y-%m-%d.nc') # firstEmpty = inputFiles[0].split('/')
# filename = firstEmpty[len(firstEmpty) - 1]
# first = dt.strptime(filename, 'rig_tower.%Y-%m-%d.ascii')
# ncFile = createVariables(ncFile, first, chunksizes, zlib)
ncFile = Dataset(outputName, 'w') ncFile.close()
else:
frame = frame[0]
if(start and end):
frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S')
ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
ncFile = writeDimensions(ncFile, list(frame.index)) ncFile = writeDimensions(ncFile)
ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib) ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib)
ncFile = writeVars(ncFile, frame) ncFile = writeVars(ncFile, frame)
ncFile.close() ncFile.close()
def createMultiple(filenames, outputFilenames, zlib, chunkSize): def createMultiple(filenames, outputFilenames, zlib, chunkSize, no_empty):
if(outputFilenames and len(filenames) != len(outputFilenames)): if(outputFilenames and len(filenames) != len(outputFilenames)):
print('USAGE: number of output filenames must equal number of input filenames when start and end times are not specified') print('USAGE: number of output filenames must equal number of input filenames when start and end times are not specified')
exit(0) exit(0)
for idx, filename in enumerate(filenames): for idx, filename in enumerate(filenames):
if(outputFilenames): createGiantNetCDF(None, None, [filename], outputFilenames[idx], zlib, chunkSize, no_empty)
createGiantNetCDF(None, None, [filename], outputFilenames[idx], zlib, chunkSize)
else:
createGiantNetCDF(None, None, [filename], None, zlib, chunkSize)
#The purpose of this method is to take a string in the format #The purpose of this method is to take a string in the format
# YYYY-mm-ddTHH:MM:SS and convert that to a datetime object # YYYY-mm-ddTHH:MM:SS and convert that to a datetime object
# used in coordination with argparse -s and -e params # used in coordination with argparse -s and -e params
...@@ -244,11 +274,13 @@ def main(): ...@@ -244,11 +274,13 @@ def main():
parser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file') parser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file')
parser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file') parser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file')
parser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib') parser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib')
parser.add_argument('--no-empty', '--no-empty', action='store_true', help='allow empty nc files or not,' +
' if not and there is an empty file, an exception is raised')
parser.add_argument("input_files", nargs="+", parser.add_argument("input_files", nargs="+",
help="aoss_tower level_00 paths") help="aoss_tower level_00 paths")
parser.add_argument('-o', '--output', nargs="+", help="filename pattern or filename. " + parser.add_argument('-o', '--output', required=True, nargs="+", help="filename pattern or filename. " +
"Should be along the lines of <filepath>/aoss_tower.YYYY-MM-DD.nc") "Should be along the lines of <filepath>/aoss_tower.YYYY-MM-DD.nc")
args = parser.parse_args() args = parser.parse_args()
...@@ -256,10 +288,15 @@ def main(): ...@@ -256,10 +288,15 @@ def main():
level=levels[min(3, args.verbosity)] level=levels[min(3, args.verbosity)]
logging.basicConfig(level=level) logging.basicConfig(level=level)
print(args)
if(args.start_time and args.end_time): if(args.start_time and args.end_time):
createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output, args.zlib, args.chunk_size) createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size, args.no_empty)
elif(args.start_time or args.end_time):
print('USAGE: start time and end time must both be specified or not specified')
else: else:
createMultiple(args.input_files, args.output, args.zlib, args.chunk_size) createMultiple(args.input_files, args.output, args.zlib, args.chunk_size, args.no_empty)
if __name__ == "__main__": if __name__ == "__main__":
main() main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment