diff --git a/aosstower/level_a0/nc.py b/aosstower/level_a0/nc.py index c412df5aaec52a0063682bdcada3e72d35cd4096..3d75f18fb1e3a381728a7f83afdeed7fd9b4eb1d 100644 --- a/aosstower/level_a0/nc.py +++ b/aosstower/level_a0/nc.py @@ -16,10 +16,12 @@ LOG = logging.getLogger(__name__) # no parameters # no returns -def writeDimensions(ncFile, stamps): +def writeDimensions(ncFile): #ncFile.createDimension('time', len(stamps)) ncFile.createDimension('time', None) - ncFile.createDimension('strlen', None) + ncFile.createDimension('strlen', 256) + + print(ncFile) return ncFile @@ -53,9 +55,16 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): for key in coordinates: attr = coordinates[key] - #create variable if(attr[1]): - variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes) + if attr[1] == 'strlen': + if chunksizes[0] > 256: + variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=[256]) + + else: + variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes) + + else: + variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1]), fill_value=attr[2], zlib=zlib, chunksizes=chunksizes) else: variable = ncFile.createVariable(key, attr[0], fill_value=attr[1], zlib=zlib, chunksizes=chunksizes) @@ -106,11 +115,18 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): return ncFile -def getData(inputFiles): +def getData(inputFiles, no_empty): dictData = {} + + for filename in inputFiles: - for frame in parser.read_frames(filename): + getFrames = list(parser.read_frames(filename)) + + if(len(getFrames) == 0 and no_empty): + return [None, False] + + for frame in getFrames: if 'stamp' not in frame: continue @@ -119,7 +135,7 @@ def getData(inputFiles): dictData[stamp] = frame - return pd.DataFrame(dictData).transpose() + return [pd.DataFrame(dictData).transpose(), True] def writeVars(ncFile, frame): stamps = list(frame.index) @@ -183,41 +199,55 @@ def writeVars(ncFile, frame): # @param input filenames - list of filenames # @param output filename - filename of the netcdf file -def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize): +def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty): chunksizes = [chunkSize] - frame = getData(inputFiles) + frame = getData(inputFiles, no_empty) - if(start and end): - frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')] + if(not frame[1]): + raise IOError('An empty ASCII file was found') - firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S') + if(frame[0].empty): + ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC') + #ncfile = writeDimensions(ncFile) + + #if(start): + # ncFile = createVariables(ncFile, start, chunksizes, zlib) - if not outputName: - outputName = firstStamp.strftime('aoss_tower.%Y-%m-%d.nc') + #else: + # firstEmpty = inputFiles[0].split('/') + # filename = firstEmpty[len(firstEmpty) - 1] + # first = dt.strptime(filename, 'rig_tower.%Y-%m-%d.ascii') + # ncFile = createVariables(ncFile, first, chunksizes, zlib) - ncFile = Dataset(outputName, 'w') + ncFile.close() + + else: + frame = frame[0] + + if(start and end): + frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')] + + firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S') + + ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC') - ncFile = writeDimensions(ncFile, list(frame.index)) + ncFile = writeDimensions(ncFile) - ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib) + ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib) - ncFile = writeVars(ncFile, frame) + ncFile = writeVars(ncFile, frame) - ncFile.close() + ncFile.close() -def createMultiple(filenames, outputFilenames, zlib, chunkSize): +def createMultiple(filenames, outputFilenames, zlib, chunkSize, no_empty): if(outputFilenames and len(filenames) != len(outputFilenames)): print('USAGE: number of output filenames must equal number of input filenames when start and end times are not specified') exit(0) for idx, filename in enumerate(filenames): - if(outputFilenames): - createGiantNetCDF(None, None, [filename], outputFilenames[idx], zlib, chunkSize) + createGiantNetCDF(None, None, [filename], outputFilenames[idx], zlib, chunkSize, no_empty) - else: - createGiantNetCDF(None, None, [filename], None, zlib, chunkSize) - #The purpose of this method is to take a string in the format # YYYY-mm-ddTHH:MM:SS and convert that to a datetime object # used in coordination with argparse -s and -e params @@ -244,11 +274,13 @@ def main(): parser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file') parser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file') parser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib') + parser.add_argument('--no-empty', '--no-empty', action='store_true', help='allow empty nc files or not,' + + ' if not and there is an empty file, an exception is raised') parser.add_argument("input_files", nargs="+", help="aoss_tower level_00 paths") - parser.add_argument('-o', '--output', nargs="+", help="filename pattern or filename. " + + parser.add_argument('-o', '--output', required=True, nargs="+", help="filename pattern or filename. " + "Should be along the lines of <filepath>/aoss_tower.YYYY-MM-DD.nc") args = parser.parse_args() @@ -256,10 +288,15 @@ def main(): level=levels[min(3, args.verbosity)] logging.basicConfig(level=level) + print(args) + if(args.start_time and args.end_time): - createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output, args.zlib, args.chunk_size) + createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size, args.no_empty) + + elif(args.start_time or args.end_time): + print('USAGE: start time and end time must both be specified or not specified') else: - createMultiple(args.input_files, args.output, args.zlib, args.chunk_size) + createMultiple(args.input_files, args.output, args.zlib, args.chunk_size, args.no_empty) if __name__ == "__main__": main()