import os
import sys
import logging
import platform
from datetime import datetime as dt

import numpy as np
import pandas as pd
from netCDF4 import Dataset

from aosstower import station
from aosstower.l00 import parser

LOG = logging.getLogger(__name__)


def writeDimensions(ncFile):
    """Create the dimensions for the nc file and return it."""
    # unlimited time dimension; fixed string length for station_name
    ncFile.createDimension('time', None)
    ncFile.createDimension('strlen', 256)
    return ncFile


def createVariables(ncFile, firstStamp, chunksizes, zlib):
    # base_time long name
    btln = 'base time as unix timestamp'
    # base_time units
    btu = 'seconds since 1970-01-01 00:00:00'
    # base_time string
    bts = firstStamp.strftime('%Y-%m-%d 00:00:00Z')
    # time long name
    tln = 'time offset from midnight UTC'
    # time units
    tu = 'seconds since ' + firstStamp.strftime('%Y-%m-%d 00:00:00Z')

    coordinates = {
        # fields: type, dimension, fill, valid_min, standard_name,
        #         long_name, units, valid_max, cf_role
        'lon': [np.float32, None, float(-999), -180.0, 'longitude', None, 'degrees_east', 180.0, None],
        'lat': [np.float32, None, float(-999), -90.0, 'latitude', None, 'degrees_north', 90.0, None],
        'alt': [np.float32, None, float(-999), None, 'height', 'vertical distance', 'm', None, None],
        # float64 so unix timestamps keep second precision
        'base_time': [np.float64, None, float(-999), None, 'time', btln, btu, None, None],
        'time': [np.float32, 'time', float(-999), None, 'time', tln, tu, None, None],
        'station_name': ['c', 'strlen', '-', None, None, 'station name', None, None, 'timeseries_id'],
    }

    for key, attr in coordinates.items():
        if attr[1]:
            # cap the chunk size of the string variable at the strlen dimension
            if attr[1] == 'strlen' and chunksizes and chunksizes[0] > 256:
                variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1],),
                                                 fill_value=attr[2], zlib=zlib, chunksizes=[256])
            else:
                variable = ncFile.createVariable(key, attr[0], dimensions=(attr[1],),
                                                 fill_value=attr[2], zlib=zlib, chunksizes=chunksizes)
        else:
            # scalar variable: no dimensions, so no chunking or compression
            variable = ncFile.createVariable(key, attr[0], fill_value=attr[2])

        # create variable attributes
        if key == 'alt':
            variable.positive = 'up'
            variable.axis = 'Z'
        if attr[3] is not None:
            variable.valid_min = attr[3]
            variable.valid_max = attr[7]
        if attr[4]:
            variable.standard_name = attr[4]
        if attr[5]:
            variable.long_name = attr[5]
        if attr[6]:
            variable.units = attr[6]
        if attr[8]:
            variable.cf_role = attr[8]
        if key == 'base_time':
            variable.string = bts

    for entry in parser.database:
        if entry == 'stamp':
            continue
        varTup = parser.database[entry]
        variable = ncFile.createVariable(entry, np.float32, dimensions=('time',),
                                         fill_value=float(-999), zlib=zlib, chunksizes=chunksizes)
        variable.standard_name = varTup[1]
        variable.description = varTup[3]
        variable.units = varTup[4]

    # create global attributes
    ncFile.source = 'surface observation'
    ncFile.Conventions = 'CF-1.6'
    ncFile.institution = 'UW SSEC'
    ncFile.featureType = 'timeSeries'
    # generate history
    ncFile.history = ' '.join(platform.uname()) + " " + os.path.basename(__file__)
    return ncFile


def getData(inputFiles, no_empty):
    """Parse the input ASCII files into one frame indexed by timestamp.

    Returns [frame, ok]; ok is False when no_empty is set and an input
    file produced no frames.
    """
    dictData = {}
    for filename in inputFiles:
        getFrames = list(parser.read_frames(filename))
        if len(getFrames) == 0 and no_empty:
            return [None, False]
        for frame in getFrames:
            if 'stamp' not in frame:
                continue
            stamp = frame['stamp']
            del frame['stamp']
            dictData[stamp] = frame
    return [pd.DataFrame(dictData).transpose(), True]
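

# A minimal sketch (never called) of the frame shape getData() returns and
# writeVars() consumes: one row per timestamp, one column per parsed field.
# The column names here are hypothetical examples, not the actual
# aosstower.l00.parser schema.
def _exampleFrame():
    stamps = [dt(2016, 1, 1, 0, 0, 0), dt(2016, 1, 1, 0, 0, 5)]
    return pd.DataFrame({'air_temp': [-1.2, -1.3], 'rel_hum': [88.0, 88.5]},
                        index=stamps)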


def writeVars(ncFile, frame):
    stamps = list(frame.index)
    baseDTObj = dt.strptime(str(stamps[0]).split(' ')[0], '%Y-%m-%d')

    # find out how much time elapsed from the epoch
    # to the start of the day, in seconds
    baseTimeValue = (baseDTObj - dt(1970, 1, 1)).total_seconds()

    # offset of each stamp from the start of the day, in seconds
    timeNumpy = np.empty(len(stamps), dtype='float32')
    for counter, stamp in enumerate(stamps):
        stampObj = dt.strptime(str(stamp), '%Y-%m-%d %H:%M:%S')
        timeNumpy[counter] = (stampObj - baseDTObj).total_seconds()

    fileVar = ncFile.variables
    fileVar['base_time'].assignValue(baseTimeValue)
    fileVar['time'][:] = timeNumpy

    # write coordinate variable values to file
    # alt might not be right, need to verify
    fileVar['lon'].assignValue(station.LONGITUDE)
    fileVar['lat'].assignValue(station.LATITUDE)
    fileVar['alt'].assignValue(328)

    # transfer the station name into an array of chars; might change
    stationName = 'AOSS Tower'
    statNumpy = np.asarray(list(stationName))
    fileVar['station_name'][0:len(statNumpy)] = statNumpy

    # write the data columns into the file
    for varName in frame:
        fileVar[varName][:] = np.asarray(frame[varName].tolist())
    return ncFile


def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty):
    """Create one netCDF file from the given inputs.

    start       -- start datetime object (may be None)
    end         -- end datetime object (may be None)
    inputFiles  -- list of level_00 ASCII filenames
    outputName  -- filename of the netCDF file to create
    """
    chunksizes = [chunkSize] if chunkSize else None
    frame = getData(inputFiles, no_empty)
    if not frame[1]:
        raise IOError('An empty ASCII file was found')
    if frame[0].empty:
        ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
        # ncFile = writeDimensions(ncFile)
        # if start:
        #     ncFile = createVariables(ncFile, start, chunksizes, zlib)
        # else:
        #     filename = os.path.basename(inputFiles[0])
        #     first = dt.strptime(filename, 'rig_tower.%Y-%m-%d.ascii')
        #     ncFile = createVariables(ncFile, first, chunksizes, zlib)
        ncFile.close()
    else:
        frame = frame[0]
        if start and end:
            frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'):
                          end.strftime('%Y-%m-%d %H:%M:%S')]
        firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S')
        ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')
        ncFile = writeDimensions(ncFile)
        ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib)
        ncFile = writeVars(ncFile, frame)
        ncFile.close()


def createMultiple(filenames, outputFilenames, zlib, chunkSize, no_empty):
    if outputFilenames and len(filenames) != len(outputFilenames):
        print('USAGE: number of output filenames must equal the number of '
              'input filenames when start and end times are not specified')
        sys.exit(1)
    for idx, filename in enumerate(filenames):
        createGiantNetCDF(None, None, [filename], outputFilenames[idx],
                          zlib, chunkSize, no_empty)


def _dt_convert(datetime_str):
    """Convert a YYYY-mm-ddTHH:MM:SS string (argparse -s/-e) to a datetime object."""
    return dt.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S')
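

# A minimal sketch (never called) of driving the converter programmatically;
# the input and output paths are hypothetical. _dt_convert parses the same
# timestamp format the -s/-e command line options accept.
def _exampleCreateGiantNetCDF():
    start = _dt_convert('2016-01-01T00:00:00')  # -> dt(2016, 1, 1, 0, 0)
    end = _dt_convert('2016-01-02T00:00:00')
    createGiantNetCDF(start, end, ['rig_tower.2016-01-01.ascii'],
                      'aoss_tower.2016-01-01.nc', zlib=True, chunkSize=1024,
                      no_empty=False)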


def main():
    import argparse

    argparser = argparse.ArgumentParser(description="Convert level_00 aoss tower data to level_a0")
    argparser.add_argument('-v', '--verbose', action="count",
                           default=int(os.environ.get("VERBOSITY", 2)),
                           dest='verbosity',
                           help='each occurrence increases verbosity 1 level through ERROR-WARNING-INFO-DEBUG (default INFO)')
    argparser.add_argument('-s', '--start-time', type=_dt_convert,
                           help="start time of the output netCDF file")
    argparser.add_argument('-e', '--end-time', type=_dt_convert,
                           help='end time of the output netCDF file')
    argparser.add_argument('-cs', '--chunk-size', type=int,
                           help='chunk size for the netCDF file')
    argparser.add_argument('-z', '--zlib', action='store_true',
                           help='compress the netCDF file with zlib')
    argparser.add_argument('--no-empty', action='store_true',
                           help='raise an exception instead of writing an empty netCDF file when an input file is empty')
    argparser.add_argument('input_files', nargs="+",
                           help="aoss_tower level_00 paths")
    argparser.add_argument('-o', '--output', required=True, nargs="+",
                           help="filename pattern or filename, along the lines of <filepath>/aoss_tower.YYYY-MM-DD.nc")
    args = argparser.parse_args()

    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(3, args.verbosity)])

    if args.start_time and args.end_time:
        createGiantNetCDF(args.start_time, args.end_time, args.input_files,
                          args.output[0], args.zlib, args.chunk_size, args.no_empty)
    elif args.start_time or args.end_time:
        argparser.error('start time and end time must both be specified or neither')
    else:
        createMultiple(args.input_files, args.output, args.zlib, args.chunk_size, args.no_empty)


if __name__ == "__main__":
    main()
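
# Example invocations (script name and paths hypothetical):
#   python nc_convert.py -o out/aoss_tower.2016-01-01.nc rig_tower.2016-01-01.ascii
#   python nc_convert.py -s 2016-01-01T00:00:00 -e 2016-01-03T00:00:00 -z -cs 1024 \
#       -o out/aoss_tower.nc rig_tower.2016-01-0*.ascii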