"""nc.py -- convert AOSS tower level_00 ASCII data into netCDF files."""
import os
import sys
import logging
import pandas as pd
from datetime import datetime as dt
from aosstower.l00 import parser
from netCDF4 import Dataset
import numpy as np 
import platform
from aosstower import station

LOG = logging.getLogger(__name__)

def writeDimensions(ncFile):
    """Create the dimensions shared by the variables of the netCDF file.

    Two dimensions are defined: ``time`` (created with size None, i.e.
    unlimited) and ``strlen`` with 256 entries.

    @param ncFile - open, writable netCDF dataset
    @return the same dataset that was passed in
    """
    ncFile.createDimension('time', None)
    ncFile.createDimension('strlen', 256)

    #debug aid only; goes through logging so normal runs stay quiet
    LOG.debug("netCDF file after creating dimensions: %s", ncFile)

    return ncFile


def createVariables(ncFile, firstStamp, chunksizes, zlib):
    """Define the coordinate variables, data variables and global attributes.

    The coordinate variables (lon, lat, alt, base_time, time, station_name)
    come from the table below.  One float32 variable along ``time`` is then
    created for every entry of ``parser.database`` except ``stamp``.

    @param ncFile - open, writable netCDF dataset; the ``time`` and
                    ``strlen`` dimensions must already be defined
    @param firstStamp - datetime of the first record; only its date is used,
                        as the reference midnight of the time variables
    @param chunksizes - one-element list holding the chunk size along the
                        variable's dimension; None or [None] requests the
                        library's default chunking
    @param zlib - True to compress the variables with zlib
    @return the same dataset that was passed in
    """
    #midnight of the first day; base_time and time both refer to it
    midnight = firstStamp.strftime('%Y-%m-%d 00:00:00Z')

    #a missing chunk size must reach createVariable as chunksizes=None,
    #comparing None with 256 below would otherwise raise on Python 3
    if chunksizes is not None and any(size is None for size in chunksizes):
        chunksizes = None

    #the strlen dimension only has 256 entries, so cap its chunk there
    if chunksizes is not None and chunksizes[0] > 256:
        strlenChunks = [256]
    else:
        strlenChunks = chunksizes

    missing = float(-999)

    #name: (type, dimension, fill value, valid_min, standard_name,
    #       long_name, units, valid_max, cf_role)
    coordinates = {
        'lon': (np.float32, None, missing, np.float32(-180), 'longitude',
                None, 'degrees_east', np.float32(180), None),
        'lat': (np.float32, None, missing, np.float32(-90), 'latitude',
                None, 'degrees_north', np.float32(90), None),
        'alt': (np.float32, None, missing, None, 'height',
                'vertical distance', 'm', None, None),
        'base_time': (np.float32, None, missing, None, 'time',
                      'base time as unix timestamp',
                      'seconds since 1970-01-01 00:00:00', None, None),
        'time': (np.float32, 'time', missing, None, 'time',
                 'time offset from midnight UTC',
                 'seconds since ' + midnight, None, None),
        'station_name': ('c', 'strlen', '-', None, None,
                         'station name', None, None, 'timeseries_id'),
    }

    for key in coordinates:
        (varType, dimension, fill, validMin, standardName,
         longName, units, validMax, cfRole) = coordinates[key]

        if dimension is None:
            #scalar variable: there is no dimension to chunk along
            variable = ncFile.createVariable(key, varType, fill_value=fill, zlib=zlib)
        else:
            varChunks = strlenChunks if dimension == 'strlen' else chunksizes
            #dimensions must be a real tuple, hence the trailing comma
            variable = ncFile.createVariable(key, varType, dimensions=(dimension,),
                                             fill_value=fill, zlib=zlib,
                                             chunksizes=varChunks)

        #create var attributes
        if key == 'alt':
            variable.positive = 'up'
            variable.axis = 'Z'

        #valid range is numeric so it can be compared with the data
        if validMin is not None:
            variable.valid_min = validMin

        if validMax is not None:
            variable.valid_max = validMax

        if standardName:
            variable.standard_name = standardName

        if longName:
            variable.long_name = longName

        if units:
            variable.units = units

        if cfRole:
            variable.cf_role = cfRole

        if key == 'base_time':
            variable.string = midnight

    for entry in parser.database:
        #the stamp is represented by the time variables above
        if entry == 'stamp':
            continue

        varTup = parser.database[entry]

        variable = ncFile.createVariable(entry, np.float32, dimensions=('time',),
                                         fill_value=missing, zlib=zlib,
                                         chunksizes=chunksizes)
        variable.standard_name = varTup[1]
        variable.description = varTup[3]
        variable.units = varTup[4]

    #create global attributes
    ncFile.source = 'surface observation'
    #NOTE(review): CF names this attribute 'Conventions' (capital C);
    #kept as-is in case readers rely on the current name - confirm
    ncFile.conventions = 'CF-1.6'
    ncFile.institution = 'UW SSEC'
    ncFile.featureType = 'timeSeries'

    #generate history: host description followed by this script's name
    ncFile.history = ' '.join(platform.uname()) + " " + os.path.basename(__file__)

    return ncFile


def getData(inputFiles, no_empty):
    """Read every input file into one DataFrame indexed by time stamp.

    Each frame yielded by ``parser.read_frames`` becomes one row; frames
    without a ``stamp`` key are skipped.  If the same stamp occurs more
    than once, the frame read last wins.

    @param inputFiles - iterable of input file paths
    @param no_empty - when True, a file that yields no frames aborts the
                      read
    @return two-element list: [DataFrame, True] on success, or
            [None, False] when no_empty is set and a file had no frames
    """
    records = {}

    for filename in inputFiles:
        frames = list(parser.read_frames(filename))

        if no_empty and not frames:
            return [None, False]

        for frame in frames:
            if 'stamp' not in frame:
                continue

            #the stamp becomes the row label, not a data column
            stamp = frame.pop('stamp')
            records[stamp] = frame

    #sort by stamp so rows are chronological even when the input files
    #are listed out of order; label slicing and the "first stamp" logic
    #downstream rely on that
    return [pd.DataFrame(records).transpose().sort_index(), True]


def writeVars(ncFile, frame):
    """Write the time, coordinate, station-name and data values to the file.

    ``base_time`` receives the number of seconds from 1970-01-01 to
    midnight of the first record's day, and ``time`` the offset of every
    record from that midnight.  Each column of ``frame`` is written to
    the file variable of the same name.

    @param ncFile - netCDF dataset whose variables are already defined
    @param frame - non-empty DataFrame indexed by time stamp, one column
                   per data variable
    @return the same dataset that was passed in
    """
    #work on real timestamps rather than parsing their string form, which
    #breaks as soon as a stamp carries fractional seconds
    stamps = pd.DatetimeIndex(frame.index)

    #midnight of the day the first record falls on
    dayStart = stamps[0].normalize()

    #seconds from the unix origin to the start of that day
    baseTimeValue = (dayStart - pd.Timestamp('1970-01-01')).total_seconds()

    #seconds from the start of the day to each record
    timeNumpy = np.asarray((stamps - dayStart).total_seconds(), dtype='float32')

    fileVar = ncFile.variables

    fileVar['base_time'].assignValue(baseTimeValue)
    fileVar['time'][:] = timeNumpy

    #write coordinate var values to file
    #alt might not be right, need to verify
    fileVar['lon'].assignValue(station.LONGITUDE)
    fileVar['lat'].assignValue(station.LATITUDE)
    fileVar['alt'].assignValue(328)

    #might change
    stationName = "AOSS Tower"

    #station name goes in one character per element of the strlen axis
    statNumpy = np.asarray(list(stationName))
    fileVar['station_name'][0:len(statNumpy)] = statNumpy

    #writes data into file, column by column
    for varName in frame:
        fileVar[varName][:] = np.asarray(frame[varName].tolist())

    return ncFile

def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, no_empty):
    """Create one netCDF file from the records of the given input files.

    The records of all input files are combined and, when both ``start``
    and ``end`` are given, limited to that time window.  If no records
    remain, the output file is still created but left without dimensions
    or variables.

    @param start - start datetime object, or None for no lower limit
    @param end - end datetime object, or None for no upper limit
    @param inputFiles - list of input filenames
    @param outputName - filename of the netcdf file to write
    @param zlib - True to compress the variables with zlib
    @param chunkSize - chunk size along the time dimension
    @param no_empty - when True, an input file without records is an error
    @raise IOError - if no_empty is set and an input file has no records
    """
    chunksizes = [chunkSize]

    frame, found = getData(inputFiles, no_empty)

    if not found:
        raise IOError('An empty ASCII file was found')

    if start and end and not frame.empty:
        stampFormat = '%Y-%m-%d %H:%M:%S'
        #.loc makes the label-based (time stamp) row selection explicit
        frame = frame.loc[start.strftime(stampFormat): end.strftime(stampFormat)]

    ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')

    try:
        #an empty frame (no input records, or none inside the requested
        #window) yields an empty file instead of an IndexError
        if not frame.empty:
            firstStamp = pd.Timestamp(frame.index[0]).to_pydatetime()

            writeDimensions(ncFile)
            createVariables(ncFile, firstStamp, chunksizes, zlib)
            writeVars(ncFile, frame)
    finally:
        #close the dataset even when one of the helpers raises
        ncFile.close()


def createMultiple(filenames, outputFilenames, zlib, chunkSize, no_empty):
    """Convert each input file into its own netCDF file.

    The n-th input file is written to the n-th output filename, without
    any start or end time limit.

    @param filenames - list of input filenames
    @param outputFilenames - list of output filenames, one per input file
    @param zlib - True to compress the variables with zlib
    @param chunkSize - chunk size along the time dimension
    @param no_empty - when True, an input file without records is an error
    @raise SystemExit - with a usage message and non-zero status when the
                        two lists differ in length
    """
    outputs = outputFilenames or []

    if len(filenames) != len(outputs):
        #sys.exit with a message prints it to stderr and exits with
        #status 1, so callers and shells see the failure
        sys.exit('USAGE: number of output filenames must equal number of input filenames when start and end times are not specified')

    for filename, outputName in zip(filenames, outputs):
        createGiantNetCDF(None, None, [filename], outputName, zlib, chunkSize, no_empty)


def _dt_convert(datetime_str):
    """Turn a YYYY-mm-ddTHH:MM:SS string into a datetime object.

    Serves as the argparse ``type`` of the -s and -e parameters.

    @param datetime_str - time stamp text in the format above
    @return the corresponding datetime object
    """
    layout = '%Y-%m-%dT%H:%M:%S'
    parsed = dt.strptime(datetime_str, layout)
    return parsed
 
def main():
    """Command-line entry point: parse the arguments and write netCDF files.

    With both a start and an end time, all input files are combined into
    the single file named by the first -o value.  With neither, every
    input file is converted to the output filename at the same position.
    Giving only one of the two times is a usage error.
    """
    import argparse

    #named argParser so the module-level 'parser' import is not shadowed
    argParser = argparse.ArgumentParser(description="Convert level_00 aoss tower data to level_a0")

    #argparse verbosity info
    argParser.add_argument('-v', '--verbose', action="count", default=int(os.environ.get("VERBOSITY", 2)),
                           dest='verbosity',
                           help='each occurrence increases verbosity 1 level through ERROR-WARNING-INFO-DEBUG (default INFO)')

    #argparse start and end times
    argParser.add_argument('-s', '--start-time', type=_dt_convert, help="Start time of massive netcdf file")
    argParser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file')
    argParser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file')
    argParser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib')
    argParser.add_argument('--no-empty', action='store_true', help='allow empty nc files or not,' +
                           ' if not and there is an empty file, an exception is raised')

    argParser.add_argument("input_files", nargs="+",
                           help="aoss_tower level_00 paths")

    argParser.add_argument('-o', '--output', required=True, nargs="+", help="filename pattern or filename. " +
                           "Should be along the lines of <filepath>/aoss_tower.YYYY-MM-DD.nc")
    args = argParser.parse_args()

    #verbosity counts above 3 are clamped to DEBUG
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(3, args.verbosity)])

    LOG.debug("parsed arguments: %s", args)

    if args.start_time and args.end_time:
        createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0],
                          args.zlib, args.chunk_size, args.no_empty)

    elif args.start_time or args.end_time:
        #error() prints the usage text and exits with status 2
        argParser.error('start time and end time must both be specified or not specified')

    else:
        createMultiple(args.input_files, args.output, args.zlib, args.chunk_size, args.no_empty)


#run the command-line interface only when executed as a script,
#not when the module is imported
if __name__ == "__main__":
    main()