"""Regenerate daily AOSS tower NetCDF files from the rig_tower ASCII files.

Every function that writes a file first changes the process working
directory to the per-month output directory (see ``createDirectory``) and
then writes using a relative file name.
"""

import os
import time
from datetime import datetime as dt
from datetime import timedelta as delta

import pandas as pd
from netCDF4 import Dataset

from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars

# Root of the regenerated NetCDF tree; a 'YYYY/MM' suffix is appended per file.
_OUTPUT_ROOT = "/mnt/inst-data/regen_netcdf/aoss/tower/"
# Root of the raw ASCII input tree.
_INPUT_ROOT = '/mnt/inst-data/aoss-tower/'


def writeBack(frame, filename, cur_dt):
    """Rebuild the NetCDF file for the day of ``frame``'s first timestamp.

    The file is rebuilt by ``createGiantNetCDF`` from two ASCII inputs:
    ``filename`` and the ASCII file for the day before ``cur_dt``.

    frame    -- DataFrame whose index starts with a timestamp that
                stringifies as 'YYYY-MM-DD ...'
    filename -- path of the ASCII file that ``frame`` was read from
    cur_dt   -- datetime of the ASCII file currently being processed
    """
    # Date part ('YYYY-MM-DD') of the first timestamp in the frame.
    dayString = str(frame.index[0]).split(" ")[0]
    startObj = dt.strptime(dayString, '%Y-%m-%d')
    createDirectory(startObj)

    # NOTE(review): microsecond=59 looks like it may have been meant to be
    # 999999; kept as-is because the value is handed to createGiantNetCDF.
    endObj = startObj.replace(hour=23, minute=59, second=59, microsecond=59)

    inputFiles = [filename, createFilename(cur_dt - delta(days=1))]
    outputFilename = 'aoss_tower.' + dayString + '.nc'

    # The trailing positional flags (True, None, False) are defined by
    # nc.createGiantNetCDF -- see that function for their meaning.
    createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None, False)


def createDirectory(stamp):
    """Ensure the 'YYYY/MM' output directory for ``stamp`` exists and chdir into it.

    Side effect: changes the working directory of the whole process.
    Raises OSError if the directory cannot be created or entered.
    """
    target = _OUTPUT_ROOT + stamp.strftime('%Y/%m')

    # Try to create first and tolerate "already exists": this avoids the
    # window between an existence check and the creation call.
    try:
        os.makedirs(target)
    except OSError:
        if not os.path.isdir(target):
            raise

    os.chdir(target)


def createFilename(date):
    """Return the path of the rig_tower ASCII input file for ``date``."""
    return (_INPUT_ROOT
            + date.strftime('%Y/%m/')
            + 'rig_tower.' + date.strftime('%Y-%m-%d') + '.ascii')


def createNCFile(frame, filename):
    """Write ``frame`` to 'aoss_tower.YYYY-MM-DD.nc' in the month directory.

    The date in the output name comes from the first timestamp in
    ``frame.index``, which must stringify as 'YYYY-MM-DD HH:MM:SS'.

    frame    -- DataFrame holding the data to write
    filename -- source ASCII path, recorded in the file's ``inputFiles``
                attribute
    """
    firstStamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
    createDirectory(firstStamp)

    outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.nc'
    ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC')

    # Close the dataset even when one of the writer helpers raises, so the
    # file handle is not leaked.
    try:
        ncFile = writeDimensions(ncFile)
        ncFile = createVariables(ncFile, firstStamp, [len(frame.index)], True)
        ncFile.inputFiles = ', '.join([filename])
        ncFile = writeVars(ncFile, frame)
    finally:
        ncFile.close()


def create_files(start_date, end_date):
    """Create NetCDF files for every day from ``start_date`` to ``end_date``.

    The ASCII file name for each day is derived from the date alone, not
    from the timestamps inside the file (original author's note: this
    should make the code reusable for the buoy).

    Each ASCII file is read and split into groups by the day-of-month of
    its index:
      * one group   -- written directly with ``createNCFile``;
      * two groups  -- the group with more rows is written with
                       ``createNCFile``; the other is handed to
                       ``writeBack``, which rebuilds its day together with
                       the previous day's ASCII file;
      * more groups -- diagnostics are printed and the process exits.
    Days whose ASCII file yields an empty frame are skipped.
    """
    dayCount = (end_date - start_date).days + 1

    for offset in range(dayCount):
        cur_dt = start_date + delta(days=offset)
        filename = createFilename(cur_dt)
        print('grabbing ' + filename)
        df = getData([filename], False)[0]

        if df.empty:
            continue

        # One sub-frame per distinct day-of-month present in the file.
        DFList = [dayFrame for _, dayFrame in df.groupby(df.index.day)]

        if len(DFList) <= 1:
            createNCFile(DFList[0], filename)

        elif len(DFList) == 2:
            first, second = DFList
            # The strictly larger group is the primary day; on a tie the
            # second group is treated as primary.
            if len(first.index) > len(second.index):
                primary, secondary = first, second
            else:
                primary, secondary = second, first

            createNCFile(primary, filename)
            writeBack(secondary, filename, cur_dt)

        else:
            # Unexpected: more than two distinct days in a single file.
            # Print each group's size and first timestamp, then stop.
            print(len(DFList))

            for frame in DFList:
                print(len(frame.index))
                print(frame.index[0])

            # NOTE(review): exits with status 0 although this is an
            # abnormal condition; status kept for compatibility.
            raise SystemExit(0)


def createYesterdayFile():
    """Run ``create_files`` for yesterday only."""
    # Evaluate the clock once so start and end are the same instant.
    yesterday = dt.today() - delta(days=1)
    create_files(yesterday, yesterday)


# Example invocations (left disabled):
#createYesterdayFile()
#create_files(dt(2003,5,28), dt.today() - delta(days=1))