diff --git a/aosstower/level_b1/__init__.py b/aosstower/level_b1/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/aosstower/level_b1/daily/autoRun.py b/aosstower/level_b1/daily/autoRun.py deleted file mode 100755 index 0d15a20c65b5a587bb1f81cde94ba340c130db2f..0000000000000000000000000000000000000000 --- a/aosstower/level_b1/daily/autoRun.py +++ /dev/null @@ -1,3 +0,0 @@ -from bundle import createYesterdayFile - -createYesterdayFile() diff --git a/aosstower/level_b1/daily/avg_database.py b/aosstower/level_b1/daily/avg_database.py new file mode 100644 index 0000000000000000000000000000000000000000..43b78fc15198cf9607d4013df43eff48d1d211d0 --- /dev/null +++ b/aosstower/level_b1/daily/avg_database.py @@ -0,0 +1,838 @@ +# CF Metadata 1.6 compliant structure for NetCDF file. This is essentially +# a pythonified version of the CDL +from collections import OrderedDict +import aosstower.schema as schema +BASIC_STATION = { + + 'globals': { + 'source': 'surface observation', + 'institution': 'UW SSEC', + 'featureType': 'timeSeries', + 'Conventions': 'ARM-1.2 CF-1.6', + 'data_level': 'b1', + 'datastream' : 'aoss.tower.nc-1mo-1d.lb1.v00', + 'software_version': '00' + }, + + 'dimensions': { + 'max_len_station_name': 32, + 'time': None # Unlimited + }, + + 'variables': OrderedDict({ + 'base_time': { + 'string': '', # Filled in by creator + 'standard_name': 'time', + 'long_name': 'base time as unix timestamp', + 'units': 'seconds since 1970-01-01 00:00:00 0:00', + '_type': 'd', + '_shape': tuple(), + }, + 'time_offset': { + 'long_name': 'time offset from base_time', + 'standard_name': 'time', + 'units': '', # Filled in by creator + '_type': 'd', + '_shape': ('time',), + }, + 'time': { + 'long_name': 'time offset from epoch', + 'standard_name': 'time', + 'units': 'seconds since 1970-01-01 00:00:00 0:00', + '_type': 'd', + '_shape': ('time',), + }, + 'station_name': { + 'cf_role': 'timeseries_id', + 'long_name': 'station name', + '_type': 'S1', + '_shape': ('max_len_station_name',), + }, + 'lat': { + 'standard_name': 'latitude', + 'units': 'degrees_north', + 'valid_min': -90, + 'valid_max': 90, + '_type': 'f', + '_shape': tuple(), + }, + 'lon': { + 'standard_name': 'longitude', + 'units': 'degrees_east', + 'valid_min': -180, + 'valid_max': 180, + '_type': 'f', + '_shape': tuple(), + }, + 'alt': { + 'long_name': 'vertical distance', + 'standard_name': 'height', + 'units': 'm', + 'positive': 'up', + 'axis': 'Z', + '_type': 'f', + '_shape': tuple(), + } + }) +} + +AOSS_VARS = OrderedDict({ + 'box_temp_high': { + 'standard_name': 'air_temperature', + 'units': 'degC', + 'description': 'Auxillary Temperature', + '_type': 'f', + '_shape': ('time',), + }, + + 'box_temp_low': { + 'standard_name': 'air_temperature', + 'units': 'degC', + 'description': 'Auxillary Temperature', + '_type': 'f', + '_shape': ('time',), + }, + + 'box_temp_mean': { + 'standard_name': 'air_temperature', + 'units': 'degC', + 'description': 'Auxillary Temperature', + '_type': 'f', + '_shape': ('time',), + }, + + 'box_presure_high': { + 'standard_name': 'air_pressure', + 'description': 'Pressure inside the data logger enclosure', + 'units': 'hpa', + 'valid_min': 850, + 'valid_max': '1100', + '_type': 'f', + '_shape': ('time',), + }, + + 'box_presure_low': { + 'standard_name': 'air_pressure', + 'description': 'Pressure inside the data logger enclosure', + 'units': 'hpa', + 'valid_min': 850, + 'valid_max': '1100', + '_type': 'f', + '_shape': ('time',), + }, + + 'box_presure_mean': { + 'standard_name': 'air_pressure', + 'description': 'Pressure inside the data logger enclosure', + 'units': 'hpa', + 'valid_min': 850, + 'valid_max': '1100', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_air_temp_period_high': { + 'standard_name': '', + 'description': '', + 'units': '1', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_air_temp_period_low': { + 'standard_name': '', + 'description': '', + 'units': '1', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_air_temp_period_mean': { + 'standard_name': '', + 'description': '', + 'units': '1', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_pressure_period_high': { + 'standard_name': '', + 'description': '', + 'units': '1', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_pressure_period_low': { + 'standard_name': '', + 'description': '', + 'units': '1', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_pressure_period_mean': { + 'standard_name': '', + 'description': '', + 'units': '1', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_air_temp_high': { + 'standard_name': 'air_temperature', + 'description': '', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_air_temp_low': { + 'standard_name': 'air_temperature', + 'description': '', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_air_temp_mean': { + 'standard_name': 'air_temperature', + 'description': '', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'pressure_high': { + 'standard_name': 'air_pressure', + 'description': 'Air pressure as measured from the PAROSCI pressure sensor', + 'units': 'hpa', + 'valid_min': 850, + 'valid_max': 1100, + '_type': 'f', + '_shape': ('time',), + }, + + 'pressure_low': { + 'standard_name': 'air_pressure', + 'description': 'Air pressure as measured from the PAROSCI pressure sensor', + 'units': 'hpa', + 'valid_min': 850, + 'valid_max': 1100, + '_type': 'f', + '_shape': ('time',), + }, + + 'pressure_mean': { + 'standard_name': 'air_pressure', + 'description': 'Air pressure as measured from the PAROSCI pressure sensor', + 'units': 'hpa', + 'valid_min': 850, + 'valid_max': 1100, + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_cal_sig_high': { + 'standard_name': '', + 'description': '', + 'units': '', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_cal_sig_low': { + 'standard_name': '', + 'description': '', + 'units': '', + '_type': 'f', + '_shape': ('time',), + }, + + 'paro_cal_sig_mean': { + 'standard_name': '', + 'description': '', + 'units': '', + '_type': 'f', + '_shape': ('time',), + }, + + 'box_rh_high': { + 'standard_name': 'relative humidity', + 'description': 'Relative humidity inside the data logger enclosure', + 'units': '%', + 'valid_min': 0, + 'valid_max': 100, + '_type': 'f', + '_shape': ('time',), + }, + + 'box_rh_low': { + 'standard_name': 'relative humidity', + 'description': 'Relative humidity inside the data logger enclosure', + 'units': '%', + 'valid_min': 0, + 'valid_max': 100, + '_type': 'f', + '_shape': ('time',), + }, + + 'box_rh_mean': { + 'standard_name': 'relative humidity', + 'description': 'Relative humidity inside the data logger enclosure', + 'units': '%', + 'valid_min': 0, + 'valid_max': 100, + '_type': 'f', + '_shape': ('time',), + }, + + 'box_air_temp_high': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature inside the data logger enclosure', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'box_air_temp_low': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature inside the data logger enclosure', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'box_air_temp_mean': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature inside the data logger enclosure', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_2_high': { + 'standard_name': 'air_temperature', + 'description': 'Auxillary air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_2_low': { + 'standard_name': 'air_temperature', + 'description': 'Auxillary air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_2_mean': { + 'standard_name': 'air_temperature', + 'description': 'Auxillary air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_3_high': { + 'standard_name': 'air_temperature', + 'description': 'Auxillary air temperature', + 'units' : 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_3_low': { + 'standard_name': 'air_temperature', + 'description': 'Auxillary air temperature', + 'units' : 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_3_mean': { + 'standard_name': 'air_temperature', + 'description': 'Auxillary air temperature', + 'units' : 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_4_high': { + 'standard_name': 'air_temperature', + 'description' : 'Auxillary air temperature', + 'units' : 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_4_low': { + 'standard_name': 'air_temperature', + 'description' : 'Auxillary air temperature', + 'units' : 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_4_mean': { + 'standard_name': 'air_temperature', + 'description' : 'Auxillary air temperature', + 'units' : 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_5_high': { + 'standard_name': 'air_temperature', + 'descripiton': 'Auxillary air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_5_low': { + 'standard_name': 'air_temperature', + 'descripiton': 'Auxillary air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), +}, + + 'air_temp_5_mean': { + 'standard_name': 'air_temperature', + 'descripiton': 'Auxillary air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'wind_speed_high': { + 'standard_name': 'wind_speed', + 'description': 'wind_speed', + 'units': 'm*s^-1', + 'valid_min': 0, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'wind_speed_low': { + 'standard_name': 'wind_speed', + 'description': 'wind_speed', + 'units': 'm*s^-1', + 'valid_min': 0, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'wind_speed_mean': { + 'standard_name': 'wind_speed', + 'description': 'wind_speed', + 'units': 'm*s^-1', + 'valid_min': 0, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'wind_dir_high': { + 'standard_name': 'wind_direction', + 'description': 'wind_direction', + 'units': 'degrees', + 'valid_min': 0, + 'valid_max': 360, + '_type': 'f', + '_shape': ('time',), + }, + + 'wind_dir_low': { + 'standard_name': 'wind_direction', + 'description': 'wind_direction', + 'units': 'degrees', + 'valid_min': 0, + 'valid_max': 360, + '_type': 'f', + '_shape': ('time',), + }, + + 'wind_dir_mean': { + 'standard_name': 'wind_direction', + 'description': 'wind_direction', + 'units': 'degrees', + 'valid_min': 0, + 'valid_max': 360, + '_type': 'f', + '_shape': ('time',), + }, + + 'rh_shield_freq_high': { + 'standard_name': '', + 'descrption' : '', + 'units': 'hz', + '_type': 'f', + '_shape': ('time',), + }, + + 'rh_shield_freq_low': { + 'standard_name': '', + 'descrption' : '', + 'units': 'hz', + '_type': 'f', + '_shape': ('time',), + }, + + 'rh_shield_freq_mean': { + 'standard_name': '', + 'descrption' : '', + 'units': 'hz', + '_type': 'f', + '_shape': ('time',), + }, + + 'rh_high': { + 'standard_name': 'relative_humidity', + 'descripiton': 'Relative humidity', + 'units': '%', + 'valid_min': 0, + 'valid_max': 100, + '_type': 'f', + '_shape': ('time',), + }, + + 'rh_low': { + 'standard_name': 'relative_humidity', + 'descripiton': 'Relative humidity', + 'units': '%', + 'valid_min': 0, + 'valid_max': 100, + '_type': 'f', + '_shape': ('time',), + }, + + 'rh_mean': { + 'standard_name': 'relative_humidity', + 'descripiton': 'Relative humidity', + 'units': '%', + 'valid_min': 0, + 'valid_max': 100, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_6_3m_high': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature 6.3m from tower base', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_6_3m_low': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature 6.3m from tower base', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_6_3m_mean': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature 6.3m from tower base', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'dewpoint_high': { + 'standard_name': 'dewpoint_temperature', + 'description': 'Calculated dewpoint temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'dewpoint_low': { + 'standard_name': 'dewpoint_temperature', + 'description': 'Calculated dewpoint temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'dewpoint_mean': { + 'standard_name': 'dewpoint_temperature', + 'description': 'Calculated dewpoint temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'rtd_shield_freq_high': { + 'standard_name': '', + 'description': '', + 'units': '', + '_type': 'f', + '_shape': ('time',), + }, + + 'rtd_shield_freq_low': { + 'standard_name': '', + 'description': '', + 'units': '', + '_type': 'f', + '_shape': ('time',), + }, + + 'rtd_shield_freq_mean': { + 'standard_name': '', + 'description': '', + 'units': '', + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_high': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_low': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'air_temp_mean': { + 'standard_name': 'air_temperature', + 'description': 'Air temperature', + 'units': 'degC', + 'valid_min': -50, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'solar_flux_high': { + 'standard_name': 'solar_flux', + 'description': 'Solar flux', + 'units': 'w*m^-2', + 'valid_min': 0, + 'valid_max': 3000, + '_type': 'f', + '_shape': ('time',), + }, + + 'solar_flux_low': { + 'standard_name': 'solar_flux', + 'description': 'Solar flux', + 'units': 'w*m^-2', + 'valid_min': 0, + 'valid_max': 3000, + '_type': 'f', + '_shape': ('time',), + }, + + 'solar_flux_mean': { + 'standard_name': 'solar_flux', + 'description': 'Solar flux', + 'units': 'w*m^-2', + 'valid_min': 0, + 'valid_max': 3000, + '_type': 'f', + '_shape': ('time',), + }, + + 'precip_high': { + 'standard_name': '', + 'description': 'Precipitation', + 'units': 'mm', + 'valid_min': 0, + 'valid_max': 254, + '_type': 'f', + '_shape': ('time',), + }, + + 'precip_low': { + 'standard_name': '', + 'description': 'Precipitation', + 'units': 'mm', + 'valid_min': 0, + 'valid_max': 254, + '_type': 'f', + '_shape': ('time',), + }, + + 'precip_mean': { + 'standard_name': '', + 'description': 'Precipitation', + 'units': 'mm', + 'valid_min': 0, + 'valid_max': 254, + '_type': 'f', + '_shape': ('time',), + }, + + 'accum_precip_high': { + 'standard_name': 'axxumulated_precipitation', + 'description': 'Precipitation accumulated since 0Z', + 'units': 'mm', + 'valid_min': 0, + 'valid_max': 254, + '_type': 'f', + '_shape': ('time',), + }, + + 'accum_precip_low': { + 'standard_name': 'axxumulated_precipitation', + 'description': 'Precipitation accumulated since 0Z', + 'units': 'mm', + 'valid_min': 0, + 'valid_max': 254, + '_type': 'f', + '_shape': ('time',), + }, + + 'accum_precip_mean': { + 'standard_name': 'axxumulated_precipitation', + 'description': 'Precipitation accumulated since 0Z', + 'units': 'mm', + 'valid_min': 0, + 'valid_max': 254, + '_type': 'f', + '_shape': ('time',), + }, + + 'altimeter_high': { + 'standard_name': '', + 'description': '', + 'units': 'inHg', + '_type': 'f', + '_shape': ('time',), + }, + + 'altimeter_low': { + 'standard_name': '', + 'description': '', + 'units': 'inHg', + '_type': 'f', + '_shape': ('time',), + }, + + 'altimeter_mean': { + 'standard_name': '', + 'description': '', + 'units': 'inHg', + '_type': 'f', + '_shape': ('time',), + }, + + 'gust_high': + { + 'standard_name': 'wind_speed_of_gust', + 'descripiton': 'Wind gust over the previous 2 minutes', + 'units': 'm/s', + 'valid_min': 0, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'gust_low': + { + 'standard_name': 'wind_speed_of_gust', + 'descripiton': 'Wind gust over the previous 2 minutes', + 'units': 'm/s', + 'valid_min': 0, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + }, + + 'peak_wind': + { + 'standard_name': 'wind_speed', + 'description': 'fastest 5-second wind_speed', + 'units': 'm/s', + 'valid_min': 0, + 'valid_max': 50, + '_type': 'f', + '_shape': ('time',), + } +}) + +def make_Var_db(aoss_vars): + var_dict = {} + for key in aoss_vars: + #there's a typo in some var names + entry = aoss_vars[key] + key = key.replace('presure','pressure') + var_dict[key] = schema.Var( + entry.get('_type','f'), + entry.get('standard_name',''), + key, + entry.get('description',''), + entry.get('units',''), + entry.get('valid_min',''), + entry.get('valid_max',''), + ) + return var_dict +AOSS_VARS = make_Var_db(AOSS_VARS) diff --git a/aosstower/level_b1/daily/bundle.py b/aosstower/level_b1/daily/bundle.py deleted file mode 100644 index 66729cd1f49041f3931d137cfb69cb2d0cc7bd74..0000000000000000000000000000000000000000 --- a/aosstower/level_b1/daily/bundle.py +++ /dev/null @@ -1,139 +0,0 @@ -from nc import getData, createGiantNetCDF, writeDimensions, createVariables, writeVars, minuteAverages -from datetime import datetime as dt -from datetime import timedelta as delta -import pandas as pd -import time -from netCDF4 import Dataset -import os - -def writeBack(frame, filename, cur_dt): - #get start - startString = str(frame.index[0]) - startObj = dt.strptime(startString.split(" ")[0], '%Y-%m-%d') - createDirectory(startObj) - - #get end - endObj = startObj.replace(hour=23, minute=59, second=59, microsecond=59) - - #input file - inputFiles = [] - inputFiles.append(filename) - inputFiles.append(createFilename(cur_dt - delta(days=1))) - - #output filename - outputFilename = 'aoss_tower.' + startString.split(" ")[0] + '.day.nc' - - #create file - createGiantNetCDF(startObj, endObj, inputFiles, outputFilename, True, None) - -def createDirectory(stamp): - if os.path.exists("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m')): - os.chdir("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m')) - - else: - os.makedirs("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m')) - os.chdir("/mnt/inst-data/regen_netcdf/aoss/tower/" + stamp.strftime('%Y/%m')) - - -def createFilename(date): - filename = '/mnt/inst-data/aoss-tower/' - filename += date.strftime('%Y/%m/') - filename += 'rig_tower.' + date.strftime('%Y-%m-%d') + '.ascii' - - return filename - -def createNCFile(frame, filename): - firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S') - - createDirectory(firstStamp) - - outputName = 'aoss_tower.' + firstStamp.strftime('%Y-%m-%d') + '.day.nc' - - ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC') - - ncFile = writeDimensions(ncFile) - - ncFile = createVariables(ncFile, firstStamp, [len(list(frame.index))], True) - - ncFile.inputFiles = ', '.join([filename]) - - ncFile = writeVars(ncFile, frame) - - ncFile.close() - -def create_files(start_date, end_date): - # for each file's name based on date - does not rely on what dates are - # in each file so should be similar code for buoy - # get file, discover which full dates excluding the start are in it - # from there we create the netcdf files - # for the start, we always recreate the netcdf using the previous file - # ignore the last unfulfilled date - - cur_dt = start_date - - for day in range((end_date - start_date).days + 1): - filename = createFilename(cur_dt) - print('grabbing ' + filename) - - df = getData([filename]) - - if(df.empty): - cur_dt += delta(days = 1) - continue - - DFList = [] - - for group in df.groupby(df.index.day): - DFList.append(group[1]) - - - if(len(DFList) <= 1): - frame = DFList[0] - - frame = minuteAverages(frame) - - createNCFile(frame, filename) - - elif(len(DFList) == 2): - if(len(list(DFList[0].index)) > len(list(DFList[1].index))): - frame = DFList[0] - - frame = minuteAverages(frame) - - createNCFile(frame, filename) - - #get second frame - frame = DFList[1] - - writeBack(frame, filename, cur_dt) - - else: - frame = DFList[1] - - frame = minuteAverages(frame) - - createNCFile(frame, filename) - - #get second frame - frame = DFList[0] - - writeBack(frame, filename, cur_dt) - - else: - print(len(DFList)) - - for frame in DFList: - print(len(list(frame.index))) - print(list(frame.index)[0]) - exit(0) - - - - #increment day - cur_dt += delta(days = 1) - -def createYesterdayFile(): - create_files(dt.today() - delta(days=1), dt.today() - delta(days=1)) - -#createYesterdayFile() -create_files(dt(2009,5,2), dt(2009,5,2))#dt.today() - delta(days=1)) diff --git a/aosstower/level_b1/daily/bundle.pyc b/aosstower/level_b1/daily/bundle.pyc deleted file mode 100644 index 99b5245907dca7496a03515d81c8f008c7504070..0000000000000000000000000000000000000000 Binary files a/aosstower/level_b1/daily/bundle.pyc and /dev/null differ diff --git a/aosstower/level_b1/daily/foo.txt b/aosstower/level_b1/daily/foo.txt new file mode 100644 index 0000000000000000000000000000000000000000..c2bf21f25a1811e6a816954575fa1beab89a6ee3 --- /dev/null +++ b/aosstower/level_b1/daily/foo.txt @@ -0,0 +1,163 @@ +time +lon +station_name +lat +base_time +time_offset +alt +rh_shield_freq_mean +qc_rh_shield_freq_mean +paro_cal_sig_low +qc_paro_cal_sig_low +air_temp_4_low +qc_air_temp_4_low +paro_air_temp_period_low +qc_paro_air_temp_period_low +air_temp_3_high +qc_air_temp_3_high +air_temp_2_low +qc_air_temp_2_low +pressure_mean +qc_pressure_mean +rh_mean +qc_rh_mean +rtd_shield_freq_mean +qc_rtd_shield_freq_mean +box_rh_high +qc_box_rh_high +box_air_temp_high +qc_box_air_temp_high +paro_air_temp_high +qc_paro_air_temp_high +air_temp_5_mean +qc_air_temp_5_mean +box_air_temp_low +qc_box_air_temp_low +precip_mean +qc_precip_mean +rh_shield_freq_high +qc_rh_shield_freq_high +rh_low +qc_rh_low +box_presure_high +qc_box_presure_high +paro_cal_sig_mean +qc_paro_cal_sig_mean +dewpoint_mean +qc_dewpoint_mean +paro_cal_sig_high +qc_paro_cal_sig_high +dewpoint_high +qc_dewpoint_high +paro_air_temp_period_mean +qc_paro_air_temp_period_mean +box_temp_low +qc_box_temp_low +altimeter_low +qc_altimeter_low +precip_low +qc_precip_low +rh_shield_freq_low +qc_rh_shield_freq_low +paro_air_temp_period_high +qc_paro_air_temp_period_high +wind_speed_low +qc_wind_speed_low +paro_pressure_period_low +qc_paro_pressure_period_low +paro_air_temp_low +qc_paro_air_temp_low +box_rh_low +qc_box_rh_low +air_temp_4_mean +qc_air_temp_4_mean +air_temp_6_3m_high +qc_air_temp_6_3m_high +gust_high +qc_gust_high +air_temp_5_low +qc_air_temp_5_low +wind_dir_high +qc_wind_dir_high +box_presure_low +qc_box_presure_low +pressure_low +qc_pressure_low +rh_high +qc_rh_high +rtd_shield_freq_low +qc_rtd_shield_freq_low +wind_speed_mean +qc_wind_speed_mean +air_temp_high +qc_air_temp_high +precip_high +qc_precip_high +accum_precip_high +qc_accum_precip_high +solar_flux_high +qc_solar_flux_high +paro_pressure_period_mean +qc_paro_pressure_period_mean +air_temp_4_high +qc_air_temp_4_high +peak_wind +qc_peak_wind +gust_low +qc_gust_low +rtd_shield_freq_high +qc_rtd_shield_freq_high +wind_dir_mean +qc_wind_dir_mean +box_temp_high +qc_box_temp_high +accum_precip_low +qc_accum_precip_low +pressure_high +qc_pressure_high +air_temp_6_3m_low +qc_air_temp_6_3m_low +wind_dir_low +qc_wind_dir_low +air_temp_3_low +qc_air_temp_3_low +air_temp_mean +qc_air_temp_mean +altimeter_mean +qc_altimeter_mean +air_temp_2_high +qc_air_temp_2_high +altimeter_high +qc_altimeter_high +box_presure_mean +qc_box_presure_mean +air_temp_low +qc_air_temp_low +box_air_temp_mean +qc_box_air_temp_mean +box_temp_mean +qc_box_temp_mean +paro_air_temp_mean +qc_paro_air_temp_mean +solar_flux_low +qc_solar_flux_low +wind_speed_high +qc_wind_speed_high +air_temp_2_mean +qc_air_temp_2_mean +accum_precip_mean +qc_accum_precip_mean +dewpoint_low +qc_dewpoint_low +box_rh_mean +qc_box_rh_mean +air_temp_3_mean +qc_air_temp_3_mean +solar_flux_mean +qc_solar_flux_mean +paro_pressure_period_high +qc_paro_pressure_period_high +air_temp_6_3m_mean +qc_air_temp_6_3m_mean +air_temp_5_high +qc_air_temp_5_high diff --git a/aosstower/level_b1/daily/nc.py b/aosstower/level_b1/daily/nc.py index 7000340a7cf4f6bd92db4b49dbc6eead3a47518a..f4a1d41b0b0e94e0667ca62741005110bc816ac9 100644 --- a/aosstower/level_b1/daily/nc.py +++ b/aosstower/level_b1/daily/nc.py @@ -4,6 +4,7 @@ import logging import pandas as pd from datetime import datetime as dt from aosstower.l00 import parser +import avg_database from netCDF4 import Dataset import numpy as np import platform @@ -43,7 +44,7 @@ def writeDimensions(ncFile): return ncFile -def createVariables(ncFile, firstStamp, chunksizes, zlib): +def createVariables(ncFile, firstStamp, chunksizes, zlib, database=parser.database): #base_time long name btln = 'base time as unix timestamp' @@ -115,11 +116,11 @@ def createVariables(ncFile, firstStamp, chunksizes, zlib): if 'time' in key: variable.calendar = 'gregorian' - for entry in parser.database: + for entry in database: if(entry == 'stamp'): continue - varTup = parser.database[entry] + varTup = database[entry] variable = ncFile.createVariable(entry, np.float32, dimensions=('time'), fill_value=float(-99999), zlib=zlib, chunksizes=chunksizes) @@ -265,6 +266,24 @@ def minuteAverages(frame): return newFrame.fillna(-99999) +def averageOverInterval(frame,interval_width): + """takes a frame and an interval to average it over, and returns a minimum, + maximum, and average dataframe for that interval""" + ts = frame.index + #round each timestamp to the nearest n minutes + frame['interval'] = (ts.astype(int)-ts.astype(int)%(interval_width*60e9)).astype('datetime64[ns]') + outFrames = {} + outFrames['low'] = frame.groupby('interval').min() + outFrames['high'] = frame.groupby('interval').max() + outFrames['mean'] = frame.groupby('interval').mean() + del frame['interval'] + for key in outFrames: + #append the appropriate suffix to each column + columns = outFrames[key].columns + outFrames[key].columns = ['_'.join([col,key]) for col in columns] + outFrames = pd.concat(outFrames.values(),axis=1) + return outFrames + def getData(inputFiles): dictData = {} @@ -282,9 +301,8 @@ def getData(inputFiles): return pd.DataFrame(dictData).transpose().replace(-99999, np.nan) -def writeVars(ncFile, frame): +def writeVars(ncFile, frame, database=parser.database): stamps = list(frame.index) - baseDTObj = dt.strptime(str(stamps[0]).split(' ')[0], '%Y-%m-%d') #find out how much time elapsed @@ -308,7 +326,6 @@ def writeVars(ncFile, frame): counter += 1 fileVar = ncFile.variables - fileVar['base_time'].assignValue(baseTimeValue) fileVar['time_offset'][:] = timeNumpy fileVar['time'][:] = timeNumpy + baseTimeValue @@ -331,13 +348,16 @@ def writeVars(ncFile, frame): #writes data into file for varName in frame: + if varName not in fileVar: + logging.warn('Extraneous key: %s in frame'%varName) + continue dataList = frame[varName].tolist() dataArray = np.asarray(dataList) fileVar[varName][:] = dataArray - valid_min = parser.database[varName][5] - valid_max = parser.database[varName][6] + valid_min = database[varName][5] + valid_max = database[varName][6] fileVar['qc_' + varName][:] = filterArray(dataArray, valid_min, valid_max) @@ -366,7 +386,8 @@ def writeVars(ncFile, frame): # @param input filenames - list of filenames # @param output filename - filename of the netcdf file -def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize): +def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize, + interval_width = None, database=parser.database): default = False if(chunkSize): @@ -383,7 +404,9 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize): else: frame = minuteAverages(frame) - + if interval_width: + frame = averageOverInterval(frame,interval_width) + if(start and end): frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')] @@ -396,11 +419,11 @@ def createGiantNetCDF(start, end, inputFiles, outputName, zlib, chunkSize): ncFile = writeDimensions(ncFile) - ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib) + ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib,database) ncFile.inputFiles = ', '.join(inputFiles) - ncFile = writeVars(ncFile, frame) + ncFile = writeVars(ncFile, frame,database) ncFile.close() @@ -437,47 +460,53 @@ def _dt_convert(datetime_str): return dt.strptime(datetime_str, '%Y-%m-%dT%H:%M:%S') except: return dt.strptime(datetime_str, '%Y-%m-%d') - + def main(): import argparse #argparse description - parser = argparse.ArgumentParser(description="Convert level_00 aoss tower data to level_a0") + argparser = argparse.ArgumentParser(description="Convert level_00 aoss tower data to level_a0") #argparse verbosity info - parser.add_argument('-v', '--verbose', action="count", default=int(os.environ.get("VERBOSITY", 2)), + argparser.add_argument('-v', '--verbose', action="count", default=int(os.environ.get("VERBOSITY", 2)), dest='verbosity', help='each occurrence increases verbosity 1 level through ERROR-WARNING-INFO-DEBUG (default INFO)') #argparse start and end times - parser.add_argument('-s', '--start-time', type=_dt_convert, + argparser.add_argument('-s', '--start-time', type=_dt_convert, help="Start time of massive netcdf file, if only -s is given, a netcdf file for only that day is given" + ". Formats allowed: \'YYYY-MM-DDTHH:MM:SS\', \'YYYY-MM-DD\'") - parser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file. Formats allowed:' + + argparser.add_argument('-e', '--end-time', type=_dt_convert, help='End time of massive netcdf file. Formats allowed:' + "\'YYYY-MM-DDTHH:MM:SS\', \'YYYY-MM-DD\'") - parser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file') - parser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib') + argparser.add_argument('-i', '--interval', type=float, + help='Width of the interval to average input data over in minutes.'+ + " If not specified, 1 is assumed. (Use 60 for one hour and 1440 for 1 day)") + argparser.add_argument('-cs', '--chunk-size', type=int, help='chunk Size for the netCDF file') + argparser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib') - parser.add_argument("input_files", nargs="+", + argparser.add_argument("input_files", nargs="+", help="aoss_tower level_00 paths") - parser.add_argument('-o', '--output', required=True, nargs="+", help="filename pattern or filename. " + + argparser.add_argument('-o', '--output', required=True, nargs="+", help="filename pattern or filename. " + "Should be along the lines of <filepath>/aoss_tower.YYYY-MM-DD.nc") - args = parser.parse_args() + args = argparser.parse_args() levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG] level=levels[min(3, args.verbosity)] logging.basicConfig(level=level) + database = avg_database.AOSS_VARS if args.interval else parser.database if(args.start_time and args.end_time): - result = createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size) + result = createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size, + args.interval, database) if(result == False): raise IOError('An empty ASCII file was found') elif(args.start_time): end_time = args.start_time.replace(hour=23, minute=59, second=59) - result = createGiantNetCDF(args.start_time, end_time, args.input_files, args.output[0], args.zlib, args.chunk_size) + result = createGiantNetCDF(args.start_time, end_time, args.input_files, args.output[0], args.zlib, args.chunk_size, + args.interval, database) if(result == False): raise IOError('An empty ASCII file was found') @@ -486,5 +515,6 @@ def main(): else: createMultiple(args.input_files, args.output, args.zlib, args.chunk_size) + if __name__ == "__main__": main() diff --git a/aosstower/level_b1/daily/nc.pyc b/aosstower/level_b1/daily/nc.pyc deleted file mode 100644 index 55c6b53922aadd2cd6e5fd6b266f2e5839dbdea0..0000000000000000000000000000000000000000 Binary files a/aosstower/level_b1/daily/nc.pyc and /dev/null differ diff --git a/aosstower/level_b1/monthly/nc_monthly.py b/aosstower/level_b1/monthly/nc_monthly.py index 8fd9c8c43e824094b94076f18fc93d6a95f837dd..b94edf2ed3dc90bc231e2ead1ef7985a64ec20b1 100644 --- a/aosstower/level_b1/monthly/nc_monthly.py +++ b/aosstower/level_b1/monthly/nc_monthly.py @@ -1,7 +1,12 @@ import platform from netCDF4 import Dataset +from aosstower.l00 import parser +import calendar +import logging +from aosstower.level_b1.daily import nc from datetime import datetime as dt from collections import OrderedDict +import glob import os # CF Metadata 1.6 compliant structure for NetCDF file. This is essentially @@ -824,11 +829,58 @@ AOSS_VARS = OrderedDict({ }) def _dt_convert(date): - return None + return dt.strptime(date, '%Y-%m-%d') + +def files_fr_directories(directories,file_ending='.ascii'): + """gets a list of all raw data files in the given directories""" + files = [] + for directory in directories: + files += glob.glob(os.path.join(directory,'*%s'%file_ending)) + return files def createGiantNetCDF(start, end, directories, output, zlib, chunk_size): + default = False + + if(chunk_size): + chunksizes = [chunk_size] + + else: + default = True + inputFiles = files_fr_directories(directories) + return False + frame = getData(inputFiles) + + if(frame.empty): + return False + + else: + + frame = minuteAverages(frame) + + if(start and end): + frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')] + + if(default): + chunksizes = [len(list(frame.index))] + + firstStamp = dt.strptime(str(list(frame.index)[0]), '%Y-%m-%d %H:%M:%S') + + ncFile = Dataset(outputName, 'w', format='NETCDF4_CLASSIC') + + ncFile = writeDimensions(ncFile) + + ncFile = createVariables(ncFile, firstStamp, chunksizes, zlib) + + ncFile.inputFiles = ', '.join(inputFiles) + + ncFile = writeVars(ncFile, frame) + + ncFile.close() + + return True + def writeDimensions(ncFile): for name, size in BASIC_STATION['dimensions'].items(): ncFile.createDimension(name, size) @@ -920,12 +972,13 @@ def main(): if(args.start_time and args.end_time): - result = createGiantNetCDF(args.start_time, args.end_time, args.input_files, args.output[0], args.zlib, args.chunk_size) + result = createGiantNetCDF(args.start_time, args.end_time, args.input_directories, args.output[0], args.zlib, args.chunk_size) if(result == False): raise IOError('An empty ASCII file was found') elif(args.start_time): - end_time = args.start_time.replace(hour=23, minute=59, second=59) + last_day = calendar.monthrange(args.start_time.year, args.start_time.month)[1] + end_time = args.start_time.replace(day = last_day, hour=23, minute=59, second=59) result = createGiantNetCDF(args.start_time, end_time, args.input_directories, args.output[0], args.zlib, args.chunk_size) if(result == False): raise IOError('An empty ASCII file was found') @@ -937,9 +990,4 @@ def main(): createMultiple(args.input_files, args.output, args.zlib, args.chunk_size) if __name__ == "__main__": - # main() - ncFile = Dataset('aoss_tower.2013-06.SUMMARY.nc', 'w', format='NETCDF4_CLASSIC') - ncFile = writeDimensions(ncFile) - ncFile = createVariables(ncFile, dt(2003, 6, 1), [30], True) - - ncFile.close() + main()