Commit 0aef0649 authored by Bruce Flynn's avatar Bruce Flynn

netcdf futzery

parent 2725c7c0
......@@ -11,7 +11,7 @@ RUN apt-get update && apt-get -y install \
# install Miniconda3
WORKDIR /tmp
RUN curl -o installer.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
RUN curl -so installer.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash installer.sh -b -p /miniconda && \
rm installer.sh
......
import logging
from datetime import datetime
from collections import defaultdict
from datetime import timedelta
from contextlib import contextmanager
from datetime import datetime
from datetime import timedelta
import numpy as np
from metobs.data.calc import wind_vector_degrees
from psycopg2.extensions import adapt as sqlescape
from sqlalchemy import (
create_engine,
sql,
......@@ -13,20 +15,15 @@ from sqlalchemy import (
Column,
String,
Text,
Integer,
Float,
DateTime,
)
from sqlalchemy.sql import compiler
from sqlalchemy.pool import NullPool
from sqlalchemy.exc import IntegrityError
from psycopg2.extensions import adapt as sqlescape
from metobs.data.calc import wind_vector_degrees
from sqlalchemy.sql import compiler
from amrc_aws.util import NaN
from amrc_aws import config
from amrc_aws.station_aliases import stations
from amrc_aws.util import NaN
LOG = logging.getLogger(__name__)
......@@ -93,7 +90,7 @@ def get_stations(start=None, end=None):
end = end or datetime.utcnow()
with connection() as conn:
query = sql.text('''
SELECT distinct(station) FROM station_dates
SELECT DISTINCT(station) FROM station_dates
WHERE stamp >= :start AND stamp <= :end
ORDER BY station
''')
......@@ -258,7 +255,7 @@ def create_schema():
"""
Create database schema. Must have already called init.
"""
data_t = Table(
Table(
'data', metadata,
Column('stamp', DateTime, primary_key=True),
Column('station', String, primary_key=True),
......@@ -267,10 +264,10 @@ def create_schema():
Column('rh', Float),
Column('pressure', Float),
Column('_wind_vector_north', Float),
Column('_wind_vector_east', Float))
data_t.create()
Column('_wind_vector_east', Float)
).create()
data_t = Table(
Table(
'data_q1h', metadata,
Column('stamp', DateTime, primary_key=True),
Column('station', String, primary_key=True),
......@@ -279,14 +276,23 @@ def create_schema():
Column('rh', Float),
Column('pressure', Float),
Column('_wind_vector_north', Float),
Column('_wind_vector_east', Float))
data_t.create()
Column('_wind_vector_east', Float)
).create()
station_dates_t = Table(
Table(
'station_dates', metadata,
Column('station', Text, primary_key=True),
Column('stamp', DateTime, primary_key=True))
station_dates_t.create()
Column('stamp', DateTime, primary_key=True)
).create()
Table(
'station_position', metadata,
Column('station', Text, primary_key=True),
Column('stamp', DateTime, primary_key=True),
Column('lat', Float),
Column('lon', Float),
Column('alt', Float),
).create()
def _compile_query(stmt):
......
"""
Implements CF Timeseries dataset according to:
H.2.1. Orthogonal multidimensional array representation of time series
http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/cf-conventions.html#idp9763584
One deviation is that this file currently does not have any lat, lon, alt data.
"""
import logging
import numpy as np
import netCDF4
import numpy as np
from .util import unixtime
LOG = logging.getLogger(__name__)
NAME_STRLEN = 48
schema = {
......@@ -21,7 +29,7 @@ schema = {
'time': None,
},
'variables': {
'station': {
'station_name': {
'dims': ('station',),
'type': str,
'attrs': {
......@@ -29,14 +37,39 @@ schema = {
'cf_role': 'timeseries_id'
},
},
# 'lat': {
# 'dims': ('station',),
# 'type': np.float32,
# 'attrs': {
# 'standard_name': 'latitude',
# 'units': 'degrees_north'
# }
# },
# 'lon': {
# 'dims': ('station',),
# 'type': np.float32,
# 'attrs': {
# 'standard_name': 'longitude',
# 'units': 'degrees_east'
# }
# },
# 'alt': {
# 'dims': ('station',),
# 'type': np.float32,
# 'attrs': {
# 'standard_name': 'height',
# 'units': 'm',
# 'positive': 'up',
# }
# },
'time': {
'dims': ('time',),
'type': np.int64,
'fill': -999,
'type': np.uint32,
'attrs': {
'long_name': 'time of measurement',
'standard_name': 'time',
'units': 'seconds since 1970-01-01T00:00:00Z',
'calendar': 'standard',
},
},
}
......@@ -51,7 +84,7 @@ variables = {
'long_name': 'air temperature',
'standard_name': 'air_temperature',
'units': 'degC',
'coordinates': 'station, time'
# 'coordinates': 'lat lon'
},
},
'pressure': {
......@@ -62,7 +95,7 @@ variables = {
'long_name': 'air pressure',
'standard_name': 'air_pressure',
'units': 'hPa',
'coordinates': 'time'
# 'coordinates': 'lat lon'
},
},
'wind_spd': {
......@@ -73,7 +106,7 @@ variables = {
'long_name': 'wind speed',
'standard_name': 'wind_speed',
'units': 'm/s',
'coordinates': 'time'
# 'coordinates': 'lat lon'
},
},
'wind_dir': {
......@@ -84,7 +117,7 @@ variables = {
'long_name': 'wind direction',
'standard_name': 'wind_from_direction',
'units': 'degrees',
'coordinates': 'time'
# 'coordinates': 'lat lon'
},
},
'rh': {
......@@ -95,7 +128,7 @@ variables = {
'long_name': 'relative humidity',
'standard_name': 'relative_humidity',
'units': '%',
'coordinates': 'time'
# 'coordinates': 'lat lon',
},
},
'vtempdiff': {
......@@ -104,7 +137,8 @@ variables = {
'fill': -999.0,
'attrs': {
'long_name': 'vertical temperature differential',
'coordinates': 'time'
'units': '1',
# 'coordinates': 'lat lon',
},
},
}
......@@ -115,34 +149,48 @@ def _fill_dataset(stations, symbols, data, nc):
# data[0:data.shape[0]:2,2]
for symidx, symbol in enumerate(symbols):
# prefill with -999.9
arr = np.ones((len(stations), data.shape[0]/len(stations))) * -999.0
arr = np.ones((len(stations), data.shape[0] / len(stations))) * -999.0
for staidx, station in enumerate(stations):
# to get the rows for a particular station
# staidx:data.shape[0]:num_stations
# to skip over the stamp colum
# symidx+1
arr[staidx,:] = data[staidx:data.shape[0]:num_stations,symidx+1].astype(float)
arr[staidx, :] = data[staidx:data.shape[0]:num_stations, symidx + 1].astype(float)
var = nc.variables[symbol]
# set to fill where currently NaN
arr[np.where(arr != arr)] = var._FillValue
var[:] = arr
# set station names
var = nc.variables['station']
var = nc.variables['station_name']
var[:] = np.array(stations)
# set times
var = nc.variables['time']
var[:] = [unixtime(d) for d in data[0:data.shape[0]:2,0]]
var[:] = np.array([unixtime(d) for d in data[0:data.shape[0]:2, 0]], dtype=np.uint32)
# nc.variables['lat'][:] = [43.0, 43.0]
# nc.variables['lon'][:] = [-89.0, -89.0]
# nc.variables['alt'][:] = [0, 0]
def write_slice_to_netcdf(stations, symbols, data, dest, attrs=None):
attrs = attrs or {}
"""
Write data slice as returned by `.db.get_*_slice` to a NetCDF file.
:param stations: List of station names
:param symbols: List of symbols. See `.app.symbol_names`
:param data: See `.db.get_*_slice`
:param dest: Filepath to write NetCDF file to
:param attrs:
dict or list of key/values of attributes to write as NetCDF global attrs.
"""
attrs = attrs or []
with netCDF4.Dataset(dest, mode='w') as nc:
# global attrs
for name, value in schema['globals'].items():
setattr(nc, name, value)
for name, value in attrs.items():
for name, value in dict(attrs).items():
setattr(nc, name, value)
# dimensions
......@@ -152,14 +200,14 @@ def write_slice_to_netcdf(stations, symbols, data, dest, attrs=None):
elif name == 'station':
nc.createDimension(name, len(stations))
elif name == 'time':
nc.createDimension(name, data.shape[0]/len(stations))
nc.createDimension(name, data.shape[0] / len(stations))
else:
nc.createDimension(name)
# schema variables
for name, dat in schema['variables'].items():
var = nc.createVariable(
name, dat['type'], dat['dims'],
fill_value=dat.get('fill'), zlib=True)
name, dat['type'], dat['dims'],
fill_value=dat.get('fill'), zlib=True)
for aname, value in dat['attrs'].items():
setattr(var, aname, value)
# data variables
......@@ -167,9 +215,9 @@ def write_slice_to_netcdf(stations, symbols, data, dest, attrs=None):
if name not in symbols:
continue
var = nc.createVariable(
name, dat['type'], dat['dims'],
fill_value=dat['fill'], zlib=True)
for name, value in dat['attrs'].items():
setattr(var, name, value)
name, dat['type'], dat['dims'],
fill_value=dat['fill'], zlib=True)
for aname, value in dat['attrs'].items():
setattr(var, aname, value)
_fill_dataset(stations, symbols, data, nc)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment