"""Read data from the database."""
from dataclasses import make_dataclass
import datetime
from urllib.request import urlopen
import asccol
from pyld import jsonld
from . import data_spec
from .records import get_link
# One measured quantity: its URL slug, its column index in the rows
# produced by read_data, a human-readable title, and a units string.
Measurement = make_dataclass('Measurement',
                             ['slug', 'field', 'title', 'units'])

# The quantities this module knows how to extract from station files.
_ALL_MEASUREMENTS = (
    Measurement('temperature', 1, 'temperature', '\u00b0C'),
    Measurement('pressure', 2, 'pressure', 'hPa'),
    Measurement('wind-speed', 3, 'wind speed', 'm/s'),
)

# Lookup table keyed by slug, e.g. measurements['wind-speed'].
measurements = {entry.slug: entry for entry in _ALL_MEASUREMENTS}

# Special JSON property names/values (dcat vocabulary used in the
# dataset's JSON-LD metadata).
ACCESS_URL = 'http://www.w3.org/ns/dcat#accessURL'
DISTRIBUTION = 'http://www.w3.org/ns/dcat#Distribution'
def get_resources(link):
    """Fetch the download links for a dataset.

    Flattens the dataset's JSON-LD document and yields every dcat
    accessURL found on a Distribution node, skipping the 10-minute and
    3-hour resolution files.
    """
    nodes = jsonld.flatten(link + '.jsonld')
    for node in nodes:
        # Only Distribution nodes carry download links.
        if DISTRIBUTION not in node.get('@type', []):
            continue
        for access in node.get(ACCESS_URL, []):
            url = access.get('@id')
            if url is None:
                continue
            # If 10-minute, 1-hour, and 3-hour data are available, only
            # use the 1-hour.
            if '10min' not in url and '3hr' not in url:
                yield url
def read_data(station, year):
    """Fetch data and convert it to a NumPy array.

    Downloads every resource for *station* in *year*, parses each file
    with :mod:`asccol`, and returns an ``(N, 4)`` object array whose
    columns are ``[datetime, temperature, pressure, wind speed]``,
    sorted chronologically.  Returns an empty ``(0, 4)`` array when no
    resource yields any rows.
    """
    import numpy as np
    # The south-pole station publishes a different column layout.
    spec = (data_spec.SOUTH_POLE if station['id'] == 'south-pole'
            else data_spec.ONE_HOUR)
    data = []
    for url in get_resources(get_link(station, year)):
        with urlopen(url) as f:
            lines = map(bytes.decode, f)
            for row in asccol.parse_data(lines, spec):
                date = datetime.datetime(row.year, row.month, row.day,
                                         *row.time)
                data.append([date, row.temp, row.pressure,
                             row.wind_speed])
    if not data:
        # np.array([]) has shape (0,), so the data[:, 0] sort below
        # would raise IndexError; return a shape-consistent empty array.
        return np.empty((0, 4), dtype=object)
    data = np.array(data)
    # Sort by date, since monthly URLs will be out of order.
    return data[data[:, 0].argsort()]