"""Read data from the database.""" from dataclasses import make_dataclass import datetime from urllib.request import urlopen import asccol from pyld import jsonld from . import data_spec from .records import get_link Measurement = make_dataclass('Measurement', ['slug', 'field', 'title', 'units']) measurements = {m.slug: m for m in ( Measurement('temperature', 1, 'temperature', '\u00b0C'), Measurement('pressure', 2, 'pressure', 'hPa'), Measurement('wind-speed', 3, 'wind speed', 'm/s'), )} # Special JSON property names/values ACCESS_URL = 'http://www.w3.org/ns/dcat#accessURL' DISTRIBUTION = 'http://www.w3.org/ns/dcat#Distribution' def get_resources(link): """Fetch the download links for a dataset.""" doc = jsonld.flatten(link + '.jsonld') for i in doc: types = i.get('@type', []) if DISTRIBUTION not in types: continue urls = i.get(ACCESS_URL, []) for j in urls: url = j.get('@id') if url is None: continue # If 10-minute, 1-hour, and 3-hour data are available, only # use the 1-hour. if '10min' in url or '3hr' in url: continue yield url def read_data(station, year): """Fetch data and convert it to a NumPy array.""" import numpy as np spec = data_spec.ONE_HOUR data = [] resource_list = get_resources(get_link(station, year)) for url in resource_list: with urlopen(url) as f: lines = map(bytes.decode, f) for row in asccol.parse_data(lines, spec): date = datetime.datetime(row.year, row.month, row.day, *row.time) data.append([ date, row.temp, row.pressure, row.wind_speed, ]) data = np.array(data) # Sort by date, since monthly URLs will be out of order. data = data[data[:, 0].argsort()] return data