"""Read data from the database.""" from dataclasses import make_dataclass import datetime from urllib.request import urlopen import asccol import numpy as np from pyld import jsonld from . import data_spec from .records import get_link Measurement = make_dataclass('Measurement', ['url_name', 'field', 'title']) measurements = {m.url_name: m for m in ( Measurement('temperature', 1, 'Temperature (C)'), Measurement('pressure', 2, 'Pressure (hPa)'), Measurement('wind-speed', 3, 'Wind Speed (m/s)'), )} # Special JSON property names/values ACCESS_URL = 'http://www.w3.org/ns/dcat#accessURL' DISTRIBUTION = 'http://www.w3.org/ns/dcat#Distribution' def get_resources(link): """Fetch the download links for a dataset.""" doc = jsonld.flatten(link + '.jsonld') for i in doc: types = i.get('@type', []) if DISTRIBUTION not in types: continue urls = i.get(ACCESS_URL, []) for j in urls: url = j.get('@id') if url is None: continue # If 10-minute, 1-hour, and 3-hour data are available, only # use the 1-hour. if '10min' in url or '3hr' in url: continue yield url def read_data(station, year): """Fetch data and convert it to a NumPy array.""" spec = (data_spec.SOUTH_POLE if station['id'] == 'south-pole' else data_spec.ONE_HOUR) data = [] resource_list = get_resources(get_link(station, year)) for url in resource_list: with urlopen(url) as f: lines = map(bytes.decode, f) for row in asccol.parse_data(lines, spec): date = datetime.datetime(row.year, row.month, row.day, *row.time) data.append([ date, row.temp, row.pressure, row.wind_speed, ]) data = np.array(data) # Sort by date, since monthly URLs will be out of order. data = data[data[:, 0].argsort()] return data