"""Read data from the database."""
from dataclasses import make_dataclass
import datetime
from urllib.request import urlopen
import asccol
from pyld import jsonld
from . import data_spec
from .records import get_link
# One measured quantity: its URL slug, its column index in the rows
# produced by read_data, a human-readable title, and a units string.
Measurement = make_dataclass('Measurement',
                             ['slug', 'field', 'title', 'units'])

# The quantities this module knows how to extract from station files.
_ALL_MEASUREMENTS = (
    Measurement('temperature', 1, 'temperature', '\u00b0C'),
    Measurement('pressure', 2, 'pressure', 'hPa'),
    Measurement('wind-speed', 3, 'wind speed', 'm/s'),
)

# Lookup table keyed by slug, e.g. measurements['wind-speed'].
measurements = {entry.slug: entry for entry in _ALL_MEASUREMENTS}

# Special JSON property names/values (dcat vocabulary used in the
# dataset's JSON-LD metadata).
ACCESS_URL = 'http://www.w3.org/ns/dcat#accessURL'
DISTRIBUTION = 'http://www.w3.org/ns/dcat#Distribution'
def get_resources(link):
    """Fetch the download links for a dataset.

    Flattens the dataset's JSON-LD document and yields every dcat
    accessURL found on a Distribution node, skipping the 10-minute and
    3-hour resolution files.
    """
    nodes = jsonld.flatten(link + '.jsonld')
    for node in nodes:
        # Only Distribution nodes carry download links.
        if DISTRIBUTION not in node.get('@type', []):
            continue
        for access in node.get(ACCESS_URL, []):
            url = access.get('@id')
            if url is None:
                continue
            # If 10-minute, 1-hour, and 3-hour data are available, only
            # use the 1-hour.
            if '10min' not in url and '3hr' not in url:
                yield url
def read_data(station, year):
    """Fetch data and convert it to a NumPy array.

    Downloads every resource for *station* in *year*, parses each file
    with :mod:`asccol`, and returns an ``(N, 4)`` object array whose
    columns are ``[datetime, temperature, pressure, wind speed]``,
    sorted chronologically.  Returns an empty ``(0, 4)`` array when no
    resource yields any rows.
    """
    import numpy as np
    # The south-pole station publishes a different column layout.
    spec = (data_spec.SOUTH_POLE if station['id'] == 'south-pole'
            else data_spec.ONE_HOUR)
    data = []
    for url in get_resources(get_link(station, year)):
        with urlopen(url) as f:
            lines = map(bytes.decode, f)
            for row in asccol.parse_data(lines, spec):
                date = datetime.datetime(row.year, row.month, row.day,
                                         *row.time)
                data.append([date, row.temp, row.pressure,
                             row.wind_speed])
    if not data:
        # np.array([]) has shape (0,), so the data[:, 0] sort below
        # would raise IndexError; return a shape-consistent empty array.
        return np.empty((0, 4), dtype=object)
    data = np.array(data)
    # Sort by date, since monthly URLs will be out of order.
    return data[data[:, 0].argsort()]