"""
Data Versions
=============
There have so far been 3 versions of the raw ASCII data format over the
years.

Version 0
---------
Effective from inception to 2010-06-01T00:27:51Z.

The original data format was a key value, space separated data format
``<key> <value>``. There were a total of 16 data values including `TIME`:
`TIME, ACCURAIN, TEMP107_4, LI200X, TEMP107_1, RH41372, TEMP107_5, CS105,
PAROSCI, WSPD05305, TEMP107_3, CS10162, RAIN380M, TEMP107_2, TEMP41372,
WDIR05305`.

Version 1
---------
Effective 2010-06-01T00:27:51Z to 2012-12-03T17:34:17Z.

A CSV format file with a total of 28 values: station_id, year, doy, hhmm, sec,
box_pressure, paro_air_temp_period, paro_pressure_period, paro_air_temp,
pressure, paro_cal_sig, box_rh, box_air_temp, air_temp_2, air_temp_3,
air_temp_4, wind_speed, wind_dir, rh_shield_freq, rh, air_temp_6_3m, dewpoint,
rtd_shield_freq, air_temp, solar_flux, precip, accum_precip, altimeter.

Version 2
---------
Effective 2012-12-03T17:34:17Z to present.

Same as Version 1 with the addition of altimeter2 at the end. I'm not sure why
we have 2 altimeter values but as far as I know altimeter2 is not used.
"""

import re
import logging
from datetime import datetime, timedelta

from metobs import data as d
from aosstower.schema import database
from aosstower.frame import Frame

LOG = logging.getLogger(__name__)


class LineParseError(Exception):
    """Error parsing line of frame data.
    """
    @classmethod
    def raise_wrapped(cls, exception, msg=None):
        """Raise a new instance of this class in place of `exception`.

        The traceback of the exception currently being handled (as reported
        by ``sys.exc_info()``) is attached to the new error, so the original
        failure location is not lost.

        :param exception: exception being wrapped; ``str(exception)`` is the
            default message.
        :param msg: optional message to use instead of ``str(exception)``.
        :raises: always raises an instance of `cls`.
        """
        import sys
        traceback = sys.exc_info()[2]
        wrapped = cls(msg or str(exception))
        if hasattr(wrapped, 'with_traceback'):
            # Python 3: exceptions carry their traceback explicitly.
            raise wrapped.with_traceback(traceback)
        # Python 2: the three-expression raise is a SyntaxError on Python 3,
        # so it is kept out of the module source and only compiled when this
        # branch is actually reached. The string is a constant.
        exec("raise wrapped, None, traceback",
             {}, {'wrapped': wrapped, 'traceback': traceback})


def _make_frame(data):
    """Convert the values of a list of ``(key, value)`` tuples.

    Every pair whose key is present in the schema `database` has its value
    passed through ``database[key].type``. The ``'stamp'`` pair and pairs
    whose key is not in `database` are left untouched.

    :param data: list of ``(key, value)`` tuples; it is updated in place.
    :returns: the same `data` list, with converted values.
    :raises LineParseError: if a value cannot be converted.
    """
    # NOTE(review): the result is still the list of pairs, as before; the
    # previous code built an empty Frame and discarded it. Confirm with the
    # callers whether a populated Frame is expected here instead.
    for idx, (key, value) in enumerate(data):
        if key == 'stamp' or key not in database:
            continue
        convert = database[key].type
        try:
            data[idx] = (key, convert(value))
        except (ValueError, TypeError):
            raise LineParseError(
                "error converting '%s' using %s" % (value, convert))
    return data


class ParserV0(object):
    """Parser for Version 0 lines: space separated ``<key> <value>`` pairs.
    """

    # Translation table from the Version 0 keys to the schema db names.
    names = {
        'ACCURAIN': 'accum_precip',
        'TEMP107_1': 'box_air_temp',
        'TEMP107_2': 'air_temp_2',
        'TEMP107_3': 'air_temp_3',
        'TEMP107_4': 'air_temp_4',
        'TEMP107_5': 'air_temp_5',
        'LI200X': 'solar_flux',
        'RH41372': 'rh',
        'TEMP41372': 'air_temp',
        'CS105': 'box_pressure',
        'PAROSCI': 'pressure',
        'WSPD05305': 'wind_speed',
        'WDIR05305': 'wind_dir',
        'CS10162': 'box_rh',
        'RAIN380M': 'precip',
    }

    @staticmethod
    def maybe_mine(line):
        """Return True when `line` begins with the ``TIME`` key.
        """
        return line.startswith('TIME')

    def make_frame(self, line):
        """Turn one Version 0 line into frame data.

        The line must split into exactly 32 whitespace separated tokens,
        read as alternating keys and values. The second token is taken as
        the ``TIME`` value and parsed as integer epoch seconds.

        Raises `LineParseError` on a wrong token count, an unknown key or
        an unparsable time value.
        """
        tokens = line.split()
        if len(tokens) != 32:
            raise LineParseError("Expected 32 components", line)

        fields = [('version', 0)]
        keys, values = tokens[0::2], tokens[1::2]
        for name, text in zip(keys, values):
            if name == 'TIME':
                # handled separately below, once all keys are validated
                continue
            if name not in self.names:
                raise LineParseError("Unexpected var: %s" % name, line)
            fields.append((self.names[name], text))

        try:
            stamp = datetime.utcfromtimestamp(int(tokens[1]))
        except (ValueError, TypeError):
            raise LineParseError("Could not parse stamp", line)
        fields.append(('stamp', stamp))
        return _make_frame(fields)


class ParserV1V2(object):
    """Parses Version 1 & 2 data lines (comma separated values).
    """

    # Column names, in file order. The 29th column of Version 2 lines
    # (altimeter2) is not listed, so `zip` in `make_frame` drops it.
    names = ['station_id', 'year', 'doy', 'hhmm', 'sec', 'box_pressure',
             'paro_air_temp_period', 'paro_pressure_period', 'paro_air_temp',
             'pressure', 'paro_cal_sig', 'box_rh', 'box_air_temp',
             'air_temp_2', 'air_temp_3', 'air_temp_4', 'wind_speed', 'wind_dir',
             'rh_shield_freq', 'rh', 'air_temp_6_3m', 'dewpoint',
             'rtd_shield_freq', 'air_temp', 'solar_flux', 'precip',
             'accum_precip', 'altimeter']  # , 'altimeter2']

    @staticmethod
    def maybe_mine(line):
        """Return True if `line` starts like ``<digit>,<4 digits>,<1-3 digits>``.
        """
        # Raw string so that `\d` reaches the regex engine untouched.
        return re.search(r'^\d,\d{4},\d{1,3}', line) is not None

    def _get_stamp(self, parts):
        """Build the line's datetime from the year, doy, hhmm and sec columns.

        :param parts: the comma-split fields of a line.
        :returns: naive `datetime`, floored to a multiple of 5 seconds.
        :raises ValueError: if a field cannot be parsed as a number or date.
        """
        year = int(parts[1])
        doy = int(parts[2])
        dt = datetime.strptime('{:d}.{:03d}'.format(year, doy), '%Y.%j')
        # presumably seconds since midnight for an "hhmm" string -- confirm
        # against metobs.data.hhmm_to_offset
        secs = d.hhmm_to_offset(parts[3])
        secs += float(parts[4])
        # floor to the previous multiple of 5 seconds
        secs -= (secs % 5)
        dt += timedelta(seconds=secs)
        return dt

    def make_frame(self, line):
        """Turn one Version 1 or Version 2 line into frame data.

        A line with 28 fields is Version 1, one with 29 fields is Version 2.

        :raises LineParseError: on any other field count or when the
            timestamp columns cannot be parsed.
        """
        parts = line.split(',')
        if len(parts) not in [28, 29]:
            raise LineParseError("Expected 28 or 29 parts", line)
        version = 1 if len(parts) == 28 else 2
        # list() because zip returns an iterator on Python 3, which cannot
        # be concatenated to a list.
        raw_data = [('version', version)] + list(zip(self.names, parts))
        try:
            raw_data.append(('stamp', self._get_stamp(parts)))
        except (TypeError, ValueError):
            raise LineParseError("Could not parse timestamp", line)
        return _make_frame(raw_data)


def read_frames(source, error_handler=lambda *a: None):
    """Returns a generator for reading frames from `source`. Frames are
    checked line-by-line so frame line versions may be mixed.

    :param source: an object with a ``readlines`` method, or otherwise a
        path that is opened for reading. A file opened here is closed as
        soon as its lines are read; an object passed in is left open.
    :param error_handler: callable invoked as
        ``error_handler(line_number, line, exception)`` (1-based line
        number) when a line fails to parse or no parser recognizes it.
        The default ignores such lines.

    Blank lines and lines starting with ``#`` are skipped.
    """
    if hasattr(source, 'readlines'):
        lines = source.readlines()
    else:
        with open(source) as fptr:
            lines = fptr.readlines()

    for lineno, line in enumerate(lines, 1):
        if not line.strip() or line.startswith('#'):
            continue
        # The first parser whose `maybe_mine` accepts the line wins.
        for parser_cls in (ParserV1V2, ParserV0):
            if parser_cls.maybe_mine(line):
                try:
                    frame = parser_cls().make_frame(line)
                except LineParseError as err:
                    error_handler(lineno, line, err)
                else:
                    # yield outside the try so that an exception thrown
                    # into the generator is not mistaken for a parse error
                    yield frame
                break

        # yes, I know a for/else is obscure, but in this case it does
        # exactly what I need, it only executes if `break` does not execute
        else:
            error_handler(lineno, line, RuntimeError("no parser found", line))