""" Data Versions ============= There have so far been 3 changes to the format of the raw ASCII data over the years. Version 0 --------- Effective from incept to 2010-06-01T00:27:51Z. The original data format was a key value, space separated data format ``<key> <value>``. There were a total of 16 data values including `TIME`: `TIME, ACCURAIN, TEMP107_4, LI200X, TEMP107_1, RH41372, TEMP107_5, CS105, PAROSCI, WSPD05305, TEMP107_3, CS10162, RAIN380M, TEMP107_2, TEMP41372, WDIR05305`. Version 1 --------- Effective 2010-06-01T00:27:51Z to 2012-12-03T17:34:17Z. A CSV format file with a total of 28 values: station_id, year, doy, hhmm, sec, box_pressure, paro_air_temp_period, paro_pressure_period, paro_air_temp, pressure, paro_cal_sig, box_rh, box_air_temp, air_temp_2, air_temp_3, air_temp_4, wind_speed, wind_dir, rh_shield_freq, rh, air_temp_6_3m, dewpoint, rtd_shield_freq, air_temp, solar_flux, precip, accum_precip, altimeter. Version 2 --------- Effective 2012-12-03T17:34:17Z to present. Same as Version 1 with the addition of altimeter2 at the end. I'm not sure why we have 2 altimeter values but as far as I know altimeter2 is not used. """ import re import logging from datetime import datetime, timedelta from metobs import data as d from aosstower.schema import database from aosstower.frame import Frame LOG = logging.getLogger(__name__) class LineParseError(Exception): """Error parsing line of frame data. """ @classmethod def raise_wrapped(cls, exception, msg=None): import sys traceback = sys.exc_info()[2] msg = msg or str(exception) raise cls(msg), None, traceback def _make_frame(data): """Construct a frame from a list of tuples. """ frame = Frame() for key, value in data: if key == 'stamp': continue if key in database: try: data[key] = database[key].type(data[key]) except (ValueError, TypeError) as err: raise LineParseError("error converting '%s' using %s", data[key], database[key].type) return data class ParserV0(object): """Parses Version 0 data lines. """ # maps v0 names to names in schema db names = {'ACCURAIN': 'accum_precip', 'TEMP107_1': 'box_air_temp', 'TEMP107_2': 'air_temp_2', 'TEMP107_3': 'air_temp_3', 'TEMP107_4': 'air_temp_4', 'TEMP107_5': 'air_temp_5', 'LI200X': 'solar_flux', 'RH41372': 'rh', 'TEMP41372': 'air_temp', 'CS105': 'box_pressure', 'PAROSCI': 'pressure', 'WSPD05305': 'wind_speed', 'WDIR05305': 'wind_dir', 'CS10162': 'box_rh', 'RAIN380M': 'precip'} @staticmethod def maybe_mine(line): return line.startswith('TIME') def make_frame(self, line): parts = line.split() if len(parts) != 32: raise LineParseError("Expected 32 components", line) raw_data = [('version', 0)] for k1, v1 in zip(parts[0::2], parts[1::2]): if k1 == 'TIME': continue if k1 in self.names: raw_data.append((self.names[k1], v1)) else: raise LineParseError("Unexpected var: %s" % k1, line) try: time_str = parts[1] unix_time = int(time_str) raw_data.append(('stamp', datetime.utcfromtimestamp(unix_time))) except (ValueError, TypeError): raise LineParseError("Could not parse stamp", line) return _make_frame(raw_data) class ParserV1V2(object): """Parses Version 1 & 2 data lines. """ names = ['station_id', 'year', 'doy', 'hhmm', 'sec', 'box_pressure', 'paro_air_temp_period', 'paro_pressure_period', 'paro_air_temp', 'pressure', 'paro_cal_sig', 'box_rh', 'box_air_temp', 'air_temp_2', 'air_temp_3', 'air_temp_4', 'wind_speed', 'wind_dir', 'rh_shield_freq', 'rh', 'air_temp_6_3m', 'dewpoint', 'rtd_shield_freq', 'air_temp', 'solar_flux', 'precip', 'accum_precip', 'altimeter'] # , 'altimeter2'] @staticmethod def maybe_mine(line): return re.search('^\d,\d{4},\d{1,3}', line) is not None def _get_stamp(self, parts): year = int(parts[1]) doy = int(parts[2]) dt = datetime.strptime('{:d}.{:03d}'.format(int(year), int(doy)), '%Y.%j') secs = d.hhmm_to_offset(parts[3]) secs += float(parts[4]) secs -= (secs % 5) dt += timedelta(seconds=secs) return dt def make_frame(self, line): parts = line.split(',') if len(parts) not in [28, 29]: raise LineParseError("Expected 28 or 29 parts", line) version = 1 if len(parts) == 28 else 2 raw_data = [('version', version)] + zip(self.names, parts) try: raw_data.append(('stamp', self._get_stamp(parts))) except (TypeError, ValueError): raise LineParseError("Could not parse timesamp", line) return _make_frame(raw_data) def read_frames(source, error_handler=lambda *a: None): """Returns a generator for reading frames from `source`. Frames are checked line-by-line so frame line versions may be mixed. """ if hasattr(source, 'readlines'): fptr = source else: fptr = open(source) for idx, line in enumerate(fptr.readlines()): if not line.strip() or line.startswith('#'): continue for parser in [ParserV1V2(), ParserV0()]: if parser.maybe_mine(line): try: yield parser.parse(line) except LineParseError as err: error_handler(idx + 1, line, err) break # yes, I know a for/else is obscure, but in this case it does # exactly what I need, it only executes if `break` does not execute else: error_handler(idx + 1, line, RuntimeError("no parser found", line))