diff --git a/aosstower/l00/parser.py b/aosstower/l00/parser.py index 704d2665c08fbffcc666801c181dd8ee4bd1b928..bded52ab5f7d414ea229b08438455079c35f6a54 100644 --- a/aosstower/l00/parser.py +++ b/aosstower/l00/parser.py @@ -1,9 +1,40 @@ +""" +Data Versions +============= +There have so far been 3 versions of the raw ASCII data format over the +years. + +Version 0 +--------- +Effective from inception to 2010-06-01T00:27:51Z. + +The original data format was a key value, space separated data format +``<key> <value>``. There were a total of 16 data values including `TIME`: +`TIME, ACCURAIN, TEMP107_4, LI200X, TEMP107_1, RH41372, TEMP107_5, CS105, +PAROSCI, WSPD05305, TEMP107_3, CS10162, RAIN380M, TEMP107_2, TEMP41372, +WDIR05305`. + +Version 1 +--------- +Effective 2010-06-01T00:27:51Z to 2012-12-03T17:34:17Z. + +A CSV format file with a total of 28 values: station_id, year, doy, hhmm, sec, +box_pressure, paro_air_temp_period, paro_pressure_period, paro_air_temp, +pressure, paro_cal_sig, box_rh, box_air_temp, air_temp_2, air_temp_3, +air_temp_4, wind_speed, wind_dir, rh_shield_freq, rh, air_temp_6_3m, dewpoint, +rtd_shield_freq, air_temp, solar_flux, precip, accum_precip, altimeter. + +Version 2 +--------- +Effective 2012-12-03T17:34:17Z to present. + +Same as Version 1 with the addition of altimeter2 at the end. I'm not sure why +we have 2 altimeter values but as far as I know altimeter2 is not used. 
+""" -# coding=utf-8 import re import logging from datetime import datetime, timedelta -from collections import defaultdict from metobs import data as d @@ -45,7 +76,7 @@ def _make_record(data): continue if key in database: data[key] = database[key].type(data[key]) - return defaultdict(lambda: float('nan'), data) + return data class ParserV0(object): @@ -83,6 +114,7 @@ class ParserV0(object): raw_data[self.names[k1]] = v1 else: raise LineParseError("Unexpected var: %s" % k1, line) + raw_data['version'] = 0 try: time_str = parts[1] unix_time = int(time_str) @@ -92,7 +124,7 @@ class ParserV0(object): return _make_record(raw_data) -class ParserV1(object): +class ParserV1V2(object): names = ['station_id', 'year', 'doy', 'hhmm', 'sec', 'box_pressure', 'paro_air_temp_period', 'paro_pressure_period', 'paro_air_temp', @@ -100,7 +132,7 @@ class ParserV1(object): 'air_temp_2', 'air_temp_3', 'air_temp_4', 'wind_speed', 'wind_dir', 'rh_shield_freq', 'rh', 'air_temp_6_3m', 'dewpoint', 'rtd_shield_freq', 'air_temp', 'solar_flux', 'precip', - 'accum_precip', 'altimeter'] + 'accum_precip', 'altimeter'] # , 'altimeter2'] @staticmethod def maybe_mine(line): @@ -118,9 +150,10 @@ class ParserV1(object): def parse(self, line): parts = line.split(',') - if len(parts) != 29: - raise LineParseError("Expected 28 parts", line) + if len(parts) < 28: + raise LineParseError("Expected >= 28 parts", line) raw_data = {k: v for k, v in zip(self.names, parts)} + raw_data['version'] = 1 if len(parts) == 28 else 2 try: raw_data['stamp'] = self._get_stamp(raw_data) except (TypeError, ValueError): @@ -138,12 +171,17 @@ def read_records(source, error_handler=lambda *a: None): fptr = open(source) for idx, line in enumerate(fptr.readlines()): - for parser in [ParserV1(), ParserV0()]: + if not line.strip() or line.startswith('#'): + continue + for parser in [ParserV1V2(), ParserV0()]: if parser.maybe_mine(line): try: yield parser.parse(line) - break # forces 'else' to execute except LineParseError as err: 
error_handler(idx + 1, line, err) + break + + # yes, I know a for/else is obscure, but in this case it does + # exactly what I need, it only executes if `break` does not execute else: error_handler(idx + 1, line, RuntimeError("no parser found", line))