Remove defaultdict behavior.

Fix issue with number of values for CSV data. Document record version and effectivity dates.

Remove defaultdict behavior.
Fix issue with number of values for CSV data. Document record version and effectivity dates.
cfe645d9 · Bruce Flynn · 42f36a49 · cfe645d9
Commit cfe645d9 authored 11 years ago by Bruce Flynn
--- a/aosstower/l00/parser.py
+++ b/aosstower/l00/parser.py
+"""
+Data Versions
+=============
+There have so far been 3 versions of the raw ASCII data format over the
+years.
+
+Version 0
+---------
+Effective from inception to 2010-06-01T00:27:51Z.
+
+The original format was space-separated key-value pairs, ``<key> <value>``.
+There were a total of 16 data values including `TIME`:
+`TIME, ACCURAIN, TEMP107_4, LI200X, TEMP107_1, RH41372, TEMP107_5, CS105,
+PAROSCI, WSPD05305, TEMP107_3, CS10162, RAIN380M, TEMP107_2, TEMP41372,
+WDIR05305`.
+
+Version 1
+---------
+Effective 2010-06-01T00:27:51Z to 2012-12-03T17:34:17Z.
+
+A CSV format file with a total of 28 values: station_id, year, doy, hhmm, sec,
+box_pressure, paro_air_temp_period, paro_pressure_period, paro_air_temp,
+pressure, paro_cal_sig, box_rh, box_air_temp, air_temp_2, air_temp_3,
+air_temp_4, wind_speed, wind_dir, rh_shield_freq, rh, air_temp_6_3m, dewpoint,
+rtd_shield_freq, air_temp, solar_flux, precip, accum_precip, altimeter.
+
+Version 2
+---------
+Effective 2012-12-03T17:34:17Z to present.
+
+Same as Version 1 with the addition of altimeter2 at the end. I'm not sure why
+we have 2 altimeter values but as far as I know altimeter2 is not used.
+"""

-# coding=utf-8
 import re
 import logging
 from datetime import datetime, timedelta
-from collections import defaultdict

 from metobs import data as d

@@ -45,7 +76,7 @@ def _make_record(data):
            continue
        if key in database:
            data[key] = database[key].type(data[key])
-    return defaultdict(lambda: float('nan'), data)
+    return data


 class ParserV0(object):
@@ -83,6 +114,7 @@ class ParserV0(object):
                raw_data[self.names[k1]] = v1
            else:
                raise LineParseError("Unexpected var: %s" % k1, line)
+        raw_data['version'] = 0
        try:
            time_str = parts[1]
            unix_time = int(time_str)
@@ -92,7 +124,7 @@ class ParserV0(object):
        return _make_record(raw_data)


-class ParserV1(object):
+class ParserV1V2(object):

    names = ['station_id', 'year', 'doy', 'hhmm', 'sec', 'box_pressure',
             'paro_air_temp_period', 'paro_pressure_period', 'paro_air_temp',
@@ -100,7 +132,7 @@ class ParserV1(object):
             'air_temp_2', 'air_temp_3', 'air_temp_4', 'wind_speed', 'wind_dir',
             'rh_shield_freq', 'rh', 'air_temp_6_3m', 'dewpoint',
             'rtd_shield_freq', 'air_temp', 'solar_flux', 'precip',
-             'accum_precip', 'altimeter']
+             'accum_precip', 'altimeter']  # , 'altimeter2']

    @staticmethod
    def maybe_mine(line):
@@ -118,9 +150,10 @@ class ParserV1(object):

    def parse(self, line):
        parts = line.split(',')
-        if len(parts) != 29:
-            raise LineParseError("Expected 28 parts", line)
+        if len(parts) < 28:
+            raise LineParseError("Expected >= 28 parts", line)
        raw_data = {k: v for k, v in zip(self.names, parts)}
+        raw_data['version'] = 1 if len(parts) == 28 else 2
        try:
            raw_data['stamp'] = self._get_stamp(raw_data)
        except (TypeError, ValueError):
@@ -138,12 +171,17 @@ def read_records(source, error_handler=lambda *a: None):
        fptr = open(source)

    for idx, line in enumerate(fptr.readlines()):
-        for parser in [ParserV1(), ParserV0()]:
+        if not line.strip() or line.startswith('#'):
+            continue
+        for parser in [ParserV1V2(), ParserV0()]:
            if parser.maybe_mine(line):
                try:
                    yield parser.parse(line)
-                    break  # forces 'else' to execute
                except LineParseError as err:
                    error_handler(idx + 1, line, err)
+                break
+
+        # A for/else is obscure, but it does exactly what is needed here: the
+        # `else` branch runs only if the loop finishes without hitting `break`.
        else:
            error_handler(idx + 1, line, RuntimeError("no parser found", line))