Skip to content
Snippets Groups Projects
Commit cfe645d9 authored by Bruce Flynn's avatar Bruce Flynn
Browse files

Remove defaultdict behavior.

Fix issue with number of values for CSV data.
Document record version and effectivity dates.
parent 42f36a49
No related branches found
No related tags found
No related merge requests found
"""
Data Versions
=============
There have so far been 3 versions of the raw ASCII data format over the
years.
Version 0
---------
Effective from inception to 2010-06-01T00:27:51Z.
The original data format was a space-separated key-value format,
``<key> <value>``. There were a total of 16 data values including `TIME`:
`TIME, ACCURAIN, TEMP107_4, LI200X, TEMP107_1, RH41372, TEMP107_5, CS105,
PAROSCI, WSPD05305, TEMP107_3, CS10162, RAIN380M, TEMP107_2, TEMP41372,
WDIR05305`.
Version 1
---------
Effective 2010-06-01T00:27:51Z to 2012-12-03T17:34:17Z.
A CSV format file with a total of 28 values: station_id, year, doy, hhmm, sec,
box_pressure, paro_air_temp_period, paro_pressure_period, paro_air_temp,
pressure, paro_cal_sig, box_rh, box_air_temp, air_temp_2, air_temp_3,
air_temp_4, wind_speed, wind_dir, rh_shield_freq, rh, air_temp_6_3m, dewpoint,
rtd_shield_freq, air_temp, solar_flux, precip, accum_precip, altimeter.
Version 2
---------
Effective 2012-12-03T17:34:17Z to present.
Same as Version 1 with the addition of altimeter2 at the end, for a total of
29 values. I'm not sure why we have 2 altimeter values, but as far as I know
altimeter2 is not used.
"""
# coding=utf-8
import re
import logging
from datetime import datetime, timedelta
from collections import defaultdict
from metobs import data as d
......@@ -45,7 +76,7 @@ def _make_record(data):
continue
if key in database:
data[key] = database[key].type(data[key])
return defaultdict(lambda: float('nan'), data)
return data
class ParserV0(object):
......@@ -83,6 +114,7 @@ class ParserV0(object):
raw_data[self.names[k1]] = v1
else:
raise LineParseError("Unexpected var: %s" % k1, line)
raw_data['version'] = 0
try:
time_str = parts[1]
unix_time = int(time_str)
......@@ -92,7 +124,7 @@ class ParserV0(object):
return _make_record(raw_data)
class ParserV1(object):
class ParserV1V2(object):
names = ['station_id', 'year', 'doy', 'hhmm', 'sec', 'box_pressure',
'paro_air_temp_period', 'paro_pressure_period', 'paro_air_temp',
......@@ -100,7 +132,7 @@ class ParserV1(object):
'air_temp_2', 'air_temp_3', 'air_temp_4', 'wind_speed', 'wind_dir',
'rh_shield_freq', 'rh', 'air_temp_6_3m', 'dewpoint',
'rtd_shield_freq', 'air_temp', 'solar_flux', 'precip',
'accum_precip', 'altimeter']
'accum_precip', 'altimeter'] # , 'altimeter2']
@staticmethod
def maybe_mine(line):
......@@ -118,9 +150,10 @@ class ParserV1(object):
def parse(self, line):
parts = line.split(',')
if len(parts) != 29:
raise LineParseError("Expected 28 parts", line)
if len(parts) < 28:
raise LineParseError("Expected >= 28 parts", line)
raw_data = {k: v for k, v in zip(self.names, parts)}
raw_data['version'] = 1 if len(parts) == 28 else 2
try:
raw_data['stamp'] = self._get_stamp(raw_data)
except (TypeError, ValueError):
......@@ -138,12 +171,17 @@ def read_records(source, error_handler=lambda *a: None):
fptr = open(source)
for idx, line in enumerate(fptr.readlines()):
for parser in [ParserV1(), ParserV0()]:
if not line.strip() or line.startswith('#'):
continue
for parser in [ParserV1V2(), ParserV0()]:
if parser.maybe_mine(line):
try:
yield parser.parse(line)
break # forces 'else' to execute
except LineParseError as err:
error_handler(idx + 1, line, err)
break
# yes, I know a for/else is obscure, but in this case it does exactly what
# I need: the `else` clause only executes if the loop finishes without
# `break` having executed
else:
error_handler(idx + 1, line, RuntimeError("no parser found", line))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment