Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
AossTower
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Deploy
Model registry
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MetObs
AossTower
Commits
0f5bd87b
Commit
0f5bd87b
authored
5 years ago
by
William Roberts
Browse files
Options
Downloads
Patches
Plain Diff
Begin making time interval more dynamic
parent
9749b866
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
aosstower/level_00/influxdb.py
+36
-25
36 additions, 25 deletions
aosstower/level_00/influxdb.py
with
36 additions
and
25 deletions
aosstower/level_00/influxdb.py
+
36
−
25
View file @
0f5bd87b
...
...
@@ -57,37 +57,40 @@ SYMBOLS = list(SYMBOL_CONVERSIONS.values())
class
Updater
(
object
):
"""
Append weather record (taken as a dict) and do averages when enough data is ready.
At least 12 minutes of data is required to do averaging: 10 minutes of wind gusts,
At least 12 minutes of data is required to do averaging
for gust_10m
: 10 minutes of wind gusts,
and wind gusts need 2 minutes of data to calculate.
This class is created once at startup and calls rolling_average every time new data is available which tries to do
averaging every 5 minutes of data added.
"""
def
__init__
(
self
):
def
__init__
(
self
,
data_interval
=
timedelta
(
seconds
=
5
),
submit_interval
=
timedelta
(
minutes
=
5
)):
"""
intervals are timedelta objects.
"""
self
.
data
=
{}
self
.
start_time
=
None
self
.
data_interval
=
data_interval
.
total_seconds
()
self
.
submit_interval
=
submit_interval
.
total_seconds
()
def
rolling_average
(
self
,
record
):
self
.
start_time
=
self
.
start_time
if
self
.
start_time
else
record
[
'
timestamp
'
]
time_interval
=
(
record
[
'
timestamp
'
]
-
self
.
start_time
).
total_seconds
()
# Appending to a DataFrame is slow. Instead, this adds to a dict in chunks and passes it to the DataFrame.
time_mask
=
True
if
self
.
data
.
get
(
'
timestamp
'
)
is
not
None
:
time_mask
=
self
.
data
[
'
timestamp
'
]
>
record
[
'
timestamp
'
]
-
timedelta
(
minutes
=
12
)
for
key
in
record
:
if
self
.
data
.
get
(
key
)
is
None
:
self
.
data
[
key
]
=
np
.
array
([])
self
.
data
[
key
]
=
np
.
append
(
self
.
data
[
key
],
record
[
key
])
# If 5 minutes of data are ready, average current data in dict. Holds up to 15 minutes.
# 60 * 5 seconds = 5 minutes.
if
time_interval
>=
300
or
(
record
[
'
timestamp
'
].
minute
%
5
==
0
and
record
[
'
timestamp
'
].
second
==
0
):
# Appending to a DataFrame is slow. Instead, this adds to a dict in chunks and passes it to the DataFrame.
frame
=
self
.
_calculate_averages
()
frame
.
fillna
(
value
=
np
.
nan
,
inplace
=
True
)
# Keep data set within minimum window to improve speed.
# Wind gusts looks at 10 minute intervals, including the first data point which needs 2 minutes of data
# before it, totalling 12 minutes.
time_mask
=
frame
.
index
>
frame
.
index
[
-
1
]
-
timedelta
(
minutes
=
12
)
self
.
data
=
{
key
:
val
[
time_mask
]
for
key
,
val
in
self
.
data
.
items
()}
self
.
start_time
=
self
.
data
[
'
timestamp
'
][
-
1
]
if
record
[
'
timestamp
'
].
minute
%
5
==
0
and
record
[
'
timestamp
'
].
second
==
0
:
return
frame
self
.
data
[
key
]
=
np
.
append
(
self
.
data
[
key
][
time_mask
],
record
[
key
])
reference
=
pd
.
datetime
(
1
,
1
,
1
)
if
(
record
[
'
timestamp
'
]
-
reference
).
total_seconds
()
%
self
.
submit_interval
==
0
:
submit
=
True
elif
(
record
[
'
timestamp
'
]
-
reference
).
total_seconds
()
%
self
.
submit_interval
+
self
.
data_interval
>
\
self
.
submit_interval
:
submit
=
True
else
:
submit
=
False
# If data hits a submit_interval interval, return data.
if
submit
:
return
self
.
_calculate_averages
()
def
_calculate_averages
(
self
):
KNOTS_9
=
calc
.
knots_to_mps
(
9.
)
...
...
@@ -95,6 +98,7 @@ class Updater(object):
frame
=
pd
.
DataFrame
(
self
.
data
)
frame
.
set_index
(
'
timestamp
'
,
inplace
=
True
)
frame
.
mask
(
frame
==
-
99999.
,
inplace
=
True
)
full_frame
=
frame
.
asfreq
(
'
{0}S
'
.
format
(
self
.
data_interval
))
# Add wind direction components so we can average wind direction properly.
frame
[
'
wind_east
'
],
frame
[
'
wind_north
'
],
_
=
calc
.
wind_vector_components
(
frame
[
'
wind_speed
'
],
frame
[
'
wind_dir
'
])
...
...
@@ -105,21 +109,28 @@ class Updater(object):
"
it from air temp and relative humidity
"
)
frame
[
'
dewpoint
'
]
=
calc
.
dewpoint
(
frame
[
'
air_temp
'
],
frame
[
'
rh
'
])
# https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
# 2 minute rolling average.
winds_frame_2m
=
frame
[[
'
wind_speed
'
,
'
wind_east
'
,
'
wind_north
'
]].
rolling
(
'
2T
'
).
mean
()
winds_frame_2m
=
frame
[[
'
wind_speed
'
,
'
wind_east
'
,
'
wind_north
'
]].
rolling
(
'
2T
'
,
closed
=
'
right
'
).
mean
()
frame
[
'
wind_speed_2m
'
]
=
winds_frame_2m
[
'
wind_speed
'
]
frame
[
'
wind_dir_2m
'
]
=
calc
.
wind_vector_degrees
(
winds_frame_2m
[
'
wind_east
'
],
winds_frame_2m
[
'
wind_north
'
])
frame_2
=
frame
[
frame
.
index
>
frame
.
index
[
-
1
]
-
timedelta
(
minutes
=
2
)]
full_2
=
full_frame
[
full_frame
.
index
>
full_frame
.
index
[
-
1
]
-
timedelta
(
minutes
=
2
)]
# Makes 2 minute averages nans if given less than 2 minutes of data.
if
len
(
full_2
)
<
120
/
self
.
data_interval
or
len
(
frame_2
)
!=
len
(
full_2
):
frame
[
'
wind_speed_2m
'
].
mask
(
frame
[
'
wind_speed_2m
'
]
>
-
1.
,
inplace
=
True
)
frame
[
'
wind_dir_2m
'
].
mask
(
frame
[
'
wind_dir_2m
'
]
>
-
1.
,
inplace
=
True
)
# 1 minute rolling peaks
wind_peak_1m
=
frame
[
'
wind_speed
'
].
rolling
(
window
=
'
1T
'
,
c
enter
=
False
).
max
()
wind_peak_1m
=
frame
[
'
wind_speed
'
].
rolling
(
window
=
'
1T
'
,
c
losed
=
'
right
'
).
max
()
# criteria for a fast wind to be considered a wind gust
gust_mask
=
(
winds_frame_2m
[
'
wind_speed
'
]
>=
KNOTS_9
)
&
\
(
wind_peak_1m
>=
winds_frame_2m
[
'
wind_speed
'
]
+
KNOTS_5
)
frame
[
'
gust_1m
'
]
=
wind_peak_1m
.
mask
(
~
gust_mask
)
frame
[
'
gust_10m
'
]
=
calculate_wind_gust
(
frame
[
'
wind_speed
'
],
winds_frame_2m
[
'
wind_speed
'
])
# Make 10 minute gusts before 12 minutes nans because data is insufficient.
if
(
self
.
data
[
'
timestamp
'
][
-
1
]
-
self
.
data
[
'
timestamp
'
][
0
]).
total_seconds
()
<
720
:
# Make
s
10 minute gusts before 12 minutes nans because data is insufficient.
if
len
(
full_frame
)
<
720
/
self
.
data_interval
or
len
(
frame
)
!=
len
(
full_frame
)
:
frame
[
'
gust_10m
'
].
mask
(
frame
[
'
gust_10m
'
]
>
-
1.
,
inplace
=
True
)
return
frame
return
frame
.
fillna
(
value
=
np
.
nan
)
def
convert_to_influx_frame
(
record_gen
,
symbols
,
debug
=
False
):
...
...
@@ -245,7 +256,7 @@ def main():
try
:
influx_gen
=
convert_to_influx_frame
(
record_gen
,
symbols
,
args
.
debug
)
influx_gen
=
influxdb
.
grouper
(
influx_gen
,
args
.
bulk
)
updater
=
Updater
()
updater
=
Updater
(
submit_interval
=
timedelta
(
minutes
=
30
)
)
import
time
t0
=
time
.
time
()
for
record
in
influx_gen
:
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment