MetObs / AossTower · Commits

Commit 18835498 (Unverified), authored 8 years ago by David Hoese
Update netcdf generation to use 5s data in summary/monthly generation
Parent: cfc38e61

Showing 1 changed file: aosstower/level_b1/nc.py (+48 additions, −20 deletions)
@@ -20,9 +20,10 @@ KNOTS_9 = calc.knots_to_mps(9.)
 KNOTS_5 = calc.knots_to_mps(5.)
 KNOTS_3 = calc.knots_to_mps(3.)
 KNOTS_2 = calc.knots_to_mps(2.)
+DEFAULT_FLOAT_FILL = -9999.
 
 
-def make_mean_dict(source_dict):
+def make_summary_dict(source_dict):
     """Create the '_mean', '_low', '_high' file structure."""
     dest_dict = {}
     for key in source_dict:
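For reference, this is the '_mean'/'_low'/'_high' expansion the renamed helper performs. The loop body is collapsed in this diff, so the per-key copy below is a sketch inferred from the docstring and the new --summary help text further down, not the verbatim source:

# Sketch of the collapsed loop body; the exact suffix handling is an assumption.
def make_summary_dict(source_dict):
    """Create the '_mean', '_low', '_high' file structure."""
    dest_dict = {}
    for key in source_dict:
        for suffix in ('_mean', '_low', '_high'):
            dest_dict[key + suffix] = source_dict[key]
    return dest_dict

# e.g. {'air_temp': spec} -> {'air_temp_mean': spec, 'air_temp_low': spec, 'air_temp_high': spec}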
@@ -32,9 +33,6 @@ def make_mean_dict(source_dict):
     return dest_dict
 
 
-MEAN_DATABASE = make_mean_dict(schema.database)
-
-
 def filter_array(arr, valid_min, valid_max, valid_delta):
     """Create QC field array.
@@ -93,15 +91,15 @@ def create_variables(nc_file, first_stamp, database, chunk_sizes=None, zlib=Fals
     coordinates = {
         # fields: type, dimension, fill, valid_min, std_name, longname, units, valid_max, cf_role, axis
-        'time': [np.float64, ('time',), -999., None, None, "Hour offset from midnight", t_u, None, None, None],
+        'time': [np.float64, ('time',), DEFAULT_FLOAT_FILL, None, None, "Hour offset from midnight", t_u, None, None, None],
-        'lon': [np.float32, tuple(), -999., -180., 'longitude', None, 'degrees_east', 180., None],
-        'lat': [np.float32, tuple(), -999., -90., 'latitude', None, 'degrees_north', 90., None],
-        'alt': [np.float32, tuple(), -999., None, 'height', 'vertical distance', 'm', None, None],
+        'lon': [np.float32, tuple(), DEFAULT_FLOAT_FILL, -180., 'longitude', None, 'degrees_east', 180., None],
+        'lat': [np.float32, tuple(), DEFAULT_FLOAT_FILL, -90., 'latitude', None, 'degrees_north', 90., None],
+        'alt': [np.float32, tuple(), DEFAULT_FLOAT_FILL, None, 'height', 'vertical distance', 'm', None, None],
         # int64 for base_time would be best, but NetCDF4 Classic does not support it
         # NetCDF4 Classic mode was chosen so users can use MFDatasets (multi-file datasets)
-        'base_time': [np.int32, tuple(), -999., None, 'time', btln, btu, None, None],
-        'time_offset': [np.float64, ('time',), -999., None, 'time', to_ln, to_u, None, None],
+        'base_time': [np.int32, tuple(), DEFAULT_FLOAT_FILL, None, 'time', btln, btu, None, None],
+        'time_offset': [np.float64, ('time',), DEFAULT_FLOAT_FILL, None, 'time', to_ln, to_u, None, None],
         'station_name': ['c', ('max_len_station_name',), '\0', None, None, 'station name', None, None, 'timeseries_id'],
     }
@@ -154,7 +152,7 @@ def create_variables(nc_file, first_stamp, database, chunk_sizes=None, zlib=Fals
         varTup = database[entry]
         variable = nc_file.createVariable(entry, np.float32,
-                                          dimensions=('time',), fill_value=float(-999), zlib=zlib,
+                                          dimensions=('time',), fill_value=DEFAULT_FLOAT_FILL, zlib=zlib,
                                           chunksizes=chunk_sizes)
         variable.standard_name = varTup[1]
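For context, the fill_value keyword of netCDF4's createVariable becomes the variable's _FillValue attribute, so this hunk changes the missing-data sentinel readers see from -999 to -9999. A minimal standalone illustration (hypothetical file and variable names):

import numpy as np
from netCDF4 import Dataset

# NETCDF4_CLASSIC matches the format this module writes.
nc = Dataset('example.nc', 'w', format='NETCDF4_CLASSIC')
nc.createDimension('time', None)
var = nc.createVariable('air_temp', np.float32, dimensions=('time',),
                        fill_value=-9999., zlib=True)
print(var._FillValue)  # -9999.0; unwritten elements read back as this sentinel
nc.close()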
@@ -250,7 +248,7 @@ def minute_averages(frame):
     return new_frame.fillna(np.nan)
 
-def average_over_interval(frame, interval_width):
+def summary_over_interval(frame, interval_width):
     """takes a frame and an interval to average it over, and returns a minimum,
     maximum, and average dataframe for that interval
     """
@@ -309,7 +307,7 @@ def write_vars(nc_file, frame, database):
         if varName not in fileVar:
             LOG.debug('Unused input variable: %s', varName)
             continue
-        fileVar[varName][:] = frame[varName].fillna(-999.).values
+        fileVar[varName][:] = frame[varName].fillna(DEFAULT_FLOAT_FILL).values
         valid_min = database[varName][5]
         valid_max = database[varName][6]
@@ -354,14 +352,40 @@ def write_global_attributes(nc_file, input_sources):
 def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
-                        start=None, end=None, interval_width=None,
+                        start=None, end=None, interval_width=None, summary=False,
                         database=schema.database):
     frame = get_data(input_files)
     if frame.empty:
         raise ValueError("No data found from input files: {}".format(", ".join(input_files)))
-    frame = minute_averages(frame)
-    if interval_width:
-        frame = average_over_interval(frame, interval_width)
+
+    # Add wind direction components so we can average wind direction properly
+    frame['wind_east'], frame['wind_north'], _ = calc.wind_vector_components(frame['wind_speed'], frame['wind_dir'])
+
+    # round up each 1 minute group so data at time T is the average of data
+    # from T - 1 (exclusive) to T (inclusive).
+    # new_frame = frame.resample('1T', closed='right', loffset='1T').mean()
+    new_frame = frame.resample('5S', closed='right', loffset='5S').mean()
+
+    # 2 minute rolling average of 5 second data (5 seconds * 24 = 120 seconds = 2 minutes)
+    winds_frame_5s = new_frame[['wind_speed', 'wind_east', 'wind_north']]
+    # winds_frame_5s = winds_frame_5s.resample('5S', closed='right', loffset='5S').mean()
+    winds_frame_2m = winds_frame_5s.rolling(24, win_type='boxcar').mean()
+    winds_frame_2m['gust'] = calculate_wind_gust(winds_frame_5s['wind_speed'], winds_frame_2m['wind_speed'])
+    # rolling average is used for mean output
+    new_frame.update(winds_frame_2m)  # adds wind_speed, wind_east/north
+    new_frame['gust'] = winds_frame_2m['gust']
+
+    # average the values
+    if summary:
+        frame = summary_over_interval(frame, interval_width)
+    else:
+        frame = new_frame.resample(interval_width, closed='right', loffset=interval_width).mean()
+        # gust_idx = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
+        # frame['gust'][:] = new_frame['gust'][gust_idx.values]
+        # frame['wind_dir'] = calc.wind_vector_degrees(frame['wind_east'][gust_idx.values], frame['wind_north'][gust_idx.values])
+        frame['wind_dir'] = calc.wind_vector_degrees(frame['wind_east'], frame['wind_north'])
+        frame['gust'] = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).max()
+    frame.fillna(np.nan, inplace=True)
 
     if start and end:
         frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'):end.strftime('%Y-%m-%d %H:%M:%S')]
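The block above averages wind direction through its east/north components because a naive mean of compass degrees fails across north (350° and 10° should average to 0°, not 180°). A self-contained sketch of that idea, using plain numpy in place of the project's calc.wind_vector_components and calc.wind_vector_degrees helpers:

import numpy as np

wind_dir = np.array([350., 10.])   # degrees, straddling north
wind_speed = np.array([5., 5.])    # m/s
print(wind_dir.mean())             # 180.0 -- the naive answer, pointing south

# Decompose into components, average those, then recombine into degrees.
rad = np.deg2rad(wind_dir)
wind_east = wind_speed * np.sin(rad)
wind_north = wind_speed * np.cos(rad)
mean_dir = np.rad2deg(np.arctan2(wind_east.mean(), wind_north.mean())) % 360.
print(mean_dir)                    # 0.0 -- correct

The 2-minute rolling mean works the same way: 24 samples of 5-second data span 120 seconds, and averaging the components rather than the angles keeps the recombined direction meaningful.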
@@ -371,6 +395,7 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
     else:
         chunk_sizes = [frame.shape[0]]
+    import ipdb; ipdb.set_trace()
     first_stamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
     # NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See:
     # http://unidata.github.io/netcdf4-python/#netCDF4.MFDataset
@@ -406,7 +431,7 @@ def main():
     parser.add_argument('-e', '--end-time', type=_dt_convert,
                         help='End time of massive netcdf file. Formats allowed: ' +
                         "\'YYYY-MM-DDTHH:MM:SS\', \'YYYY-MM-DD\'")
-    parser.add_argument('-n', '--interval',
+    parser.add_argument('-n', '--interval', default='1T',
                         help="""Width of the interval to average input data
 over in Pandas offset format. If not specified, 1 minute averages are used. If
 specified then '_high', '_mean', and '_low' versions of the data fields are
@@ -414,6 +439,8 @@ written to the output NetCDF.
 Use '1D' for daily or '5T' for 5 minute averages.
 See this page for more details:
 http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases""")
+    parser.add_argument('--summary', action='store_true',
+                        help="Create a file with _low, _mean, _high versions of every variable name")
     parser.add_argument('-f', '--fields', nargs='+', default=schema.met_vars,
                         help="Variable names to include in the NetCDF file (base name, no suffixes)")
     parser.add_argument('--chunk-size', type=int, help='chunk size for the netCDF file')
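'1T', '5T', and '1D' in the new default and help text are pandas offset aliases; create_giant_netcdf resamples with closed='right' and loffset so the value at time T summarizes the interval (T - width, T]. A small sketch of that labeling (loffset matched the pandas API of the time; it was deprecated in later releases):

import numpy as np
import pandas as pd

idx = pd.date_range('2017-01-01 00:00:05', periods=24, freq='5S')
s = pd.Series(np.arange(24.), index=idx)

# closed='right' puts a sample stamped exactly at T into the bin ending at T;
# loffset='1T' shifts bin labels so each bin is labeled by its right edge.
avg = s.resample('1T', closed='right', loffset='1T').mean()
print(avg)  # one value per minute, labeled 00:01:00 and 00:02:00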
@@ -438,8 +465,9 @@ each input file is mapped to the corresponding output file.
     elif not args.start_time and args.end_time:
         raise ValueError('start time must be specified when end time is specified')
 
-    database = MEAN_DATABASE if args.interval else schema.database
-    mini_database = {k: database[k] for k in args.fields}
+    mini_database = {k: schema.database[k] for k in args.fields}
+    if args.summary:
+        mini_database = make_summary_dict(mini_database)
 
     # Case 1: All inputs to 1 output file
     # Case 2: Each input in to a separate output file
@@ -455,7 +483,7 @@ each input file is mapped to the corresponding output file.
     try:
         create_giant_netcdf(in_files, out_fn, args.zlib, args.chunk_size,
-                            args.start_time, args.end_time, args.interval,
+                            args.start_time, args.end_time, args.interval, args.summary,
                             mini_database)
         success = True
     except (ValueError, TypeError):