MetObs / AossTower

Commit 18835498 (unverified)
Authored 8 years ago by David Hoese
Parent: cfc38e61

    Update netcdf generation to use 5s data in summary/monthly generation
Showing 1 changed file: aosstower/level_b1/nc.py (+48 additions, −20 deletions)
--- a/aosstower/level_b1/nc.py
+++ b/aosstower/level_b1/nc.py
@@ -20,9 +20,10 @@ KNOTS_9 = calc.knots_to_mps(9.)
 KNOTS_5 = calc.knots_to_mps(5.)
 KNOTS_3 = calc.knots_to_mps(3.)
 KNOTS_2 = calc.knots_to_mps(2.)
+DEFAULT_FLOAT_FILL = -9999.
 
-def make_mean_dict(source_dict):
+def make_summary_dict(source_dict):
     """Create the '_mean', '_low', '_high' file structure."""
     dest_dict = {}
     for key in source_dict:
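The rename from make_mean_dict to make_summary_dict matches what the helper produces: one '_mean', '_low', and '_high' entry per source field. The loop body is collapsed in this view; a minimal sketch of the idea (the suffix handling shown here is an assumption based on the docstring, not the exact code):

import copy

def make_summary_dict(source_dict):
    """Create the '_mean', '_low', '_high' file structure."""
    dest_dict = {}
    for key, info in source_dict.items():
        # one derived variable per statistic, e.g. air_temp -> air_temp_mean
        for suffix in ('_mean', '_low', '_high'):
            dest_dict[key + suffix] = copy.deepcopy(info)
    return dest_dict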
@@ -32,9 +33,6 @@ def make_mean_dict(source_dict):
     return dest_dict
 
-
-MEAN_DATABASE = make_mean_dict(schema.database)
-
 
 def filter_array(arr, valid_min, valid_max, valid_delta):
     """Create QC field array.
@@ -93,15 +91,15 @@ def create_variables(nc_file, first_stamp, database, chunk_sizes=None, zlib=False):
     coordinates = {
         # fields: type, dimension, fill, valid_min, std_name, longname, units, valid_max, cf_role, axis
-        'time': [np.float64, ('time',), -999., None, None, "Hour offset from midnight",
+        'time': [np.float64, ('time',), DEFAULT_FLOAT_FILL, None, None, "Hour offset from midnight",
                  t_u, None, None, None],
-        'lon': [np.float32, tuple(), -999., -180., 'longitude', None, 'degrees_east', 180., None],
+        'lon': [np.float32, tuple(), DEFAULT_FLOAT_FILL, -180., 'longitude', None, 'degrees_east', 180., None],
-        'lat': [np.float32, tuple(), -999., -90., 'latitude', None, 'degrees_north', 90., None],
+        'lat': [np.float32, tuple(), DEFAULT_FLOAT_FILL, -90., 'latitude', None, 'degrees_north', 90., None],
-        'alt': [np.float32, tuple(), -999., None, 'height', 'vertical distance', 'm', None, None],
+        'alt': [np.float32, tuple(), DEFAULT_FLOAT_FILL, None, 'height', 'vertical distance', 'm', None, None],
         # int64 for base_time would be best, but NetCDF4 Classic does not support it
         # NetCDF4 Classic mode was chosen so users can use MFDatasets (multi-file datasets)
-        'base_time': [np.int32, tuple(), -999., None, 'time', btln, btu, None, None],
+        'base_time': [np.int32, tuple(), DEFAULT_FLOAT_FILL, None, 'time', btln, btu, None, None],
-        'time_offset': [np.float64, ('time',), -999., None, 'time', to_ln, to_u, None, None],
+        'time_offset': [np.float64, ('time',), DEFAULT_FLOAT_FILL, None, 'time', to_ln, to_u, None, None],
         'station_name': ['c', ('max_len_station_name',), '\0', None, None, 'station name', None, None, 'timeseries_id'],
     }
@@ -154,7 +152,7 @@ def create_variables(nc_file, first_stamp, database, chunk_sizes=None, zlib=False):
         varTup = database[entry]
         variable = nc_file.createVariable(entry, np.float32,
-                                          dimensions=('time',), fill_value=float(-999), zlib=zlib,
+                                          dimensions=('time',), fill_value=DEFAULT_FLOAT_FILL, zlib=zlib,
                                           chunksizes=chunk_sizes)
         variable.standard_name = varTup[1]
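The hunks above replace scattered -999 literals with the new module-level DEFAULT_FLOAT_FILL constant. The point of a single named fill value is that the _FillValue attribute set at variable creation and the sentinel substituted for NaNs at write time must agree, or readers will not see missing samples as masked. A minimal sketch with the netCDF4 library (file and variable names here are hypothetical):

import numpy as np
import pandas as pd
from netCDF4 import Dataset

DEFAULT_FLOAT_FILL = -9999.

nc_file = Dataset('tower_example.nc', 'w', format='NETCDF4_CLASSIC')
nc_file.createDimension('time', None)
var = nc_file.createVariable('air_temp', np.float32, dimensions=('time',),
                             fill_value=DEFAULT_FLOAT_FILL)

# NaNs become the same sentinel stored in _FillValue, so netCDF4 returns
# a masked array for the missing sample on read
var[:] = pd.Series([21.5, np.nan, 22.1]).fillna(DEFAULT_FLOAT_FILL).values
nc_file.close()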
@@ -250,7 +248,7 @@ def minute_averages(frame):
     return new_frame.fillna(np.nan)
 
-def average_over_interval(frame, interval_width):
+def summary_over_interval(frame, interval_width):
     """takes a frame and an interval to average it over, and returns a minimum,
     maximum, and average dataframe for that interval
     """
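Per its docstring, the renamed summary_over_interval produces minimum, maximum, and average data for each interval. The diff does not show the function body, so this pandas sketch is an illustration of the idea rather than the implementation:

import numpy as np
import pandas as pd

idx = pd.date_range('2017-01-01', periods=120, freq='5S')
frame = pd.DataFrame({'air_temp': np.random.rand(120) + 20.0}, index=idx)

# one statistic per column, mirroring the '_low'/'_mean'/'_high' naming
resampled = frame.resample('1T', closed='right', label='right')
summary = pd.concat({'air_temp_low': resampled['air_temp'].min(),
                     'air_temp_mean': resampled['air_temp'].mean(),
                     'air_temp_high': resampled['air_temp'].max()}, axis=1)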
@@ -309,7 +307,7 @@ def write_vars(nc_file, frame, database):
         if varName not in fileVar:
             LOG.debug('Unused input variable: %s', varName)
             continue
-        fileVar[varName][:] = frame[varName].fillna(-999.).values
+        fileVar[varName][:] = frame[varName].fillna(DEFAULT_FLOAT_FILL).values
         valid_min = database[varName][5]
         valid_max = database[varName][6]
@@ -354,14 +352,40 @@ def write_global_attributes(nc_file, input_sources):
 def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
                         start=None, end=None, interval_width=None,
+                        summary=False,
                         database=schema.database):
     frame = get_data(input_files)
     if frame.empty:
         raise ValueError("No data found from input files: {}".format(", ".join(input_files)))
 
-    frame = minute_averages(frame)
-    if interval_width:
-        frame = average_over_interval(frame, interval_width)
+    # Add wind direction components so we can average wind direction properly
+    frame['wind_east'], frame['wind_north'], _ = calc.wind_vector_components(frame['wind_speed'], frame['wind_dir'])
+
+    # round up each 1 minute group so data at time T is the average of data
+    # from T - 1 (exclusive) to T (inclusive).
+    # new_frame = frame.resample('1T', closed='right', loffset='1T').mean()
+    new_frame = frame.resample('5S', closed='right', loffset='5S').mean()
+
+    # 2 minute rolling average of 5 second data (5 seconds * 24 = 120 seconds = 2 minutes)
+    winds_frame_5s = new_frame[['wind_speed', 'wind_east', 'wind_north']]
+    # winds_frame_5s = winds_frame_5s.resample('5S', closed='right', loffset='5S').mean()
+    winds_frame_2m = winds_frame_5s.rolling(24, win_type='boxcar').mean()
+    winds_frame_2m['gust'] = calculate_wind_gust(winds_frame_5s['wind_speed'], winds_frame_2m['wind_speed'])
+    # rolling average is used for mean output
+    new_frame.update(winds_frame_2m)  # adds wind_speed, wind_east/north
+    new_frame['gust'] = winds_frame_2m['gust']
+
+    # average the values
+    if summary:
+        frame = summary_over_interval(frame, interval_width)
+    else:
+        frame = new_frame.resample(interval_width, closed='right', loffset=interval_width).mean()
+        # gust_idx = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).apply(lambda arr_like: arr_like.argmax())
+        # frame['gust'][:] = new_frame['gust'][gust_idx.values]
+        # frame['wind_dir'] = calc.wind_vector_degrees(frame['wind_east'][gust_idx.values], frame['wind_north'][gust_idx.values])
+        frame['wind_dir'] = calc.wind_vector_degrees(frame['wind_east'], frame['wind_north'])
+        frame['gust'] = new_frame['gust'].resample(interval_width, closed='right', loffset=interval_width).max()
+    frame.fillna(np.nan, inplace=True)
 
     if start and end:
         frame = frame[start.strftime('%Y-%m-%d %H:%M:%S'): end.strftime('%Y-%m-%d %H:%M:%S')]
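Two details in this hunk are worth unpacking. Wind direction cannot be averaged directly because of the 0/360 wrap (the mean of 359 and 1 should be 0, not 180), so the code averages east/north vector components and converts back with calc.wind_vector_degrees. And because the input is now 5-second data, 24 samples make the standard 2-minute wind window. A self-contained sketch of both mechanics; the two helper functions are stand-ins assumed to mirror the calc module, and plain rolling(24).mean() is equivalent to the uniform boxcar window used above:

import numpy as np
import pandas as pd

def wind_vector_components(speed, wind_dir):
    # stand-in for calc.wind_vector_components: decompose into east/north
    rads = np.deg2rad(wind_dir)
    return speed * np.sin(rads), speed * np.cos(rads)

def wind_vector_degrees(east, north):
    # stand-in for calc.wind_vector_degrees: mean components back to degrees
    return np.rad2deg(np.arctan2(east, north)) % 360.

# one hour of synthetic 5-second observations
idx = pd.date_range('2017-01-01', periods=720, freq='5S')
frame = pd.DataFrame({'wind_speed': np.random.rand(720) * 10.,
                      'wind_dir': np.random.rand(720) * 360.}, index=idx)
frame['wind_east'], frame['wind_north'] = wind_vector_components(
    frame['wind_speed'], frame['wind_dir'])

# 24 samples * 5 s = 120 s: the 2-minute rolling mean of the 5 s data
winds_2m = frame[['wind_speed', 'wind_east', 'wind_north']].rolling(24).mean()

# interval means; direction is rebuilt from the averaged components rather
# than averaged directly (label='right' plays the role of the loffset shift)
means = frame.resample('1T', closed='right', label='right').mean()
means['wind_dir'] = wind_vector_degrees(means['wind_east'], means['wind_north'])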
@@ -371,6 +395,7 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
     else:
         chunk_sizes = [frame.shape[0]]
+    import ipdb; ipdb.set_trace()
     first_stamp = dt.strptime(str(frame.index[0]), '%Y-%m-%d %H:%M:%S')
     # NETCDF4_CLASSIC was chosen so that MFDataset reading would work. See:
     # http://unidata.github.io/netcdf4-python/#netCDF4.MFDataset
@@ -406,7 +431,7 @@ def main():
     parser.add_argument('-e', '--end-time', type=_dt_convert,
                         help='End time of massive netcdf file. Formats allowed: ' +
                              "\'YYYY-MM-DDTHH:MM:SS\', \'YYYY-MM-DD\'")
-    parser.add_argument('-n', '--interval',
+    parser.add_argument('-n', '--interval', default='1T',
                         help="""Width of the interval to average input data
 over in Pandas offset format. If not specified, 1 minute averages are used. If
 specified then '_high', '_mean', and '_low' versions of the data fields are
@@ -414,6 +439,8 @@ written to the output NetCDF.
 Use '1D' for daily or '5T' for 5 minute averages.
 See this page for more details:
 http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases""")
+    parser.add_argument('--summary', action='store_true',
+                        help="Create a file with _low, _mean, _high versions of every variable name")
     parser.add_argument('-f', '--fields', nargs='+', default=schema.met_vars,
                         help="Variable names to include in the NetCDF file (base name, no suffixes)")
     parser.add_argument('--chunk-size', type=int, help='chunk size for the netCDF file')
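The -n/--interval values ('1T', '5T', '1D') are standard pandas offset aliases, as the linked page documents. A quick illustration, independent of this script:

import pandas as pd

idx = pd.date_range('2017-01-01', periods=10, freq='1T')
s = pd.Series(range(10), index=idx)
five_min = s.resample('5T', closed='right', label='right').mean()  # 5 minute averages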
@@ -438,8 +465,9 @@ each input file is mapped to the corresponding output file.
     elif not args.start_time and args.end_time:
         raise ValueError('start time must be specified when end time is specified')
 
-    database = MEAN_DATABASE if args.interval else schema.database
-    mini_database = {k: database[k] for k in args.fields}
+    mini_database = {k: schema.database[k] for k in args.fields}
+    if args.summary:
+        mini_database = make_summary_dict(mini_database)
 
     # Case 1: All inputs to 1 output file
     # Case 2: Each input in to a separate output file
@@ -455,7 +483,7 @@ each input file is mapped to the corresponding output file.
         try:
             create_giant_netcdf(in_files, out_fn, args.zlib,
                                 args.chunk_size, args.start_time,
-                                args.end_time, args.interval,
+                                args.end_time, args.interval, args.summary,
                                 mini_database)
             success = True
         except (ValueError, TypeError):