Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
MendotaBuoy
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
MetObs
MendotaBuoy
Merge requests
!1
Sorting script
Code
Review changes
Check out branch
Download
Patches
Plain diff
Merged
Sorting script
sorting-script
into
master
Overview
2
Commits
10
Pipelines
0
Changes
1
Merged
Katherine Kolman
requested to merge
sorting-script
into
master
6 years ago
Overview
2
Commits
10
Pipelines
0
Changes
1
Expand
0
0
Merge request reports
Compare
master
version 3
9bf623f0
6 years ago
version 2
bd89d671
6 years ago
version 1
a9f4fe34
6 years ago
master (base)
and
latest version
latest version
ae9bd1cb
10 commits,
6 years ago
version 3
9bf623f0
9 commits,
6 years ago
version 2
bd89d671
8 commits,
6 years ago
version 1
a9f4fe34
7 commits,
6 years ago
1 file
+
118
−
0
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
mendotabuoy/tools/sortingscript.py
0 → 100644
+
118
−
0
Options
import
os
from
datetime
import
datetime
,
timedelta
import
sys
import
logging
# Module-level logger named after this module; used by the functions below
# for progress (info) and per-directory / duplicate-line (debug) messages.
LOG = logging.getLogger(__name__)
def isLegacyFile(filename):
    """Return True when the year embedded in ``filename`` sorts before "2013".

    The second dot-separated field of ``filename`` is taken as a
    dash-separated date and its first component as the year. The comparison
    is a plain string comparison against ``"2013"``.
    """
    date_field = filename.split(".")[1]
    year_text = date_field.split("-")[0]
    return year_text < "2013"
def getDateTime(line, isLegacy):
    """Extract the timestamp of one data line as a ``datetime``.

    Two line layouts are handled:

    * Current layout (``isLegacy`` false): the line is split on spaces. The
      first token holds ``YYYY-MM-DD`` after one leading character and the
      second token starts with ``HH:MM:SS``. No offset is applied.
    * Legacy layout (``isLegacy`` true): the line is split on commas and the
      4th, 5th and 6th fields are year, day-of-year and a clock value of up
      to four digits (left-padded with zeros to ``HHMM``). A clock value of
      exactly ``"2400"`` is read as midnight of the following day. Six hours
      are added to every legacy timestamp.
      NOTE(review): the six-hour shift looks like a local-time-to-UTC
      conversion -- confirm against the data source.
    """
    if not isLegacy:
        tokens = line.split(" ")
        date_token = tokens[0]
        clock = datetime.strptime(tokens[1][:8], "%H:%M:%S")
        return datetime(
            int(date_token[1:5]),
            int(date_token[6:8]),
            int(date_token[9:11]),
            clock.hour,
            clock.minute,
            clock.second,
        )

    # Only the first six comma-separated fields matter; the first three are ignored.
    _, _, _, year_text, doy_text, clock_text = line.split(",")[:6]
    # Midnight at the start of the day identified by (day-of-year, year).
    day_start = datetime.strptime(doy_text + " " + year_text, "%j %Y")

    if clock_text == "2400":
        # "2400" cannot be parsed as %H%M; treat it as 00:00 of the next day.
        return day_start + timedelta(days=1, hours=6)

    clock = datetime.strptime(clock_text.rjust(4, "0"), "%H%M")
    return day_start.replace(hour=clock.hour, minute=clock.minute) + timedelta(hours=6)
# Default directory searched for input files when --input is not given.
# NOTE(review): this is an absolute path under one user's home directory --
# presumably a development default; confirm before relying on it.
INPUT_DIR = "/Users/kkolman/data1/raw/mendota/buoy/"
# Default base directory for the sorted output files when --output is not given.
OUTPUT_DIR = "./testdata2/data1/raw/mendota/buoy/"
def create_line_infos(base_dir):
    """Collect header and data lines from every buoy file under ``base_dir``.

    Walks ``base_dir`` recursively and reads each file whose name starts with
    ``"me"``. The second dot-separated field of the file name is parsed as
    the file date (``YYYY-MM-DD``), and the text after the last underscore of
    the first field becomes the file type, prefixed with ``"_"`` (the type
    ``"_buoy"`` is mapped to the empty string).

    A line is treated as a header when neither of its first two characters
    is a digit; every other line is a data line whose timestamp is parsed
    with ``getDateTime``.

    Args:
        base_dir: Root directory to search.

    Returns:
        A ``(headers, line_infos)`` tuple. ``headers`` maps
        ``(file_type, file_date)`` to the list of header lines read from the
        matching files. ``line_infos`` is a list of
        ``(file_type, line_date, file_date, line)`` tuples, one per data
        line, in the order the lines were read.
    """
    line_infos = []
    headers = {}

    LOG.info("Searching through files...")
    for dirpath, _dirs, files in os.walk(base_dir):
        LOG.debug("Searching: %s...", dirpath)
        for filename in files:
            if not filename.startswith("me"):
                continue

            filepath = os.path.join(dirpath, filename)
            name_fields = filename.split(".")
            file_date = datetime.strptime(name_fields[1], "%Y-%m-%d").date()
            is_legacy = isLegacyFile(filename)
            file_type = "_" + name_fields[0].split("_")[-1]
            if file_type == "_buoy":
                file_type = ""

            # newline='' keeps the original line terminators untouched so
            # lines can be written back out exactly as they were read.
            with open(filepath, "r", newline='') as filecontents:
                for line in filecontents:
                    # The classification below inspects the first two
                    # characters. A one-character line (e.g. a bare "\n")
                    # carries no data and would otherwise raise IndexError.
                    if len(line) < 2:
                        continue
                    # Header lines start with two non-digit characters.
                    if not (line[0].isdigit() or line[1].isdigit()):
                        headers.setdefault((file_type, file_date), []).append(line)
                        continue
                    line_date = getDateTime(line, is_legacy)
                    line_infos.append((file_type, line_date, file_date, line))

    return headers, line_infos
def main():
    """Sort buoy data lines into one output file per day and file type.

    Command-line options:
        --input   One or more directories to search (default: ``INPUT_DIR``).
        --output  Base output directory (default: ``OUTPUT_DIR``).

    Lines from all input directories are gathered with
    ``create_line_infos``, merged, sorted by
    ``(file_type, line_date, file_date, line)`` and written to
    ``<output>/YYYY/MM/DD/mendota_buoy<file_type>.YYYY-MM-DD.ascii``, where
    the date comes from each line's own timestamp. Every output file starts
    with the header lines recorded for the source file of its first data
    line. Within an output file, a line whose timestamp equals that of the
    previously written line is logged as a duplicate and dropped.

    Returns:
        None, so ``sys.exit(main())`` exits with status 0.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("--input", nargs="+", default=[INPUT_DIR],
                        help="One or more input directories to search for files")
    parser.add_argument("--output", default=OUTPUT_DIR,
                        help="Base output directory to write files to (next subdirectory is YYYY)")
    args = parser.parse_args()
    input_dirs = args.input
    output_dir = args.output

    logging.basicConfig(level=logging.DEBUG)

    # Merge the results of every input directory before writing, so that no
    # directory's lines are lost or overwritten by a later directory.
    headers = {}
    line_infos = []
    for input_dir in input_dirs:
        dir_headers, dir_line_infos = create_line_infos(input_dir)
        for key, header_lines in dir_headers.items():
            # Keep the first header seen for a key so overlapping inputs do
            # not duplicate header lines in the output.
            headers.setdefault(key, header_lines)
        line_infos.extend(dir_line_infos)

    LOG.info("Writing data lines to destination files...")
    filename_pattern = os.path.join("%Y", "%m", "%d", "mendota_buoy{}.%Y-%m-%d.ascii")

    current_file = None
    current_file_date = None
    current_file_type = None
    prev_line_date = None
    try:
        for file_type, line_date, file_date, line in sorted(line_infos):
            line_day = line_date.date()
            if (current_file is None
                    or line_day != current_file_date
                    or file_type != current_file_type):
                # Day or file type changed: switch to the matching output file.
                fpath = os.path.join(
                    output_dir, line_date.strftime(filename_pattern.format(file_type)))
                os.makedirs(os.path.dirname(fpath), exist_ok=True)
                if current_file is not None:
                    current_file.close()
                # newline='' writes the lines back with their original terminators.
                current_file = open(fpath, 'w', newline='')
                current_file.writelines(headers.get((file_type, file_date), []))
                current_file_date = line_day
                current_file_type = file_type
                # Duplicates are only meaningful within a single output file.
                prev_line_date = None

            if prev_line_date is not None and prev_line_date == line_date:
                LOG.debug("Duplicate line: %s", line_date.isoformat())
                continue

            current_file.write(line)
            prev_line_date = line_date
    finally:
        # current_file stays None when no data lines were found at all.
        if current_file is not None:
            current_file.close()
# When run as a script, call main() and pass its return value to sys.exit
# as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Loading