Unverified Commit 19642d1f authored by David Hoese

Further simplify files API

parent e064a2cf
@@ -45,8 +45,19 @@ def handle_begin_end(begin, end):
return begin, end
def handle_single_stream(cur_dt, site, inst, level, version, stream_pattern):
def get_stream_info(stream_id):
from flask import current_app as app
parts = stream_id.split('.')
if len(parts) == 4:
site, inst, stream_pattern, level = parts
version = '*'
elif len(parts) == 5:
site, inst, stream_pattern, level, version = parts
level = level.replace('l', 'level_')
version = version.replace('v', 'version_')
if site not in file_responses.ARCHIVE_INFO:
return 'missing_site'
info = file_responses.ARCHIVE_INFO[site]
@@ -59,6 +70,7 @@ def handle_single_stream(cur_dt, site, inst, level, version, stream_pattern):
return 'missing_level'
info = info[level]
# TODO: Add '*' shortcut
if stream_pattern not in info:
return 'missing_stream_pattern'
info = info[stream_pattern]
@@ -67,6 +79,8 @@ def handle_single_stream(cur_dt, site, inst, level, version, stream_pattern):
# FIXME: Get version from archive_info
# and use newest one when '*'
version = 'version_00'
if version not in info['versions']:
return 'missing_version'
path = os.path.join(site,
inst,
@@ -74,77 +88,48 @@ def handle_single_stream(cur_dt, site, inst, level, version, stream_pattern):
version,
file_responses.FREQUENCY_DIR_FMT[info['frequency']],
info['pattern'])
path = cur_dt.strftime(path)
pathname = os.path.join(app.config['ARCHIVE_ROOT'], path)
if os.path.isfile(pathname):
url = os.path.join(app.config['ARCHIVE_URL'], path)
file_info = {}
file_info['filename'] = os.path.basename(pathname)
file_info['url'] = url
file_info['site'] = site
file_info['inst'] = inst
file_info['level'] = level
file_info['size'] = os.stat(pathname).st_size
return file_info
return {}
def get_data_for_dt(cur_dt, sites, insts, levels, versions, patterns):
data = []
for idx, site in enumerate(sites):
inst = insts[idx]
level = levels[idx]
version = versions[idx]
pattern = patterns[idx]
stream_info = {}
stream_info['relpath'] = path
stream_info['site'] = site
stream_info['inst'] = inst
stream_info['level'] = level
stream_info['version'] = version
return stream_info
file_info = handle_single_stream(cur_dt, site, inst, level, version, pattern)
#if file info is empty, file does not exist
#should not be appended to return frame
if not file_info:
continue
elif isinstance(file_info, str):
# FIXME: Gather stream info and then fill in for a specific date
# FIXME: Actually provide the stream name so they know what is wrong
return file_info
#append to list - later to be created into frame
data.append(file_info)
return data
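For orientation, here is a minimal standalone sketch of the stream-ID parsing that get_stream_info now performs before any ARCHIVE_INFO validation (a simplification: the real function returns error strings rather than raising):

def parse_stream_id(stream_id):
    # A stream ID is '<site>.<inst>.<pattern>.<level>[.<version>]',
    # e.g. 'aoss.tower.ascii.l00' or 'aoss.tower.ascii.l00.v00'.
    parts = stream_id.split('.')
    if len(parts) == 4:
        site, inst, stream_pattern, level = parts
        version = '*'  # no version given: match any
    elif len(parts) == 5:
        site, inst, stream_pattern, level, version = parts
    else:
        raise ValueError('bad stream ID: {!r}'.format(stream_id))
    # 'l00' -> 'level_00' and 'v00' -> 'version_00' (archive directory names)
    level = level.replace('l', 'level_')
    version = version.replace('v', 'version_')
    return site, inst, stream_pattern, level, version

parse_stream_id('aoss.tower.ascii.l00')
# -> ('aoss', 'tower', 'ascii', 'level_00', '*')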
def get_data(start, end, streams):
from flask import current_app as app
def get_data(start, end, sites, insts, levels, versions, patterns):
cur_dt = start
data = []
for day in range((end - start).days + 1):
tmp_data = get_data_for_dt(cur_dt, sites, insts, levels, versions, patterns)
if isinstance(tmp_data, str):
return tmp_data
data += tmp_data
cur_dt += delta(days=1)
dts = tuple(cur_dt + delta(days=i) for i in range((end - start).days + 1))
for stream_id in streams:
stream_info = get_stream_info(stream_id)
if isinstance(stream_info, str):
# an error occurred
return stream_id, stream_info
relpath = stream_info['relpath']
for dt in dts:
path = dt.strftime(relpath)
pathname = os.path.join(app.config['ARCHIVE_ROOT'], path)
if os.path.exists(pathname):
file_info = stream_info.copy()
file_info['filename'] = os.path.basename(pathname)
file_info['url'] = os.path.join(app.config['ARCHIVE_URL'], path)
file_info['size'] = os.stat(pathname).st_size
data.append(file_info)
return pd.DataFrame(data)
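The per-day loop was replaced by a precomputed tuple of datetimes. A small sketch of that inclusive range expansion, assuming the module's delta alias is datetime.timedelta:

from datetime import datetime, timedelta as delta

def daily_datetimes(start, end):
    # Inclusive on both ends: (end - start).days + 1 entries.
    return tuple(start + delta(days=i) for i in range((end - start).days + 1))

daily_datetimes(datetime(2016, 7, 11), datetime(2016, 7, 13))
# -> (datetime(2016, 7, 11, 0, 0), datetime(2016, 7, 12, 0, 0), datetime(2016, 7, 13, 0, 0))

Each entry is then strftime'd into the stream's relpath pattern and checked against ARCHIVE_ROOT on disk.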
def test_resources(sites, insts):
if not sites or not insts:
return False
if len(sites) != len(insts):
return False
for site, inst in zip(sites, insts):
if site not in file_responses.ARCHIVE_INFO:
return False
if inst not in file_responses.ARCHIVE_INFO[site]:
return False
return True
def handleCSV(frame):
# Normalize the frame that was given so we only have expected information
columns = ['filename', 'url', 'site', 'inst', 'level', 'version', 'size']
frame = frame[columns]
body = StringIO()
output = StringIO()
@@ -152,19 +137,15 @@ def handleCSV(frame):
output.write('# status: success\n# code: 200\n# message:\n')
output.write('# num_results: ' + str(len(list(frame.index))) + '\n')
output.write('# Fields: filename, url, site, inst, level, size')
output.write('# Fields: {}'.format(', '.join(columns)))
output.write('\n' + body.getvalue())
if frame.empty:
return output.getvalue()
frame['size'] = frame['size'].apply(str)
allRows = frame.values.tolist()
for row in allRows:
output.write(', '.join(row) + '\n')
for row in frame.values:
output.write(', '.join(str(x) for x in row) + '\n')
return output.getvalue()
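With the Fields header now generated from the shared columns list, a successful CSV response reads roughly as follows (file name from the test data; URL and size illustrative):

# status: success
# code: 200
# message:
# num_results: 1
# Fields: filename, url, site, inst, level, version, size
rig_tower.2016-07-11.ascii, http://archive.example/aoss/tower/.../rig_tower.2016-07-11.ascii, aoss, tower, level_00, version_00, 512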
@@ -215,35 +196,24 @@ def handleBAT(frame):
return output.getvalue()
urls = frame['url']
names = frame['filename']
directories = []
for idx, url in enumerate(urls):
filename = names[idx]
filePath = url.split('{')[1]
directory = filePath.split(filename)[0]
directory = directory.replace('/', '\\')
relpath = frame['relpath'][idx]
directory = os.path.dirname(relpath).replace('/', '\\')
if directory not in directories:
directories.append(directory)
for directory in directories:
output.write('\nif not exist %cd%\\data\\' + directory + ' (\n')
output.write('&nbsp&nbsp&nbsp&nbspmkdir %cd%\\')
output.write(directory + '\n)\n')
directories.append(directory)
output.write('\nif not exist %cd%\\data\\' + directory + ' (\n')
output.write('&nbsp&nbsp&nbsp&nbspmkdir %cd%\\')
output.write(directory + '\n)\n')
output.write('\nbitsadmin /create myDownloadJob\n\n')
for url in urls:
output.write('bitsadmin /addfile myDownloadJob ' + url.replace('{', ''))
nextLine = ' %cd%\\data\\' + url.split('{')[1] + '\n'
for idx, url in enumerate(urls):
relpath = frame['relpath'][idx]
output.write('bitsadmin /addfile myDownloadJob ' + url)
nextLine = ' %cd%\\data\\' + relpath + '\n'
nextLine = nextLine.replace('/', '\\')
output.write(nextLine)
output.write('bitsadmin /resume myDownloadJob\n')
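Instead of recovering the target directory by splitting the URL on '{', the rewrite derives it from the frame's relpath column. A minimal sketch of that derivation (relpath value illustrative):

import os

relpath = 'aoss/tower/level_00/version_00/2016/07/rig_tower.2016-07-11.ascii'  # illustrative
directory = os.path.dirname(relpath).replace('/', '\\')
# 'aoss\\tower\\level_00\\version_00\\2016\\07'
# Used both in the 'if not exist ... mkdir' guard and as the
# bitsadmin /addfile download target under %cd%\data\.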
@@ -266,19 +236,20 @@ def handleJSON(frame):
if frame.empty:
return jsonify(**output)
allRows = frame.values.tolist()
# allRows = frame.values.tolist()
body = []
for row in allRows:
newRow = {}
newRow['filename'] = row[0]
newRow['url'] = row[1]
newRow['site'] = row[2]
newRow['inst'] = row[3]
newRow['level'] = row[4]
newRow['size'] = row[5]
body.append(newRow)
for row in frame.values:
new_row = dict((k, row[idx]) for idx, k in enumerate(frame.columns))
# newRow = {}
# newRow['filename'] = row[0]
# newRow['url'] = row[1]
# newRow['site'] = row[2]
# newRow['inst'] = row[3]
# newRow['level'] = row[4]
# newRow['size'] = row[5]
body.append(new_row)
output['data'] = body
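Keying each row by frame.columns keeps the JSON body in sync with whatever columns the frame carries. For reference, a sketch showing that this is equivalent to pandas' records orientation (data illustrative):

import pandas as pd

frame = pd.DataFrame([{'filename': 'rig_tower.2016-07-11.ascii',
                       'site': 'aoss', 'size': 512}])

# The diff's per-row dict build:
body = [dict((k, row[idx]) for idx, k in enumerate(frame.columns))
        for row in frame.values]

# Idiomatic pandas equivalent (modulo numpy scalar types):
body = frame.to_dict(orient='records')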
@@ -327,13 +298,15 @@ def throwStream(fmt):
return render_template('400.html', format=fmt), 400
def handle_error(fmt, error_str):
def handle_error(fmt, error_str, stream_id=None):
try:
handler = file_responses.ERROR_HANDLERS[fmt]
except KeyError:
return render_template('400.html', format=fmt), 400
err_code, err_msg = file_responses.ERROR_MESSAGES[error_str]
if stream_id is not None:
err_msg += ": '{}'".format(stream_id)
res = handler(err_code, err_msg)
if fmt == 'json':
return jsonify(**res), err_code
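The new stream_id argument is simply appended to the looked-up message, so clients can see which stream failed. A sketch of the composition (stream ID hypothetical):

# file_responses.ERROR_MESSAGES['missing_version'] == (400, 'missing or unknown version parameter')
err_code, err_msg = 400, 'missing or unknown version parameter'
stream_id = 'aoss.tower.ascii.l00.v99'  # hypothetical bad version
err_msg += ": '{}'".format(stream_id)
# err_msg == "missing or unknown version parameter: 'aoss.tower.ascii.l00.v99'"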
@@ -348,7 +321,7 @@ RESPONSE_HANDLERS = {
}
def find_stream_files(fmt, beginTime, endTime, sites, insts, levels, patterns, versions):
def find_stream_files(fmt, beginTime, endTime, streams):
try:
times = handle_begin_end(beginTime, endTime)
except TypeError:
@@ -356,20 +329,11 @@
begin_date = times[0]
end_date = times[1]
frame = get_data(begin_date, end_date, streams)
if not test_resources(sites, insts):
return handle_error(fmt, 'missing_inst')
frame = get_data(begin_date, end_date, sites, insts, levels, versions, patterns)
if isinstance(frame, str):
return handle_error(fmt, frame)
# Normalize the frame that was given so we only have expected information
# XXX: What if we want more information in the JSON or other format?
if not frame.empty:
FILES = ['filename', 'url', 'site', 'inst', 'level', 'size']
frame = frame[FILES]
if isinstance(frame, tuple):
stream_id, error_type = frame
return handle_error(fmt, error_type, stream_id=stream_id)
try:
handler = RESPONSE_HANDLERS[fmt]
......
@@ -390,41 +390,41 @@ def getData(fmt):
else:
return handleMultiSites(fmt)
#files
@app.route('/api/files.<fmt>', methods=['GET'])
def getFiles(fmt):
def get_files(fmt):
beginTime = request.args.get('begin')
endTime = request.args.get('end')
dataStream = request.args.get('streams')
sites = []
insts = []
levels = []
versions = []
patterns = []
dataStream = dataStream.split(':')
for stream in dataStream:
getData = stream.split('.')
if len(getData) != 4 and len(getData) != 5:
return files.throwStream(fmt)
else:
sites.append(getData[0])
insts.append(getData[1])
patterns.append(getData[2].replace('_', '.'))
level = getData[3].replace('l', 'level_')
levels.append(level)
if len(getData) == 4:
versions.append('*')
else:
versions.append(getData[4].replace('v', 'version_'))
return files.find_stream_files(fmt, beginTime, endTime, sites, insts, levels, patterns, versions)
streams = request.args.get('streams').split(':')
# stream_id = '<site>.<inst>.<level=lXX>.<pattern>[.version]'
# sites = []
# insts = []
# levels = []
# versions = []
# patterns = []
# dataStream = dataStream.split(':')
# for stream in dataStream:
# getData = stream.split('.')
#
# if len(getData) != 4 and len(getData) != 5:
# return files.throwStream(fmt)
# else:
# sites.append(getData[0])
# insts.append(getData[1])
#
# patterns.append(getData[2].replace('_', '.'))
#
# level = getData[3].replace('l', 'level_')
# levels.append(level)
#
# if len(getData) == 4:
# versions.append('*')
# else:
# versions.append(getData[4].replace('v', 'version_'))
return files.find_stream_files(fmt, beginTime, endTime, streams)
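With the per-part parsing delegated to files.get_stream_info, the route only splits the raw streams argument on ':'. Example usage through Flask's test client, mirroring the tests further down (dates illustrative):

res = app.test_client().get(
    '/api/files.csv?streams=aoss.aeri.PAR.l00:aoss.tower.ascii.l00'
    '&begin=2016-07-11&end=2016-07-11')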
if __name__ == '__main__':
app.debug = True
......
@@ -447,30 +447,30 @@
<li style='font-size: 15px'>
Latest ASCII file for AOSS Tower:
<ul>
<a href="{{ url_for('getFiles', fmt='csv', streams='aoss.tower.ascii.l00') }}" }}>
{{ url_for('getFiles', fmt='csv', streams='aoss.tower.ascii.level_00') | replace('%3A', ':') }}
<a href="{{ url_for('get_files', fmt='csv', streams='aoss.tower.ascii.l00') }}" }}>
{{ url_for('get_files', fmt='csv', streams='aoss.tower.ascii.level_00') | replace('%3A', ':') }}
</a>
</ul>
</li>
<li style='font-size: 15px'>
Latest .SCR files AOSS AERI:
<ul>
<a href="{{ url_for('getFiles', fmt='csv', streams='aoss.aeri.R_SCR.l00:aoss.aeri.E_SCR.l00:aoss.aeri.Y_SCR.l00') }}">
{{ url_for('getFiles', fmt='csv', streams='aoss.aeri.R_SCR.l00:aoss.aeri.E_SCR.l00:aoss.aeri.Y_SCR.l00') | replace('%3A', ':') }}
<a href="{{ url_for('get_files', fmt='csv', streams='aoss.aeri.R_SCR.l00:aoss.aeri.E_SCR.l00:aoss.aeri.Y_SCR.l00') }}">
{{ url_for('get_files', fmt='csv', streams='aoss.aeri.R_SCR.l00:aoss.aeri.E_SCR.l00:aoss.aeri.Y_SCR.l00') | replace('%3A', ':') }}
</a>
</ul>
</li>
<li style='font-size: 15px'>
Last 2 days of all AOSS AERI files:
<ul>
<a href="{{ url_for('getFiles', fmt='csv', streams=('aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.SUM.l00:' +
<a href="{{ url_for('get_files', fmt='csv', streams=('aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.SUM.l00:' +
'aoss.aeri.B1_CXS.l00:aoss.aeri.B1_UVS.l00:aoss.aeri.B2_CXS.l00:aoss.aeri.B2_UVS.l00:aoss.aeri.C1_RNC.l00:aoss.aeri.C2_RNC.l00:' +
'aoss.aeri.F1_CSV.l00:aoss.aeri.F1_CXS.l00:aoss.aeri.F1_UVS.l00:aoss.aeri.F2_CSV.l00:aoss.aeri.F2_CXS.l00:aoss.aeri.F2_UVS.l00:' +
'aoss.aeri.R_SCR.l00:aoss.aeri.E_SCR.l00:aoss.aeri.Y_SCR.l00'), begin='-2') }}" >
<!-- 'aoss.aeri.B1_CXS.l00:aoss.aeri.B1_UVS.l00:aoss.aeri.B2_CXS.l00:aoss.aeri.B2_UVS.l00:aoss.aeri.C1_RNC.l00:aoss.aeri.C2_RNC.l00:' +
aoss.aeri.F1_CSV.l00:aoss.aeri.F1_CXS.l00:aoss.aeri.F1_UVS.l00:aoss.aeri.F2_CSV.l00:aoss.aeri.F2_CXS.l00:aoss.aeri.F2_UVS.l00' +
'aoss.aeri.R_SCR.l00:aoss.aeri.E_SCR.l00:aoss.aeri.Y_SCR.l00'), begin='-2') }} "> -->
{{ url_for('getFiles', fmt='csv', streams=('aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.SUM.l00:' +
{{ url_for('get_files', fmt='csv', streams=('aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.SUM.l00:' +
'aoss.aeri.B1_CXS.l00:aoss.aeri.B1_UVS.l00:aoss.aeri.B2_CXS.l00:aoss.aeri.B2_UVS.l00:aoss.aeri.C1_RNC.l00:aoss.aeri.C2_RNC.l00:' +
'aoss.aeri.F1_CSV.l00:aoss.aeri.F1_CXS.l00:aoss.aeri.F1_UVS.l00:aoss.aeri.F2_CSV.l00:aoss.aeri.F2_CXS.l00:aoss.aeri.F2_UVS.l00:' +
'aoss.aeri.R_SCR.l00:aoss.aeri.E_SCR.l00:aoss.aeri.Y_SCR.l00'
@@ -482,16 +482,16 @@
<li style='font-size: 15px'>
AOSS AERI Par, QC, and B1.UVS files for 2016-07-11
<ul>
<a href="{{ url_for('getFiles', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.B1_UVS.l00', begin='2016-07-11', end='2016-07-11') }}">
{{ url_for('getFiles', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.B1_UVS.l00', begin='2016-07-11', end='2016-07-11') | replace('%3A', ':') }}
<a href="{{ url_for('get_files', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.B1_UVS.l00', begin='2016-07-11', end='2016-07-11') }}">
{{ url_for('get_files', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.aeri.QC.l00:aoss.aeri.B1_UVS.l00', begin='2016-07-11', end='2016-07-11') | replace('%3A', ':') }}
</a>
</ul>
</li>
<li style='font-size: 15px'>
AOSS AERI Par and AOSS Tower ASCII files for 2016-07-11
<ul>
<a href="{{ url_for('getFiles', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.tower.ascii.l00', begin='2016-07-11', end='2016-07-11') }}">
{{ url_for('getFiles', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.tower.ascii.l00', begin='2016-07-11', end='2016-07-11') | replace('%3A', ':') }}
<a href="{{ url_for('get_files', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.tower.ascii.l00', begin='2016-07-11', end='2016-07-11') }}">
{{ url_for('get_files', fmt='csv', streams='aoss.aeri.PAR.l00:aoss.tower.ascii.l00', begin='2016-07-11', end='2016-07-11') | replace('%3A', ':') }}
</a>
</ul>
</li>
......
@@ -51,9 +51,23 @@ class TestFilesAPI(unittest.TestCase):
res = self.app.get('/api/files')
assert b'File Request Application' in res.data
def test_api_tower_daily_nc(self):
def test_api_tower_daily_ascii_csv(self):
res = self.app.get('/api/files.csv?streams=aoss.tower.ascii.l00')
print(res.data)
fn = bytes(self._datetimes[0].strftime('rig_tower.%Y-%m-%d.ascii'), encoding='utf-8')
assert fn in res.data
def test_api_tower_daily_ascii_json(self):
res = self.app.get('/api/files.json?streams=aoss.tower.ascii.l00')
fn = bytes(self._datetimes[0].strftime('rig_tower.%Y-%m-%d.ascii'), encoding='utf-8')
assert fn in res.data
def test_api_tower_daily_ascii_sh(self):
res = self.app.get('/api/files.sh?streams=aoss.tower.ascii.l00')
fn = bytes(self._datetimes[0].strftime('rig_tower.%Y-%m-%d.ascii'), encoding='utf-8')
assert fn in res.data
def test_api_tower_daily_ascii_bat(self):
res = self.app.get('/api/files.bat?streams=aoss.tower.ascii.l00')
fn = bytes(self._datetimes[0].strftime('rig_tower.%Y-%m-%d.ascii'), encoding='utf-8')
assert fn in res.data
......
@@ -20,16 +20,19 @@ ARCHIVE_INFO = {
'ascii': {
'frequency': ProductFrequency.DAILY_FILE,
'pattern': 'rig_tower.%Y-%m-%d.ascii',
'versions': ('version_00',),
},
},
'level_b1': {
'nc-1mo-1d': {
'frequency': ProductFrequency.MONTHLY_DIR,
'pattern': 'aoss_tower.%Y-%m.nc',
'versions': ('version_00',),
},
'nc-1d-1m': {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': 'aoss_tower.%Y-%m-%d.nc',
'versions': ('version_00',),
},
},
},
@@ -38,26 +41,32 @@ ARCHIVE_INFO = {
'par': {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': '%y%m%d.par',
'versions': ('version_00',),
},
'qc': {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': '%y%m%d.qc',
'versions': ('version_00',),
},
'sum': {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': '%y%m%d.sum',
'versions': ('version_00',),
},
'scr-aesitter': {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': 'AESITTER.SCR',
'versions': ('version_00',),
},
'scr-radiance': {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': 'RADIANCE.SCR',
'versions': ('version_00',),
},
'scr-summary': {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': 'SUMMARY.SCR',
'versions': ('version_00',),
},
},
},
@@ -83,6 +92,7 @@ for file_suffix in ('B1.CXS',
nfo = {
'frequency': ProductFrequency.DAILY_DIR,
'pattern': '%y%m%d{}'.format(stream_pat),
'versions': ('version_00',),
}
ARCHIVE_INFO['aoss']['aeri']['level_00'][stream_pat] = nfo
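The new versions tuples give get_stream_info a final thing to validate after walking the nested dict. A sketch of that traversal for one stream (keys from the tower entry above):

info = ARCHIVE_INFO['aoss']               # site
info = info['tower']                      # instrument
info = info['level_00']                   # level ('l00' expanded)
info = info['ascii']                      # stream pattern
assert 'version_00' in info['versions']   # the newly added check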
@@ -120,4 +130,5 @@ ERROR_MESSAGES = {
'missing_site': (400, 'missing or unknown site parameter'),
'missing_level': (400, 'missing or unknown level parameter'),
'missing_stream_pattern': (400, 'missing or unknown stream pattern parameter'),
'missing_version': (400, 'missing or unknown version parameter'),
}