Skip to content
Snippets Groups Projects
Commit 2f77b3ca authored by Alan De Smet's avatar Alan De Smet
Browse files

Use external curl instead of urllib.request

See #13 for details (Also tracking ticket at https://gitlab.ssec.wisc.edu/cspp_geo/cspp-geo-aitf/-/issues/402 )

A few tests had to change because the error output no longer includes "File
not found".
parent c2b13332
No related branches found
No related tags found
No related merge requests found
......@@ -25,12 +25,13 @@ import os
from os import getenv
import time
from time import sleep
from shutil import copyfileobj
import urllib.request
import urllib.error
import subprocess
import pathlib
import contextlib
import fcntl
import errno
import re
from csppfetch.exclusivelockfile import AtomicCreateIfMissing
import csppfetch.daterange
......@@ -380,7 +381,105 @@ def download_already_present(state, dst, download_stats, i_own_file):
download_stats.download_already_present(state, dst)
touch(dst, i_own_file)
HUGE_TIMEOUT=60*60*24*30 # 30 days in seconds
def extract_curl_http_error_code(stderr_msg):
    """Extract the HTTP status code and reason text from curl's stderr.

    Searches ``stderr_msg`` for curl's ``requested URL returned error:
    <code> <reason>`` line (as emitted when curl is run with ``--fail``).

    Args:
        stderr_msg: curl's stderr output, already decoded to ``str``.

    Returns:
        A ``(code, reason)`` tuple. ``code`` is the HTTP status as an int and
        ``reason`` is the remainder of that line with trailing whitespace
        removed (it may be empty when curl prints only the number). If no
        such line is found, returns
        ``(0, "Unable to parse curl output for error code")``.
    """
    m = re.search(r'requested URL returned error: (\d+) *(.*)', stderr_msg)
    if m is None:
        return 0, "Unable to parse curl output for error code"
    # '.' matches '\r' and spaces, so strip them to keep stray line-ending
    # characters out of the reason text handed back to the caller.
    return int(m.group(1)), m.group(2).rstrip()
# Short descriptions of the curl command-line tool's exit codes, keyed by
# exit status. Built once at import time rather than on every lookup.
# Numbers absent from the table are reported as unknown by
# describe_curl_exit_code().
_CURL_EXIT_CODES = {
    1: 'Unsupported protocol',
    2: 'Failed to initialize',
    3: 'URL malformed',
    4: 'Required feature or option not present in build',
    5: 'Could not resolve proxy',
    6: 'Could not resolve host',
    7: 'Failed to connect to host',
    8: 'Weird server reply',
    9: 'FTP access denied',
    10: 'FTP accept failed',
    11: 'FTP weird PASS reply',
    12: 'FTP timeout expired',
    13: 'FTP weird PASV reply',
    14: 'FTP weird 227 format',
    15: 'FTP cannot use host',
    16: 'HTTP/2 error',
    17: 'FTP could not set binary',
    18: 'Partial file',
    19: 'FTP could not download/access file; RETR or similar failed',
    21: 'FTP quote error',
    22: 'HTTP page not retrieved',
    23: 'Write error',
    25: 'FTP could not STOR file',
    26: 'Read error',
    27: 'Out of memory',
    28: 'Operation timeout',
    30: 'FTP PORT failed',
    31: 'FTP could not use REST',
    33: 'HTTP range error',
    34: 'HTTP post error',
    35: 'SSL connect error',
    36: 'Bad download resume',
    37: 'FILE could not read file',
    38: 'LDAP cannot bind',
    39: 'LDAP search failed',
    41: 'Function not found',
    42: 'Aborted by callback',
    43: 'Internal error',
    45: 'Interface error',
    47: 'Too many redirects',
    48: 'Unknown option specified to libcurl',
    49: 'Malformed telnet option',
    51: "Peer's SSL certificate or SSH MD5 fingerprint was not OK",
    52: 'The server did not reply anything',
    53: 'SSL crypto engine not found',
    54: 'Cannot set SSL crypto engine as default',
    55: 'Failed sending network data',
    56: 'Failure in receiving network data',
    58: 'Problem with the local certificate',
    59: 'Could not use specified SSL cipher',
    60: 'Peer certificate cannot be authenticated with known CA certificates',
    61: 'Unrecognized transfer encoding',
    62: 'Invalid LDAP URL',
    63: 'Maximum file size exceeded',
    64: 'Requested FTP SSL level failed',
    65: 'Sending the data requires a rewind that failed',
    66: 'Failed to initialise SSL Engine',
    67: 'The user name, password, or similar was not accepted',
    68: 'File not found on TFTP server',
    69: 'Permission problem on TFTP server',
    70: 'Out of disk space on TFTP server',
    71: 'Illegal TFTP operation',
    72: 'Unknown TFTP transfer ID',
    73: 'File already exists (TFTP)',
    74: 'No such user (TFTP)',
    75: 'Character conversion failed',
    76: 'Character conversion functions required',
    77: 'Problem reading the SSL CA cert',
    78: 'The resource referenced in the URL does not exist',
    79: 'An unspecified error occurred during the SSH session',
    80: 'Failed to shut down the SSL connection',
    82: 'Could not load CRL file, missing or wrong format',
    83: 'Issuer check failed',
    84: 'The FTP PRET command failed',
    85: 'Mismatch of RTSP CSeq numbers',
    86: 'Mismatch of RTSP Session Identifiers',
    87: 'Unable to parse FTP file list',
    88: 'FTP chunk callback reported error',
    89: 'No connection available, the session will be queued',
    90: 'SSL public key does not matched pinned public key',
    91: 'Invalid SSL certificate status',
    92: 'Stream error in HTTP/2 framing layer',
    93: 'An API function was called from inside a callback',
    94: 'An authentication function returned an error',
    95: 'A problem was detected in the HTTP/3 layer',
    96: 'QUIC connection error',
    97: 'Proxy handshake error',
    98: 'A client-side certificate is required to complete the TLS handshake',
    99: 'Poll or select returned fatal error',
}


def describe_curl_exit_code(code):
    """Return a short human-readable description of a curl exit status.

    Args:
        code: The exit status returned by the ``curl`` process.

    Returns:
        The description for ``code`` if it is in the table of known curl
        exit codes, otherwise the string ``"Unknown exit code <code>"``.
    """
    description = _CURL_EXIT_CODES.get(code)
    if description is None:
        return f"Unknown exit code {code}"
    return description
HUGE_TIMEOUT=60*60*24*7 # 7 days in seconds
# This timeout must be less than 2^31/1000 due to limits in Python
def download_to_file_no_lock(url, dst, fileobj, timeout=HUGE_TIMEOUT):
""" Download file from url to fileobj
......@@ -396,14 +495,43 @@ def download_to_file_no_lock(url, dst, fileobj, timeout=HUGE_TIMEOUT):
... download_to_file_no_lock("http://www.ssec.wisc.edu/", dst, f)
... os.path.getsize(dst) > 0
True
For backward compatibility, errors are raised as urllib.error.* errors.
"""
more = ""
if hasattr(fileobj, 'name'):
more = f"(using temporary file {fileobj.name})"
logging.debug(f"Downloading {url} to {dst} {more}")
with urllib.request.urlopen(url, timeout=timeout) as req:
copyfileobj(req,fileobj)
args = [
"curl",
"--location", # Follow redirects
"--fail", # Exit non-zero on HTTP errors (4xx or 5xx)
url
]
try:
ret = subprocess.run(args, stdout=fileobj, stderr=subprocess.PIPE, timeout=timeout)
except subprocess.TimeoutExpired:
raise urllib.error.URLError(f"curl timeout expired ({timeout} seconds)")
if ret.returncode == 0:
return
curl_reason = describe_curl_exit_code(ret.returncode)
logging.info(f"Failed to download {url}, curl returned {ret.returncode} {curl_reason}. curl output is:")
stderr = ret.stderr.decode('utf-8')
for line in stderr.rstrip().split("\n"):
logging.info(" "+line)
if ret.returncode == 22:
# We got an HTTP code of 4xx or 5xx (or hypothetically higher)
code, msg = extract_curl_http_error_code(stderr)
raise urllib.error.HTTPError(url, code, msg, None, None)
raise urllib.error.URLError(f"curl failed, returning {ret.returncode} {curl_reason}; see log for details")
# Exception type with no behavior beyond Exception itself. Judging by its
# name it signals that downloading is disabled; the code that raises or
# catches it is not visible in this excerpt -- TODO confirm against callers.
class DownloadsDisabledException(Exception):
pass
......
......@@ -299,13 +299,13 @@ class Test_aitf_data_for_run (TestCase):
Took 0:00:00.0.*
Average download speed of .*/second.
Warning: Unable to download any file set, reasons include:
Warning: Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404: File not found
Warning: Failed to download file set Global Forecast System for 2023-02-08 18:00:00 forecast 9 and 12 hours into the future: HTTP Error 404: File not found
Warning: Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404: File not found
Warning: Failed to download file set Global Forecast System for 2023-02-08 18:00:00 forecast 9 and 12 hours into the future: HTTP Error 404: File not found
Warning: Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404: File not found
Warning: Failed to download file set Global Forecast System for 2023-02-08 18:00:00 forecast 9 and 12 hours into the future: HTTP Error 404: File not found
CRITICAL ERROR: Errors occurred during download, including Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404: File not found
Warning: Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404:
Warning: Failed to download file set Global Forecast System for 2023-02-08 18:00:00 forecast 9 and 12 hours into the future: HTTP Error 404:
Warning: Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404:
Warning: Failed to download file set Global Forecast System for 2023-02-08 18:00:00 forecast 9 and 12 hours into the future: HTTP Error 404:
Warning: Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404:
Warning: Failed to download file set Global Forecast System for 2023-02-08 18:00:00 forecast 9 and 12 hours into the future: HTTP Error 404:
CRITICAL ERROR: Errors occurred during download, including Failed to download file set Global Forecast System for 2023-02-09 00:00:00 forecast 3 and 6 hours into the future: HTTP Error 404:
""")
self.assertDownloadCommand(
......@@ -324,7 +324,7 @@ class Test_aitf_data_for_run (TestCase):
def test_missing_SST(self):
expected_stdout = dedent("""\
Warning: Unable to download any file set, reasons include:
((Warning: Failed to download file set AVHRR Sea Surface Temperature for) 2023-02-09 at priority "" \\(empty string\\): (HTTP Error 404: File not found)
((Warning: Failed to download file set AVHRR Sea Surface Temperature for) 2023-02-09 at priority "" \\(empty string\\): (HTTP Error 404: )
\\2 2023-02-09 at priority "_preliminary": \\3
\\2 2023-02-08 at priority "" \\(empty string\\): \\3
\\2 2023-02-08 at priority "_preliminary": \\3
......@@ -340,7 +340,7 @@ class Test_aitf_data_for_run (TestCase):
\\2 2023-02-03 at priority "_preliminary": \\3)
\\1
\\1
CRITICAL ERROR: Errors occurred during download, including Failed to download file set AVHRR Sea Surface Temperature for 2023-02-09 at priority "" \\(empty string\\): HTTP Error 404: File not found
CRITICAL ERROR: Errors occurred during download, including Failed to download file set AVHRR Sea Surface Temperature for 2023-02-09 at priority "" \\(empty string\\): HTTP Error 404:
""")
self.assertDownloadCommand(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment