diff options
Diffstat (limited to 'hypervideo_dl/downloader/http.py')
-rw-r--r-- | hypervideo_dl/downloader/http.py | 130 |
1 files changed, 64 insertions, 66 deletions
diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py index 2e95bb9..591a9b0 100644 --- a/hypervideo_dl/downloader/http.py +++ b/hypervideo_dl/downloader/http.py @@ -1,29 +1,30 @@ from __future__ import unicode_literals -import errno import os -import socket +import ssl import time import random -import re from .common import FileDownloader from ..compat import ( - compat_str, compat_urllib_error, + compat_http_client ) from ..utils import ( ContentTooShortError, encodeFilename, int_or_none, - sanitize_open, + parse_http_range, sanitized_Request, ThrottledDownload, + try_call, write_xattr, XAttrMetadataError, XAttrUnavailableError, ) +RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException) + class HttpFD(FileDownloader): def real_download(self, filename, info_dict): @@ -54,11 +55,11 @@ class HttpFD(FileDownloader): ctx.open_mode = 'wb' ctx.resume_len = 0 - ctx.data_len = None ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() - ctx.chunk_size = None - throttle_start = None + + # parse given Range + req_start, req_end, _ = parse_http_range(headers.get('Range')) if self.params.get('continuedl', True): # Establish possible resume length @@ -81,43 +82,50 @@ class HttpFD(FileDownloader): class NextFragment(Exception): pass - def set_range(req, start, end): - range_header = 'bytes=%d-' % start - if end: - range_header += compat_str(end) - req.add_header('Range', range_header) - def establish_connection(): ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) if not is_test and chunk_size else chunk_size) if ctx.resume_len > 0: range_start = ctx.resume_len + if req_start is not None: + # offset the beginning of Range to be within request + range_start += req_start if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' + elif req_start is not None: + range_start = req_start elif ctx.chunk_size > 0: range_start = 0 else: range_start = None ctx.is_resume = False - range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None - if range_end and ctx.data_len is not None and range_end >= ctx.data_len: - range_end = ctx.data_len - 1 - has_range = range_start is not None - ctx.has_range = has_range + + if ctx.chunk_size: + chunk_aware_end = range_start + ctx.chunk_size - 1 + # we're not allowed to download outside Range + range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end) + elif req_end is not None: + # there's no need for chunked downloads, so download until the end of Range + range_end = req_end + else: + range_end = None + + if try_call(lambda: range_start > range_end): + ctx.resume_len = 0 + ctx.open_mode = 'wb' + raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})')) + + if try_call(lambda: range_end >= ctx.content_len): + range_end = ctx.content_len - 1 + request = sanitized_Request(url, request_data, headers) + has_range = range_start is not None if has_range: - set_range(request, range_start, range_end) + request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}') # Establish connection try: - try: - ctx.data = self.ydl.urlopen(request) - except (compat_urllib_error.URLError, ) as err: - # reason may not be available, e.g. for urllib2.HTTPError on python 2.6 - reason = getattr(err, 'reason', None) - if isinstance(reason, socket.timeout): - raise RetryDownload(err) - raise err + ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range @@ -125,31 +133,27 @@ class HttpFD(FileDownloader): # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') - if content_range: - content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) + content_range_start, content_range_end, content_len = parse_http_range(content_range) + if content_range_start is not None and range_start == content_range_start: # Content-Range is present and matches requested Range, resume is possible - if content_range_m: - if range_start == int(content_range_m.group(1)): - content_range_end = int_or_none(content_range_m.group(2)) - content_len = int_or_none(content_range_m.group(3)) - accept_content_len = ( - # Non-chunked download - not ctx.chunk_size - # Chunked download and requested piece or - # its part is promised to be served - or content_range_end == range_end - or content_len < range_end) - if accept_content_len: - ctx.data_len = content_len - return + accept_content_len = ( + # Non-chunked download + not ctx.chunk_size + # Chunked download and requested piece or + # its part is promised to be served + or content_range_end == range_end + or content_len < range_end) + if accept_content_len: + ctx.content_len = content_len + ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0) + return # Content-Range is either not present or invalid. Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' - ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) - return + ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None)) except (compat_urllib_error.HTTPError, ) as err: if err.code == 416: # Unable to resume (requested range not satisfiable) @@ -191,14 +195,16 @@ class HttpFD(FileDownloader): # Unexpected HTTP error raise raise RetryDownload(err) - except socket.error as err: - if err.errno != errno.ECONNRESET: - # Connection reset is no problem, just retry + except compat_urllib_error.URLError as err: + if isinstance(err.reason, ssl.CertificateError): raise raise RetryDownload(err) + # In urllib.request.AbstractHTTPHandler, the response is partially read on request. + # Any errors that occur during this will not be wrapped by URLError + except RESPONSE_READ_EXCEPTIONS as err: + raise RetryDownload(err) def download(): - nonlocal throttle_start data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver @@ -241,16 +247,8 @@ class HttpFD(FileDownloader): try: # Download and write data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - # socket.timeout is a subclass of socket.error but may not have - # errno set - except socket.timeout as e: - retry(e) - except socket.error as e: - # SSLError on python 2 (inherits socket.error) may have - # no errno set but this error message - if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': - retry(e) - raise + except RESPONSE_READ_EXCEPTIONS as err: + retry(err) byte_counter += len(data_block) @@ -261,7 +259,7 @@ class HttpFD(FileDownloader): # Open destination file just in time if ctx.stream is None: try: - ctx.stream, ctx.tmpfilename = sanitize_open( + ctx.stream, ctx.tmpfilename = self.sanitize_open( ctx.tmpfilename, ctx.open_mode) assert ctx.stream is not None ctx.filename = self.undo_temp_name(ctx.tmpfilename) @@ -321,16 +319,16 @@ class HttpFD(FileDownloader): if speed and speed < (self.params.get('throttledratelimit') or 0): # The speed must stay below the limit for 3 seconds # This prevents raising error when the speed temporarily goes down - if throttle_start is None: - throttle_start = now - elif now - throttle_start > 3: + if ctx.throttle_start is None: + ctx.throttle_start = now + elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload() elif speed: - throttle_start = None + ctx.throttle_start = None - if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: + if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter # ctx.block_size = block_size raise NextFragment() |