diff options
author | Jesús <heckyel@hyperbola.info> | 2022-04-06 03:37:17 +0800 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2022-04-06 03:37:17 +0800 |
commit | 1e5a50b71d8f0eae6007bedc329eecb24bb5aba3 (patch) | |
tree | a8611cda6596391cb6fb645e1469dcd356b63924 /hypervideo_dl/downloader | |
parent | f52fb3bceeb9d22b5106c1796fecec474a0cc138 (diff) | |
download | hypervideo-1e5a50b71d8f0eae6007bedc329eecb24bb5aba3.tar.lz hypervideo-1e5a50b71d8f0eae6007bedc329eecb24bb5aba3.tar.xz hypervideo-1e5a50b71d8f0eae6007bedc329eecb24bb5aba3.zip |
update from upstream
Diffstat (limited to 'hypervideo_dl/downloader')
-rw-r--r-- | hypervideo_dl/downloader/__init__.py | 17 | ||||
-rw-r--r-- | hypervideo_dl/downloader/common.py | 99 | ||||
-rw-r--r-- | hypervideo_dl/downloader/dash.py | 68 | ||||
-rw-r--r-- | hypervideo_dl/downloader/external.py | 102 | ||||
-rw-r--r-- | hypervideo_dl/downloader/f4m.py | 2 | ||||
-rw-r--r-- | hypervideo_dl/downloader/fc2.py | 41 | ||||
-rw-r--r-- | hypervideo_dl/downloader/fragment.py | 171 | ||||
-rw-r--r-- | hypervideo_dl/downloader/hls.py | 9 | ||||
-rw-r--r-- | hypervideo_dl/downloader/http.py | 130 | ||||
-rw-r--r-- | hypervideo_dl/downloader/ism.py | 4 | ||||
-rw-r--r-- | hypervideo_dl/downloader/mhtml.py | 13 | ||||
-rw-r--r-- | hypervideo_dl/downloader/rtmp.py | 3 | ||||
-rw-r--r-- | hypervideo_dl/downloader/websocket.py | 7 | ||||
-rw-r--r-- | hypervideo_dl/downloader/youtube_live_chat.py | 9 |
14 files changed, 447 insertions, 228 deletions
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py index 2449c74..96d484d 100644 --- a/hypervideo_dl/downloader/__init__.py +++ b/hypervideo_dl/downloader/__init__.py @@ -12,10 +12,15 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N info_copy = info_dict.copy() info_copy['to_stdout'] = to_stdout - downloaders = [_get_suitable_downloader(info_copy, proto, params, default) - for proto in (protocol or info_copy['protocol']).split('+')] + protocols = (protocol or info_copy['protocol']).split('+') + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols] + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): return FFmpegFD + elif (set(downloaders) == {DashSegmentsFD} + and not (to_stdout and len(protocols) > 1) + and set(protocols) == {'http_dash_segments_generator'}): + return DashSegmentsFD elif len(downloaders) == 1: return downloaders[0] return None @@ -25,6 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N from .common import FileDownloader from .dash import DashSegmentsFD from .f4m import F4mFD +from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD from .rtmp import RtmpFD @@ -41,6 +47,7 @@ from .external import ( PROTOCOL_MAP = { 'rtmp': RtmpFD, + 'rtmpe': RtmpFD, 'rtmp_ffmpeg': FFmpegFD, 'm3u8_native': HlsFD, 'm3u8': FFmpegFD, @@ -48,9 +55,11 @@ PROTOCOL_MAP = { 'rtsp': RtspFD, 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, + 'http_dash_segments_generator': DashSegmentsFD, 'ism': IsmFD, 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, + 'fc2_live': FC2LiveFD, 'websocket_frag': WebSocketFragmentFD, 'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD, @@ -62,6 +71,7 @@ def shorten_protocol_name(proto, simplify=False): 'm3u8_native': 'm3u8_n', 'rtmp_ffmpeg': 'rtmp_f', 'http_dash_segments': 'dash', + 'http_dash_segments_generator': 'dash_g', 'niconico_dmc': 'dmc', 'websocket_frag': 'WSfrag', } @@ -70,6 +80,7 @@ def shorten_protocol_name(proto, simplify=False): 'https': 'http', 'ftps': 'ftp', 'm3u8_native': 'm3u8', + 'http_dash_segments_generator': 'dash', 'rtmp_ffmpeg': 'rtmp', 'm3u8_frag_urls': 'm3u8', 'dash_frag_urls': 'dash', @@ -108,7 +119,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default): return FFmpegFD elif (external_downloader or '').lower() == 'native': return HlsFD - elif get_suitable_downloader( + elif protocol == 'm3u8_native' and get_suitable_downloader( info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): return HlsFD elif params.get('hls_prefer_native') is True: diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py index 27ca2cd..7cef3e8 100644 --- a/hypervideo_dl/downloader/common.py +++ b/hypervideo_dl/downloader/common.py @@ -4,14 +4,17 @@ import os import re import time import random +import errno from ..utils import ( decodeArgument, encodeFilename, error_to_compat_str, format_bytes, + sanitize_open, shell_quote, timeconvert, + timetuple_from_msec, ) from ..minicurses import ( MultilineLogger, @@ -38,6 +41,7 @@ class FileDownloader(object): ratelimit: Download speed limit, in bytes/sec. throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) retries: Number of times to retry for HTTP error 5xx + file_access_retries: Number of times to retry on file access error buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. @@ -75,14 +79,12 @@ class FileDownloader(object): @staticmethod def format_seconds(seconds): - (mins, secs) = divmod(seconds, 60) - (hours, mins) = divmod(mins, 60) - if hours > 99: + time = timetuple_from_msec(seconds * 1000) + if time.hours > 99: return '--:--:--' - if hours == 0: - return '%02d:%02d' % (mins, secs) - else: - return '%02d:%02d:%02d' % (hours, mins, secs) + if not time.hours: + return '%02d:%02d' % time[1:-1] + return '%02d:%02d:%02d' % time[:-1] @staticmethod def calc_percent(byte_counter, data_len): @@ -94,6 +96,8 @@ class FileDownloader(object): def format_percent(percent): if percent is None: return '---.-%' + elif percent == 100: + return '100%' return '%6s' % ('%3.1f%%' % percent) @staticmethod @@ -155,7 +159,7 @@ class FileDownloader(object): return int(round(number * multiplier)) def to_screen(self, *args, **kargs): - self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs) + self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) def to_stderr(self, message): self.ydl.to_stderr(message) @@ -206,13 +210,41 @@ class FileDownloader(object): def ytdl_filename(self, filename): return filename + '.ytdl' + def wrap_file_access(action, *, fatal=False): + def outer(func): + def inner(self, *args, **kwargs): + file_access_retries = self.params.get('file_access_retries', 0) + retry = 0 + while True: + try: + return func(self, *args, **kwargs) + except (IOError, OSError) as err: + retry = retry + 1 + if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL): + if not fatal: + self.report_error(f'unable to {action} file: {err}') + return + raise + self.to_screen( + f'[download] Unable to {action} file due to file access error. ' + f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...') + time.sleep(0.01) + return inner + return outer + + @wrap_file_access('open', fatal=True) + def sanitize_open(self, filename, open_mode): + return sanitize_open(filename, open_mode) + + @wrap_file_access('remove') + def try_remove(self, filename): + os.remove(filename) + + @wrap_file_access('rename') def try_rename(self, old_filename, new_filename): if old_filename == new_filename: return - try: - os.replace(old_filename, new_filename) - except (IOError, OSError) as err: - self.report_error(f'unable to rename file: {err}') + os.replace(old_filename, new_filename) def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" @@ -245,14 +277,32 @@ class FileDownloader(object): elif self.ydl.params.get('logger'): self._multiline = MultilineLogger(self.ydl.params['logger'], lines) elif self.params.get('progress_with_newline'): - self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) + self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines) else: - self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) + self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet')) + self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color') def _finish_multiline_status(self): self._multiline.end() - def _report_progress_status(self, s): + _progress_styles = { + 'downloaded_bytes': 'light blue', + 'percent': 'light blue', + 'eta': 'yellow', + 'speed': 'green', + 'elapsed': 'bold white', + 'total_bytes': '', + 'total_bytes_estimate': '', + } + + def _report_progress_status(self, s, default_template): + for name, style in self._progress_styles.items(): + name = f'_{name}_str' + if name not in s: + continue + s[name] = self._format_progress(s[name], style) + s['_default_template'] = default_template % s + progress_dict = s.copy() progress_dict.pop('info_dict') progress_dict = {'info': s['info_dict'], 'progress': progress_dict} @@ -265,6 +315,10 @@ class FileDownloader(object): progress_template.get('download-title') or 'hypervideo %(progress._default_template)s', progress_dict)) + def _format_progress(self, *args, **kwargs): + return self.ydl._format_text( + self._multiline.stream, self._multiline.allow_colors, *args, **kwargs) + def report_progress(self, s): if s['status'] == 'finished': if self.params.get('noprogress'): @@ -277,8 +331,7 @@ class FileDownloader(object): s['_elapsed_str'] = self.format_seconds(s['elapsed']) msg_template += ' in %(_elapsed_str)s' s['_percent_str'] = self.format_percent(100) - s['_default_template'] = msg_template % s - self._report_progress_status(s) + self._report_progress_status(s, msg_template) return if s['status'] != 'downloading': @@ -287,7 +340,7 @@ class FileDownloader(object): if s.get('eta') is not None: s['_eta_str'] = self.format_eta(s['eta']) else: - s['_eta_str'] = 'Unknown ETA' + s['_eta_str'] = 'Unknown' if s.get('total_bytes') and s.get('downloaded_bytes') is not None: s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) @@ -319,9 +372,12 @@ class FileDownloader(object): else: msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: - msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - s['_default_template'] = msg_template % s - self._report_progress_status(s) + msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s' + if s.get('fragment_index') and s.get('fragment_count'): + msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)' + elif s.get('fragment_index'): + msg_template += ' (frag %(fragment_index)s)' + self._report_progress_status(s, msg_template) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -372,6 +428,7 @@ class FileDownloader(object): 'status': 'finished', 'total_bytes': os.path.getsize(encodeFilename(filename)), }, info_dict) + self._finish_multiline_status() return True, False if subtitle is False: diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py index 6444ad6..a845ee7 100644 --- a/hypervideo_dl/downloader/dash.py +++ b/hypervideo_dl/downloader/dash.py @@ -1,4 +1,5 @@ from __future__ import unicode_literals +import time from ..downloader import get_suitable_downloader from .fragment import FragmentFD @@ -15,27 +16,53 @@ class DashSegmentsFD(FragmentFD): FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): - if info_dict.get('is_live'): + if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}: self.report_error('Live DASH videos are not supported') - fragment_base_url = info_dict.get('fragment_base_url') - fragments = info_dict['fragments'][:1] if self.params.get( - 'test', False) else info_dict['fragments'] - + real_start = time.time() real_downloader = get_suitable_downloader( info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) - ctx = { - 'filename': filename, - 'total_frags': len(fragments), - } + requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] + args = [] + for fmt in requested_formats or [info_dict]: + try: + fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) + except TypeError: + fragment_count = None + ctx = { + 'filename': fmt.get('filepath') or filename, + 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'), + 'total_frags': fragment_count, + } + + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, fmt) + ctx['start'] = real_start + + fragments_to_download = self._get_fragments(fmt, ctx) + + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + info_dict['fragments'] = list(fragments_to_download) + fd = real_downloader(self.ydl, self.params) + return fd.real_download(filename, info_dict) + + args.append([ctx, fragments_to_download, fmt]) - if real_downloader: - self._prepare_external_frag_download(ctx) - else: - self._prepare_and_start_frag_download(ctx, info_dict) + return self.download_and_append_fragments_multiple(*args) + + def _resolve_fragments(self, fragments, ctx): + fragments = fragments(ctx) if callable(fragments) else fragments + return [next(iter(fragments))] if self.params.get('test') else fragments + + def _get_fragments(self, fmt, ctx): + fragment_base_url = fmt.get('fragment_base_url') + fragments = self._resolve_fragments(fmt['fragments'], ctx) - fragments_to_download = [] frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 @@ -46,17 +73,8 @@ class DashSegmentsFD(FragmentFD): assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) - fragments_to_download.append({ + yield { 'frag_index': frag_index, 'index': i, 'url': fragment_url, - }) - - if real_downloader: - self.to_screen( - '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) - info_dict['fragments'] = fragments_to_download - fd = real_downloader(self.ydl, self.params) - return fd.real_download(filename, info_dict) - - return self.download_and_append_fragments(ctx, fragments_to_download, info_dict) + } diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py index 74adb05..b99dc37 100644 --- a/hypervideo_dl/downloader/external.py +++ b/hypervideo_dl/downloader/external.py @@ -13,17 +13,18 @@ from ..compat import ( ) from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( + classproperty, cli_option, cli_valueless_option, cli_bool_option, _configuration_args, + determine_ext, encodeFilename, encodeArgument, handle_youtubedl_headers, check_executable, - is_outdated_version, - process_communicate_or_kill, - sanitize_open, + Popen, + remove_end, ) @@ -73,17 +74,23 @@ class ExternalFD(FragmentFD): def get_basename(cls): return cls.__name__[:-2].lower() + @classproperty + def EXE_NAME(cls): + return cls.get_basename() + @property def exe(self): - return self.get_basename() + return self.EXE_NAME @classmethod def available(cls, path=None): - path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT]) - if path: - cls.exe = path - return path - return False + path = check_executable( + cls.EXE_NAME if path in (None, cls.get_basename()) else path, + [cls.AVAILABLE_OPT]) + if not path: + return False + cls.exe = path + return path @classmethod def supports(cls, info_dict): @@ -106,7 +113,7 @@ class ExternalFD(FragmentFD): def _configuration_args(self, keys=None, *args, **kwargs): return _configuration_args( - self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(), + self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, keys, *args, **kwargs) def _call_downloader(self, tmpfilename, info_dict): @@ -116,9 +123,8 @@ class ExternalFD(FragmentFD): self._debug_cmd(cmd) if 'fragments' not in info_dict: - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() if p.returncode != 0: self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode @@ -128,9 +134,8 @@ class ExternalFD(FragmentFD): count = 0 while count <= fragment_retries: - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() if p.returncode == 0: break # TODO: Decide whether to retry based on error code @@ -147,23 +152,23 @@ class ExternalFD(FragmentFD): return -1 decrypt_fragment = self.decrypter(info_dict) - dest, _ = sanitize_open(tmpfilename, 'wb') + dest, _ = self.sanitize_open(tmpfilename, 'wb') for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) try: - src, _ = sanitize_open(fragment_filename, 'rb') - except IOError: + src, _ = self.sanitize_open(fragment_filename, 'rb') + except IOError as err: if skip_unavailable_fragments and frag_index > 1: - self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + self.report_skip_fragment(frag_index, err) continue - self.report_error('Unable to open fragment %d' % frag_index) + self.report_error(f'Unable to open fragment {frag_index}; {err}') return -1 dest.write(decrypt_fragment(fragment, src.read())) src.close() if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(fragment_filename)) + self.try_remove(encodeFilename(fragment_filename)) dest.close() - os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 @@ -171,7 +176,7 @@ class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '--location', '-o', tmpfilename] + cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] @@ -199,8 +204,8 @@ class CurlFD(ExternalFD): self._debug_cmd(cmd) # curl writes the progress to stderr so don't capture it. - p = subprocess.Popen(cmd) - process_communicate_or_kill(p) + p = Popen(cmd) + p.communicate_or_kill() return p.returncode @@ -221,7 +226,7 @@ class WgetFD(ExternalFD): AVAILABLE_OPT = '--version' def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] + cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] @@ -232,7 +237,10 @@ class WgetFD(ExternalFD): retry[1] = '0' cmd += retry cmd += self._option('--bind-address', 'source_address') - cmd += self._option('--proxy', 'proxy') + proxy = self.params.get('proxy') + if proxy: + for var in ('http_proxy', 'https_proxy'): + cmd += ['--execute', '%s=%s' % (var, proxy)] cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] @@ -255,7 +263,7 @@ class Aria2cFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', - '--file-allocation=none', '-x16', '-j16', '-s16'] + '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] if 'fragments' in info_dict: cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] else: @@ -269,6 +277,7 @@ class Aria2cFD(ExternalFD): cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') + cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() # aria2c strips out spaces from the beginning/end of filenames and paths. @@ -293,7 +302,7 @@ class Aria2cFD(ExternalFD): for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) - stream, _ = sanitize_open(url_list_file, 'wb') + stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode('utf-8')) stream.close() cmd += ['-i', url_list_file] @@ -304,10 +313,7 @@ class Aria2cFD(ExternalFD): class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' - - @classmethod - def available(cls, path=None): - return ExternalFD.available(cls, path or 'http') + EXE_NAME = 'http' def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] @@ -446,8 +452,7 @@ class FFmpegFD(ExternalFD): if info_dict.get('requested_formats') or protocol == 'http_dash_segments': for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): stream_number = fmt.get('manifest_stream_number', 0) - a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v' - args.extend(['-map', f'{i}:{a_or_v}:{stream_number}']) + args.extend(['-map', f'{i}:{stream_number}']) if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] @@ -461,12 +466,21 @@ class FFmpegFD(ExternalFD): args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] - if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] elif ext == 'mp4' and tmpfilename == '-': args += ['-f', 'mpegts'] + elif ext == 'unknown_video': + ext = determine_ext(remove_end(tmpfilename, '.part')) + if ext == 'unknown_video': + self.report_warning( + 'The video format is unknown and cannot be downloaded by ffmpeg. ' + 'Explicitly set the extension in the filename to attempt download in that format') + else: + self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename') + args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] else: args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] @@ -476,7 +490,7 @@ class FFmpegFD(ExternalFD): args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) + proc = Popen(args, stdin=subprocess.PIPE, env=env) if url in ('-', 'pipe:'): self.on_process_started(proc, proc.stdin) try: @@ -488,7 +502,7 @@ class FFmpegFD(ExternalFD): # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): - process_communicate_or_kill(proc, b'q') + proc.communicate_or_kill(b'q') else: proc.kill() proc.wait() @@ -500,11 +514,13 @@ class AVconvFD(FFmpegFD): pass -_BY_NAME = dict( - (klass.get_basename(), klass) +_BY_NAME = { + klass.get_basename(): klass for name, klass in globals().items() if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') -) +} + +_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()} def list_external_downloaders(): @@ -516,4 +532,4 @@ def get_external_downloader(external_downloader): downloader . """ # Drop .exe extension on Windows bn = os.path.splitext(os.path.basename(external_downloader))[0] - return _BY_NAME.get(bn) + return _BY_NAME.get(bn, _BY_EXE.get(bn)) diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py index 9da2776..0008b7c 100644 --- a/hypervideo_dl/downloader/f4m.py +++ b/hypervideo_dl/downloader/f4m.py @@ -366,7 +366,7 @@ class F4mFD(FragmentFD): ctx = { 'filename': filename, 'total_frags': total_frags, - 'live': live, + 'live': bool(live), } self._prepare_frag_download(ctx) diff --git a/hypervideo_dl/downloader/fc2.py b/hypervideo_dl/downloader/fc2.py new file mode 100644 index 0000000..157bcf2 --- /dev/null +++ b/hypervideo_dl/downloader/fc2.py @@ -0,0 +1,41 @@ +from __future__ import division, unicode_literals + +import threading + +from .common import FileDownloader +from .external import FFmpegFD + + +class FC2LiveFD(FileDownloader): + """ + Downloads FC2 live without being stopped. <br> + Note, this is not a part of public API, and will be removed without notice. + DO NOT USE + """ + + def real_download(self, filename, info_dict): + ws = info_dict['ws'] + + heartbeat_lock = threading.Lock() + heartbeat_state = [None, 1] + + def heartbeat(): + try: + heartbeat_state[1] += 1 + ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) + except Exception: + self.to_screen('[fc2:live] Heartbeat failed') + + with heartbeat_lock: + heartbeat_state[0] = threading.Timer(30, heartbeat) + heartbeat_state[0]._daemonic = True + heartbeat_state[0].start() + + heartbeat() + + new_info_dict = info_dict.copy() + new_info_dict.update({ + 'ws': None, + 'protocol': 'live_ffmpeg', + }) + return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py index 57068db..a991c6d 100644 --- a/hypervideo_dl/downloader/fragment.py +++ b/hypervideo_dl/downloader/fragment.py @@ -1,9 +1,10 @@ from __future__ import division, unicode_literals +import http.client +import json +import math import os import time -import json -from math import ceil try: import concurrent.futures @@ -13,8 +14,9 @@ except ImportError: from .common import FileDownloader from .http import HttpFD -from ..aes import aes_cbc_decrypt_bytes +from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import ( + compat_os_name, compat_urllib_error, compat_struct_pack, ) @@ -22,8 +24,8 @@ from ..utils import ( DownloadError, error_to_compat_str, encodeFilename, - sanitize_open, sanitized_Request, + traverse_obj, ) @@ -31,6 +33,10 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass + def report_retry(self, err, count, retries): + super().to_screen( + f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...') + class FragmentFD(FileDownloader): """ @@ -44,6 +50,7 @@ class FragmentFD(FileDownloader): Skip unavailable fragments (DASH and hlsnative only) keep_fragments: Keep downloaded fragments on disk after downloading is finished + concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads _no_ytdl_file: Don't use .ytdl file For each incomplete fragment download hypervideo keeps on disk a special @@ -72,8 +79,9 @@ class FragmentFD(FileDownloader): '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) - def report_skip_fragment(self, frag_index): - self.to_screen('[download] Skipping fragment %d ...' % frag_index) + def report_skip_fragment(self, frag_index, err=None): + err = f' {err};' if err else '' + self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...') def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') @@ -84,11 +92,11 @@ class FragmentFD(FileDownloader): self._start_frag_download(ctx, info_dict) def __do_ytdl_file(self, ctx): - return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file') + return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file') def _read_ytdl_file(self, ctx): assert 'ytdl_corrupt' not in ctx - stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') + stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r') try: ytdl_data = json.loads(stream.read()) ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index'] @@ -100,7 +108,7 @@ class FragmentFD(FileDownloader): stream.close() def _write_ytdl_file(self, ctx): - frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') + frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w') try: downloader = { 'current_fragment': { @@ -125,14 +133,19 @@ class FragmentFD(FileDownloader): } success = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: - return False, None + return False if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename - return True, self._read_fragment(ctx) + return True def _read_fragment(self, ctx): - down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + try: + down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') + except FileNotFoundError: + if ctx.get('live'): + return None + raise ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() @@ -146,7 +159,7 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) + self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): @@ -165,8 +178,8 @@ class FragmentFD(FileDownloader): dl = HttpQuietDownloader( self.ydl, { - 'continuedl': True, - 'quiet': True, + 'continuedl': self.params.get('continuedl', True), + 'quiet': self.params.get('quiet'), 'noprogress': True, 'ratelimit': self.params.get('ratelimit'), 'retries': self.params.get('retries', 0), @@ -208,7 +221,7 @@ class FragmentFD(FileDownloader): self._write_ytdl_file(ctx) assert ctx['fragment_index'] == 0 - dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) + dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode) ctx.update({ 'dl': dl, @@ -236,6 +249,7 @@ class FragmentFD(FileDownloader): start = time.time() ctx.update({ 'started': start, + 'fragment_started': start, # Amount of fragment's bytes downloaded by the time of the previous # frag progress hook invocation 'prev_frag_downloaded_bytes': 0, @@ -266,6 +280,9 @@ class FragmentFD(FileDownloader): ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_total_bytes) + ctx['fragment_started'] = time.time() ctx['prev_frag_downloaded_bytes'] = 0 else: frag_downloaded_bytes = s['downloaded_bytes'] @@ -274,8 +291,8 @@ class FragmentFD(FileDownloader): state['eta'] = self.calc_eta( start, time_now, estimated_size - resume_len, state['downloaded_bytes'] - resume_len) - state['speed'] = s.get('speed') or ctx.get('speed') - ctx['speed'] = state['speed'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_downloaded_bytes) ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state, info_dict) @@ -288,7 +305,7 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) if os.path.isfile(ytdl_filename): - os.remove(ytdl_filename) + self.try_remove(ytdl_filename) elapsed = time.time() - ctx['started'] if ctx['tmpfilename'] == '-': @@ -355,9 +372,7 @@ class FragmentFD(FileDownloader): # not what it decrypts to. if self.params.get('test', False): return frag_content - padding_len = 16 - (len(frag_content) % 16) - decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv) - return decrypted_data[:-decrypted_data[-1]] + return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)) return decrypt_fragment @@ -366,64 +381,105 @@ class FragmentFD(FileDownloader): @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... all args must be either tuple or list ''' + interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) - max_workers = self.params.get('concurrent_fragment_downloads', max_progress) - self._prepare_multiline_status(max_progress) + max_workers = self.params.get('concurrent_fragment_downloads', 1) + if max_progress > 1: + self._prepare_multiline_status(max_progress) + is_live = any(traverse_obj(args, (..., 2, 'is_live'), default=[])) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress ctx['progress_idx'] = idx - return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe) + return self.download_and_append_fragments( + ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, + tpe=tpe, interrupt_trigger=interrupt_trigger) class FTPE(concurrent.futures.ThreadPoolExecutor): # has to stop this or it's going to wait on the worker thread itself def __exit__(self, exc_type, exc_val, exc_tb): pass + if compat_os_name == 'nt': + def future_result(future): + while True: + try: + return future.result(0.1) + except KeyboardInterrupt: + raise + except concurrent.futures.TimeoutError: + continue + else: + def future_result(future): + return future.result() + + def interrupt_trigger_iter(fg): + for f in fg: + if not interrupt_trigger[0]: + break + yield f + spins = [] for idx, (ctx, fragments, info_dict) in enumerate(args): - tpe = FTPE(ceil(max_workers / max_progress)) - job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) + tpe = FTPE(math.ceil(max_workers / max_progress)) + job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe) spins.append((tpe, job)) result = True for tpe, job in spins: try: - result = result and job.result() + result = result and future_result(job) + except KeyboardInterrupt: + interrupt_trigger[0] = False finally: tpe.shutdown(wait=True) + if not interrupt_trigger[0] and not is_live: + raise KeyboardInterrupt() + # we expect the user wants to stop and DO WANT the preceding postprocessors to run; + # so returning a intermediate result here instead of KeyboardInterrupt on live return result - def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None): + def download_and_append_fragments( + self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, + tpe=None, interrupt_trigger=None): + if not interrupt_trigger: + interrupt_trigger = (True, ) + fragment_retries = self.params.get('fragment_retries', 0) - is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True) + is_fatal = ( + ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0)) + if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)) + if not pack_func: pack_func = lambda frag_content, _: frag_content def download_fragment(fragment, ctx): + if not interrupt_trigger[0]: + return + frag_index = ctx['fragment_index'] = fragment['frag_index'] + ctx['last_error'] = None headers = info_dict.get('http_headers', {}).copy() byte_range = fragment.get('byte_range') if byte_range: headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) # Never skip the first fragment - fatal = is_fatal(fragment.get('index') or (frag_index - 1)) - count, frag_content = 0, None + fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) - if not success: - return False, frag_index - break - except compat_urllib_error.HTTPError as err: + if self._download_fragment(ctx, fragment['url'], info_dict, headers): + break + return + except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. # See https://github.com/ytdl-org/youtube-dl/issues/10165, # https://github.com/ytdl-org/youtube-dl/issues/10448). count += 1 + ctx['last_error'] = err if count <= fragment_retries: self.report_retry_fragment(err, frag_index, count, fragment_retries) except DownloadError: @@ -433,49 +489,46 @@ class FragmentFD(FileDownloader): break raise - if count > fragment_retries: - if not fatal: - return False, frag_index + if count > fragment_retries and fatal: ctx['dest_stream'].close() self.report_error('Giving up after %s fragment retries' % fragment_retries) - return False, frag_index - return frag_content, frag_index def append_fragment(frag_content, frag_index, ctx): - if not frag_content: - if not is_fatal(frag_index - 1): - self.report_skip_fragment(frag_index) - return True - else: - ctx['dest_stream'].close() - self.report_error( - 'fragment %s not found, unable to continue' % frag_index) - return False - self._append_fragment(ctx, pack_func(frag_content, frag_index)) + if frag_content: + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + elif not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index, 'fragment not found') + else: + ctx['dest_stream'].close() + self.report_error(f'fragment {frag_index} not found, unable to continue') + return False return True decrypt_fragment = self.decrypter(info_dict) - max_workers = self.params.get('concurrent_fragment_downloads', 1) + max_workers = math.ceil( + self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) if can_threaded_download and max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() - frag_content, frag_index = download_fragment(fragment, ctx_copy) - return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + download_fragment(fragment, ctx_copy) + return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: - for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): + for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): ctx['fragment_filename_sanitized'] = frag_filename ctx['fragment_index'] = frag_index - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx) if not result: return False else: for fragment in fragments: - frag_content, frag_index = download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + if not interrupt_trigger[0]: + break + download_fragment(fragment, ctx) + result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) if not result: return False diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py index ef8a81b..f3f32b5 100644 --- a/hypervideo_dl/downloader/hls.py +++ b/hypervideo_dl/downloader/hls.py @@ -77,6 +77,15 @@ class HlsFD(FragmentFD): message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; ' 'Decryption will be performed natively, but will be extremely slow') if not can_download: + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), s) + if has_drm and not self.params.get('allow_unplayable_formats'): + self.report_error( + 'This video is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format') + return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py index 2e95bb9..591a9b0 100644 --- a/hypervideo_dl/downloader/http.py +++ b/hypervideo_dl/downloader/http.py @@ -1,29 +1,30 @@ from __future__ import unicode_literals -import errno import os -import socket +import ssl import time import random -import re from .common import FileDownloader from ..compat import ( - compat_str, compat_urllib_error, + compat_http_client ) from ..utils import ( ContentTooShortError, encodeFilename, int_or_none, - sanitize_open, + parse_http_range, sanitized_Request, ThrottledDownload, + try_call, write_xattr, XAttrMetadataError, XAttrUnavailableError, ) +RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException) + class HttpFD(FileDownloader): def real_download(self, filename, info_dict): @@ -54,11 +55,11 @@ class HttpFD(FileDownloader): ctx.open_mode = 'wb' ctx.resume_len = 0 - ctx.data_len = None ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() - ctx.chunk_size = None - throttle_start = None + + # parse given Range + req_start, req_end, _ = parse_http_range(headers.get('Range')) if self.params.get('continuedl', True): # Establish possible resume length @@ -81,43 +82,50 @@ class HttpFD(FileDownloader): class NextFragment(Exception): pass - def set_range(req, start, end): - range_header = 'bytes=%d-' % start - if end: - range_header += compat_str(end) - req.add_header('Range', range_header) - def establish_connection(): ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) if not is_test and chunk_size else chunk_size) if ctx.resume_len > 0: range_start = ctx.resume_len + if req_start is not None: + # offset the beginning of Range to be within request + range_start += req_start if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' + elif req_start is not None: + range_start = req_start elif ctx.chunk_size > 0: range_start = 0 else: range_start = None ctx.is_resume = False - range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None - if range_end and ctx.data_len is not None and range_end >= ctx.data_len: - range_end = ctx.data_len - 1 - has_range = range_start is not None - ctx.has_range = has_range + + if ctx.chunk_size: + chunk_aware_end = range_start + ctx.chunk_size - 1 + # we're not allowed to download outside Range + range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end) + elif req_end is not None: + # there's no need for chunked downloads, so download until the end of Range + range_end = req_end + else: + range_end = None + + if try_call(lambda: range_start > range_end): + ctx.resume_len = 0 + ctx.open_mode = 'wb' + raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})')) + + if try_call(lambda: range_end >= ctx.content_len): + range_end = ctx.content_len - 1 + request = sanitized_Request(url, request_data, headers) + has_range = range_start is not None if has_range: - set_range(request, range_start, range_end) + request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}') # Establish connection try: - try: - ctx.data = self.ydl.urlopen(request) - except (compat_urllib_error.URLError, ) as err: - # reason may not be available, e.g. for urllib2.HTTPError on python 2.6 - reason = getattr(err, 'reason', None) - if isinstance(reason, socket.timeout): - raise RetryDownload(err) - raise err + ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range @@ -125,31 +133,27 @@ class HttpFD(FileDownloader): # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') - if content_range: - content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) + content_range_start, content_range_end, content_len = parse_http_range(content_range) + if content_range_start is not None and range_start == content_range_start: # Content-Range is present and matches requested Range, resume is possible - if content_range_m: - if range_start == int(content_range_m.group(1)): - content_range_end = int_or_none(content_range_m.group(2)) - content_len = int_or_none(content_range_m.group(3)) - accept_content_len = ( - # Non-chunked download - not ctx.chunk_size - # Chunked download and requested piece or - # its part is promised to be served - or content_range_end == range_end - or content_len < range_end) - if accept_content_len: - ctx.data_len = content_len - return + accept_content_len = ( + # Non-chunked download + not ctx.chunk_size + # Chunked download and requested piece or + # its part is promised to be served + or content_range_end == range_end + or content_len < range_end) + if accept_content_len: + ctx.content_len = content_len + ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0) + return # Content-Range is either not present or invalid. Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' - ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) - return + ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None)) except (compat_urllib_error.HTTPError, ) as err: if err.code == 416: # Unable to resume (requested range not satisfiable) @@ -191,14 +195,16 @@ class HttpFD(FileDownloader): # Unexpected HTTP error raise raise RetryDownload(err) - except socket.error as err: - if err.errno != errno.ECONNRESET: - # Connection reset is no problem, just retry + except compat_urllib_error.URLError as err: + if isinstance(err.reason, ssl.CertificateError): raise raise RetryDownload(err) + # In urllib.request.AbstractHTTPHandler, the response is partially read on request. + # Any errors that occur during this will not be wrapped by URLError + except RESPONSE_READ_EXCEPTIONS as err: + raise RetryDownload(err) def download(): - nonlocal throttle_start data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver @@ -241,16 +247,8 @@ class HttpFD(FileDownloader): try: # Download and write data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) - # socket.timeout is a subclass of socket.error but may not have - # errno set - except socket.timeout as e: - retry(e) - except socket.error as e: - # SSLError on python 2 (inherits socket.error) may have - # no errno set but this error message - if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out': - retry(e) - raise + except RESPONSE_READ_EXCEPTIONS as err: + retry(err) byte_counter += len(data_block) @@ -261,7 +259,7 @@ class HttpFD(FileDownloader): # Open destination file just in time if ctx.stream is None: try: - ctx.stream, ctx.tmpfilename = sanitize_open( + ctx.stream, ctx.tmpfilename = self.sanitize_open( ctx.tmpfilename, ctx.open_mode) assert ctx.stream is not None ctx.filename = self.undo_temp_name(ctx.tmpfilename) @@ -321,16 +319,16 @@ class HttpFD(FileDownloader): if speed and speed < (self.params.get('throttledratelimit') or 0): # The speed must stay below the limit for 3 seconds # This prevents raising error when the speed temporarily goes down - if throttle_start is None: - throttle_start = now - elif now - throttle_start > 3: + if ctx.throttle_start is None: + ctx.throttle_start = now + elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload() elif speed: - throttle_start = None + ctx.throttle_start = None - if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: + if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter # ctx.block_size = block_size raise NextFragment() diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py index 09516ab..4d5618c 100644 --- a/hypervideo_dl/downloader/ism.py +++ b/hypervideo_dl/downloader/ism.py @@ -263,9 +263,11 @@ class IsmFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) + success = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False + frag_content = self._read_fragment(ctx) + if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py index f0f4dc6..c8332c0 100644 --- a/hypervideo_dl/downloader/mhtml.py +++ b/hypervideo_dl/downloader/mhtml.py @@ -114,8 +114,8 @@ body > figure > img { fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] - title = info_dict['title'] - origin = info_dict['webpage_url'] + title = info_dict.get('title', info_dict['format_id']) + origin = info_dict.get('webpage_url', info_dict['url']) ctx = { 'filename': filename, @@ -166,10 +166,15 @@ body > figure > img { if (i + 1) <= ctx['fragment_index']: continue - fragment_url = urljoin(fragment_base_url, fragment['path']) - success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + + success = self._download_fragment(ctx, fragment_url, info_dict) if not success: continue + frag_content = self._read_fragment(ctx) mime_type = b'image/jpeg' if frag_content.startswith(b'\x89PNG\r\n\x1a\n'): diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py index 6dca647..90f1acf 100644 --- a/hypervideo_dl/downloader/rtmp.py +++ b/hypervideo_dl/downloader/rtmp.py @@ -12,6 +12,7 @@ from ..utils import ( encodeFilename, encodeArgument, get_exe_version, + Popen, ) @@ -26,7 +27,7 @@ class RtmpFD(FileDownloader): start = time.time() resume_percent = None resume_downloaded_data_len = None - proc = subprocess.Popen(args, stderr=subprocess.PIPE) + proc = Popen(args, stderr=subprocess.PIPE) cursor_in_new_line = True proc_stderr_closed = False try: diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py index 0882220..58e2bce 100644 --- a/hypervideo_dl/downloader/websocket.py +++ b/hypervideo_dl/downloader/websocket.py @@ -5,9 +5,12 @@ import threading try: import websockets - has_websockets = True -except ImportError: +except (ImportError, SyntaxError): + # websockets 3.10 on python 3.6 causes SyntaxError + # See https://github.com/hypervideo/hypervideo/issues/2633 has_websockets = False +else: + has_websockets = True from .common import FileDownloader from .external import FFmpegFD diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py index ef4205e..dd21ac8 100644 --- a/hypervideo_dl/downloader/youtube_live_chat.py +++ b/hypervideo_dl/downloader/youtube_live_chat.py @@ -22,6 +22,9 @@ class YoutubeLiveChatFD(FragmentFD): def real_download(self, filename, info_dict): video_id = info_dict['video_id'] self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': + self.report_warning('Live chat download runs until the livestream ends. ' + 'If you wish to download the video simultaneously, run a separate hypervideo instance') fragment_retries = self.params.get('fragment_retries', 0) test = self.params.get('test', False) @@ -112,9 +115,10 @@ class YoutubeLiveChatFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success, raw_fragment = dl_fragment(url, request_data, headers) + success = dl_fragment(url, request_data, headers) if not success: return False, None, None, None + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: @@ -142,9 +146,10 @@ class YoutubeLiveChatFD(FragmentFD): self._prepare_and_start_frag_download(ctx, info_dict) - success, raw_fragment = dl_fragment(info_dict['url']) + success = dl_fragment(info_dict['url']) if not success: return False + raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: |