diff options
author | Jesús <heckyel@hyperbola.info> | 2021-10-18 15:24:21 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-10-18 15:24:21 -0500 |
commit | 5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e (patch) | |
tree | 65209bc739db35e31f1c9b5b868eb5df4fe12ae3 /hypervideo_dl/downloader/external.py | |
parent | 27fe903c511691c078942bef5ee9a05a43b15c8f (diff) | |
download | hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.lz hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.xz hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.zip |
update from upstream
Diffstat (limited to 'hypervideo_dl/downloader/external.py')
-rw-r--r-- | hypervideo_dl/downloader/external.py | 258 |
1 files changed, 203 insertions, 55 deletions
diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py index c31f891..74adb05 100644 --- a/hypervideo_dl/downloader/external.py +++ b/hypervideo_dl/downloader/external.py @@ -6,7 +6,7 @@ import subprocess import sys import time -from .common import FileDownloader +from .fragment import FragmentFD from ..compat import ( compat_setenv, compat_str, @@ -16,16 +16,21 @@ from ..utils import ( cli_option, cli_valueless_option, cli_bool_option, - cli_configuration_args, + _configuration_args, encodeFilename, encodeArgument, handle_youtubedl_headers, check_executable, is_outdated_version, + process_communicate_or_kill, + sanitize_open, ) -class ExternalFD(FileDownloader): +class ExternalFD(FragmentFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') + can_download_to_stdout = False + def real_download(self, filename, info_dict): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -56,7 +61,7 @@ class ExternalFD(FileDownloader): 'downloaded_bytes': fsize, 'total_bytes': fsize, }) - self._hook_progress(status) + self._hook_progress(status, info_dict) return True else: self.to_stderr('\n') @@ -70,19 +75,25 @@ class ExternalFD(FileDownloader): @property def exe(self): - return self.params.get('external_downloader') + return self.get_basename() @classmethod - def available(cls): - return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT]) + def available(cls, path=None): + path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT]) + if path: + cls.exe = path + return path + return False @classmethod def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') + return ( + (cls.can_download_to_stdout or not info_dict.get('to_stdout')) + and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS) @classmethod - def can_download(cls, info_dict): - return cls.available() and cls.supports(info_dict) + def can_download(cls, info_dict, path=None): + return cls.available(path) and cls.supports(info_dict) def _option(self, command_option, param): return cli_option(self.params, command_option, param) @@ -93,8 +104,10 @@ class ExternalFD(FileDownloader): def _valueless_option(self, command_option, param, expected_value=True): return cli_valueless_option(self.params, command_option, param, expected_value) - def _configuration_args(self, default=[]): - return cli_configuration_args(self.params, 'external_downloader_args', default) + def _configuration_args(self, keys=None, *args, **kwargs): + return _configuration_args( + self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(), + keys, *args, **kwargs) def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ @@ -102,12 +115,56 @@ class ExternalFD(FileDownloader): self._debug_cmd(cmd) - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = p.communicate() - if p.returncode != 0: + if 'fragments' not in info_dict: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode != 0: + self.to_stderr(stderr.decode('utf-8', 'replace')) + return p.returncode + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + count = 0 + while count <= fragment_retries: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode == 0: + break + # TODO: Decide whether to retry based on error code + # https://aria2.github.io/manual/en/html/aria2c.html#exit-status self.to_stderr(stderr.decode('utf-8', 'replace')) - return p.returncode + count += 1 + if count <= fragment_retries: + self.to_screen( + '[%s] Got error. Retrying fragments (attempt %d of %s)...' + % (self.get_basename(), count, self.format_retries(fragment_retries))) + if count > fragment_retries: + if not skip_unavailable_fragments: + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return -1 + + decrypt_fragment = self.decrypter(info_dict) + dest, _ = sanitize_open(tmpfilename, 'wb') + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + try: + src, _ = sanitize_open(fragment_filename, 'rb') + except IOError: + if skip_unavailable_fragments and frag_index > 1: + self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + continue + self.report_error('Unable to open fragment %d' % frag_index) + return -1 + dest.write(decrypt_fragment(fragment, src.read())) + src.close() + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(fragment_filename)) + dest.close() + os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + return 0 class CurlFD(ExternalFD): @@ -115,8 +172,10 @@ class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') @@ -141,7 +200,7 @@ class CurlFD(ExternalFD): # curl writes the progress to stderr so don't capture it. p = subprocess.Popen(cmd) - p.communicate() + process_communicate_or_kill(p) return p.returncode @@ -150,8 +209,9 @@ class AxelFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['-H', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -162,8 +222,9 @@ class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--tries', 'retries') if len(retry) == 2: @@ -180,51 +241,115 @@ class WgetFD(ExternalFD): class Aria2cFD(ExternalFD): AVAILABLE_OPT = '-v' + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls') + + @staticmethod + def supports_manifest(manifest): + UNSUPPORTED_FEATURES = [ + r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + ] + check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) + return all(check_results) def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c'] - cmd += self._configuration_args([ - '--min-split-size', '1M', '--max-connection-per-server', '4']) - dn = os.path.dirname(tmpfilename) - if dn: - cmd += ['--dir', dn] - cmd += ['--out', os.path.basename(tmpfilename)] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd = [self.exe, '-c', + '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', + '--file-allocation=none', '-x16', '-j16', '-s16'] + if 'fragments' in info_dict: + cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] + else: + cmd += ['--min-split-size', '1M'] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--max-overall-download-limit', 'ratelimit') cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') - cmd += ['--', info_dict['url']] + cmd += self._configuration_args() + + # aria2c strips out spaces from the beginning/end of filenames and paths. + # We work around this issue by adding a "./" to the beginning of the + # filename and relative path, and adding a "/" at the end of the path. + # See: https://github.com/hypervideo/hypervideo/issues/276 + # https://github.com/ytdl-org/youtube-dl/issues/20312 + # https://github.com/aria2/aria2/issues/1373 + dn = os.path.dirname(tmpfilename) + if dn: + if not os.path.isabs(dn): + dn = '.%s%s' % (os.path.sep, dn) + cmd += ['--dir', dn + os.path.sep] + if 'fragments' not in info_dict: + cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))] + cmd += ['--auto-file-renaming=false'] + + if 'fragments' in info_dict: + cmd += ['--file-allocation=none', '--uri-selector=inorder'] + url_list_file = '%s.frag.urls' % tmpfilename + url_list = [] + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) + url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) + stream, _ = sanitize_open(url_list_file, 'wb') + stream.write('\n'.join(url_list).encode('utf-8')) + stream.close() + cmd += ['-i', url_list_file] + else: + cmd += ['--', info_dict['url']] return cmd class HttpieFD(ExternalFD): + AVAILABLE_OPT = '--version' + @classmethod - def available(cls): - return check_executable('http', ['--version']) + def available(cls, path=None): + return ExternalFD.available(cls, path or 'http') def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] - for key, val in info_dict['http_headers'].items(): - cmd += ['%s:%s' % (key, val)] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['%s:%s' % (key, val)] return cmd class FFmpegFD(ExternalFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') + can_download_to_stdout = True + + @classmethod + def available(cls, path=None): + # TODO: Fix path for ffmpeg + # Fixme: This may be wrong when --ffmpeg-location is used + return FFmpegPostProcessor().available + @classmethod def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms') + return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')) + + def on_process_started(self, proc, stdin): + """ Override this in subclasses """ + pass @classmethod - def available(cls): - return FFmpegPostProcessor().available + def can_merge_formats(cls, info_dict, params): + return ( + info_dict.get('requested_formats') + and info_dict.get('protocol') + and not params.get('allow_unplayable_formats') + and 'no-direct-merge' not in params.get('compat_opts', []) + and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): - url = info_dict['url'] + urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']] ffpp = FFmpegPostProcessor(downloader=self) if not ffpp.available: - self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') + self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') return False ffpp.check_version() @@ -234,7 +359,12 @@ class FFmpegFD(ExternalFD): if self.params.get(log_level, False): args += ['-loglevel', log_level] break + if not self.params.get('verbose'): + args += ['-hide_banner'] + args += info_dict.get('_ffmpeg_args', []) + + # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server @@ -244,8 +374,6 @@ class FFmpegFD(ExternalFD): # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] - args += self._configuration_args() - # start_time = info_dict.get('start_time') or 0 # if start_time: # args += ['-ss', compat_str(start_time)] @@ -253,7 +381,7 @@ class FFmpegFD(ExternalFD): # if end_time: # args += ['-t', compat_str(end_time - start_time)] - if info_dict['http_headers'] and re.match(r'^https?://', url): + if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]): # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. headers = handle_youtubedl_headers(info_dict['http_headers']) @@ -311,13 +439,25 @@ class FFmpegFD(ExternalFD): elif isinstance(conn, compat_str): args += ['-rtmp_conn', conn] - args += ['-i', url, '-c', 'copy'] + for i, url in enumerate(urls): + args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] + + args += ['-c', 'copy'] + if info_dict.get('requested_formats') or protocol == 'http_dash_segments': + for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): + stream_number = fmt.get('manifest_stream_number', 0) + a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v' + args.extend(['-map', f'{i}:{a_or_v}:{stream_number}']) if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] + ext = info_dict['ext'] if protocol in ('m3u8', 'm3u8_native'): - if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': + use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts') + if use_mpegts is None: + use_mpegts = info_dict.get('is_live') + if use_mpegts: args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] @@ -325,25 +465,33 @@ class FFmpegFD(ExternalFD): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] + elif ext == 'mp4' and tmpfilename == '-': + args += ['-f', 'mpegts'] else: - args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])] + args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] + + args += self._configuration_args(('_o1', '_o', '')) args = [encodeArgument(opt) for opt in args] args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) - self._debug_cmd(args) proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) + if url in ('-', 'pipe:'): + self.on_process_started(proc, proc.stdin) try: retval = proc.wait() - except KeyboardInterrupt: + except BaseException as e: # subprocces.run would send the SIGKILL signal to ffmpeg and the # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). - if sys.platform != 'win32': - proc.communicate(b'q') + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): + process_communicate_or_kill(proc, b'q') + else: + proc.kill() + proc.wait() raise return retval @@ -355,7 +503,7 @@ class AVconvFD(FFmpegFD): _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() - if name.endswith('FD') and name != 'ExternalFD' + if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') ) @@ -368,4 +516,4 @@ def get_external_downloader(external_downloader): downloader . """ # Drop .exe extension on Windows bn = os.path.splitext(os.path.basename(external_downloader))[0] - return _BY_NAME[bn] + return _BY_NAME.get(bn) |