diff options
author | Jesús <heckyel@hyperbola.info> | 2021-10-18 15:24:21 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-10-18 15:24:21 -0500 |
commit | 5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e (patch) | |
tree | 65209bc739db35e31f1c9b5b868eb5df4fe12ae3 /hypervideo_dl/downloader | |
parent | 27fe903c511691c078942bef5ee9a05a43b15c8f (diff) | |
download | hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.lz hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.xz hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.zip |
update from upstream
Diffstat (limited to 'hypervideo_dl/downloader')
-rw-r--r-- | hypervideo_dl/downloader/__init__.py | 105 | ||||
-rw-r--r-- | hypervideo_dl/downloader/common.py | 171 | ||||
-rw-r--r-- | hypervideo_dl/downloader/dash.py | 88 | ||||
-rw-r--r-- | hypervideo_dl/downloader/external.py | 258 | ||||
-rw-r--r-- | hypervideo_dl/downloader/f4m.py | 19 | ||||
-rw-r--r-- | hypervideo_dl/downloader/fragment.py | 255 | ||||
-rw-r--r-- | hypervideo_dl/downloader/hls.py | 285 | ||||
-rw-r--r-- | hypervideo_dl/downloader/http.py | 37 | ||||
-rw-r--r-- | hypervideo_dl/downloader/ism.py | 58 | ||||
-rw-r--r-- | hypervideo_dl/downloader/mhtml.py | 202 | ||||
-rw-r--r-- | hypervideo_dl/downloader/niconico.py | 57 | ||||
-rw-r--r-- | hypervideo_dl/downloader/rtmp.py | 18 | ||||
-rw-r--r-- | hypervideo_dl/downloader/rtsp.py | 4 | ||||
-rw-r--r-- | hypervideo_dl/downloader/websocket.py | 59 | ||||
-rw-r--r-- | hypervideo_dl/downloader/youtube_live_chat.py | 236 |
15 files changed, 1516 insertions, 336 deletions
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py index 2e485df..2449c74 100644 --- a/hypervideo_dl/downloader/__init__.py +++ b/hypervideo_dl/downloader/__init__.py @@ -1,24 +1,47 @@ from __future__ import unicode_literals +from ..compat import compat_str +from ..utils import ( + determine_protocol, + NO_DEFAULT +) + + +def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): + info_dict['protocol'] = determine_protocol(info_dict) + info_copy = info_dict.copy() + info_copy['to_stdout'] = to_stdout + + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) + for proto in (protocol or info_copy['protocol']).split('+')] + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): + return FFmpegFD + elif len(downloaders) == 1: + return downloaders[0] + return None + + +# Some of these require get_suitable_downloader from .common import FileDownloader +from .dash import DashSegmentsFD from .f4m import F4mFD from .hls import HlsFD from .http import HttpFD from .rtmp import RtmpFD -from .dash import DashSegmentsFD from .rtsp import RtspFD from .ism import IsmFD +from .mhtml import MhtmlFD +from .niconico import NiconicoDmcFD +from .websocket import WebSocketFragmentFD +from .youtube_live_chat import YoutubeLiveChatFD from .external import ( get_external_downloader, FFmpegFD, ) -from ..utils import ( - determine_protocol, -) - PROTOCOL_MAP = { 'rtmp': RtmpFD, + 'rtmp_ffmpeg': FFmpegFD, 'm3u8_native': HlsFD, 'm3u8': FFmpegFD, 'mms': RtspFD, @@ -26,36 +49,78 @@ PROTOCOL_MAP = { 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, 'ism': IsmFD, + 'mhtml': MhtmlFD, + 'niconico_dmc': NiconicoDmcFD, + 'websocket_frag': WebSocketFragmentFD, + 'youtube_live_chat': YoutubeLiveChatFD, + 'youtube_live_chat_replay': YoutubeLiveChatFD, } -def get_suitable_downloader(info_dict, params={}): +def shorten_protocol_name(proto, simplify=False): + short_protocol_names = { + 'm3u8_native': 'm3u8_n', + 'rtmp_ffmpeg': 'rtmp_f', + 'http_dash_segments': 'dash', + 'niconico_dmc': 'dmc', + 'websocket_frag': 'WSfrag', + } + if simplify: + short_protocol_names.update({ + 'https': 'http', + 'ftps': 'ftp', + 'm3u8_native': 'm3u8', + 'rtmp_ffmpeg': 'rtmp', + 'm3u8_frag_urls': 'm3u8', + 'dash_frag_urls': 'dash', + }) + return short_protocol_names.get(proto, proto) + + +def _get_suitable_downloader(info_dict, protocol, params, default): """Get the downloader class that can handle the info dict.""" - protocol = determine_protocol(info_dict) - info_dict['protocol'] = protocol + if default is NO_DEFAULT: + default = HttpFD # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): # return FFmpegFD - external_downloader = params.get('external_downloader') - if external_downloader is not None: + info_dict['protocol'] = protocol + downloaders = params.get('external_downloader') + external_downloader = ( + downloaders if isinstance(downloaders, compat_str) or downloaders is None + else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))) + + if external_downloader is None: + if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): + return FFmpegFD + elif external_downloader.lower() != 'native': ed = get_external_downloader(external_downloader) - if ed.can_download(info_dict): + if ed.can_download(info_dict, external_downloader): return ed - if protocol.startswith('m3u8') and info_dict.get('is_live'): - return FFmpegFD - - if protocol == 'm3u8' and params.get('hls_prefer_native') is True: - return HlsFD + if protocol == 'http_dash_segments': + if info_dict.get('is_live') and (external_downloader or '').lower() != 'native': + return FFmpegFD - if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False: - return FFmpegFD + if protocol in ('m3u8', 'm3u8_native'): + if info_dict.get('is_live'): + return FFmpegFD + elif (external_downloader or '').lower() == 'native': + return HlsFD + elif get_suitable_downloader( + info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): + return HlsFD + elif params.get('hls_prefer_native') is True: + return HlsFD + elif params.get('hls_prefer_native') is False: + return FFmpegFD - return PROTOCOL_MAP.get(protocol, HttpFD) + return PROTOCOL_MAP.get(protocol, default) __all__ = [ - 'get_suitable_downloader', 'FileDownloader', + 'get_suitable_downloader', + 'shorten_protocol_name', ] diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py index d023168..27ca2cd 100644 --- a/hypervideo_dl/downloader/common.py +++ b/hypervideo_dl/downloader/common.py @@ -2,11 +2,9 @@ from __future__ import division, unicode_literals import os import re -import sys import time import random -from ..compat import compat_os_name from ..utils import ( decodeArgument, encodeFilename, @@ -15,6 +13,12 @@ from ..utils import ( shell_quote, timeconvert, ) +from ..minicurses import ( + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, + BreaklineStatusPrinter +) class FileDownloader(object): @@ -32,25 +36,28 @@ class FileDownloader(object): verbose: Print additional info to stdout. quiet: Do not print messages to stdout. ratelimit: Download speed limit, in bytes/sec. + throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) retries: Number of times to retry for HTTP error 5xx buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. - external_downloader_args: A list of additional command-line arguments for the - external downloader. + external_downloader_args: A dictionary of downloader keys (in lower case) + and a list of additional command-line arguments for the + executable. Use 'default' as the name for arguments to be + passed to all downloaders. For compatibility with youtube-dl, + a single list of args can also be used hls_use_mpegts: Use the mpegts container for HLS videos. http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be useful for bypassing bandwidth throttling imposed by a webserver (experimental) + progress_template: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ @@ -63,6 +70,7 @@ class FileDownloader(object): self.ydl = ydl self._progress_hooks = [] self.params = params + self._prepare_multiline_status() self.add_progress_hook(self.report_progress) @staticmethod @@ -147,10 +155,10 @@ class FileDownloader(object): return int(round(number * multiplier)) def to_screen(self, *args, **kargs): - self.ydl.to_screen(*args, **kargs) + self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs) def to_stderr(self, message): - self.ydl.to_screen(message) + self.ydl.to_stderr(message) def to_console_title(self, message): self.ydl.to_console_title(message) @@ -164,6 +172,9 @@ class FileDownloader(object): def report_error(self, *args, **kargs): self.ydl.report_error(*args, **kargs) + def write_debug(self, *args, **kargs): + self.ydl.write_debug(*args, **kargs) + def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" rate_limit = self.params.get('ratelimit') @@ -196,12 +207,12 @@ class FileDownloader(object): return filename + '.ytdl' def try_rename(self, old_filename, new_filename): + if old_filename == new_filename: + return try: - if old_filename == new_filename: - return - os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) + os.replace(old_filename, new_filename) except (IOError, OSError) as err: - self.report_error('unable to rename file: %s' % error_to_compat_str(err)) + self.report_error(f'unable to rename file: {err}') def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" @@ -228,39 +239,46 @@ class FileDownloader(object): """Report destination filename.""" self.to_screen('[download] Destination: ' + filename) - def _report_progress_status(self, msg, is_last_line=False): - fullmsg = '[download] ' + msg - if self.params.get('progress_with_newline', False): - self.to_screen(fullmsg) + def _prepare_multiline_status(self, lines=1): + if self.params.get('noprogress'): + self._multiline = QuietMultilinePrinter() + elif self.ydl.params.get('logger'): + self._multiline = MultilineLogger(self.ydl.params['logger'], lines) + elif self.params.get('progress_with_newline'): + self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) else: - if compat_os_name == 'nt': - prev_len = getattr(self, '_report_progress_prev_line_length', - 0) - if prev_len > len(fullmsg): - fullmsg += ' ' * (prev_len - len(fullmsg)) - self._report_progress_prev_line_length = len(fullmsg) - clear_line = '\r' - else: - clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r') - self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) - self.to_console_title('hypervideo ' + msg) + self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) + + def _finish_multiline_status(self): + self._multiline.end() + + def _report_progress_status(self, s): + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} + + progress_template = self.params.get('progress_template', {}) + self._multiline.print_at_line(self.ydl.evaluate_outtmpl( + progress_template.get('download') or '[download] %(progress._default_template)s', + progress_dict), s.get('progress_idx') or 0) + self.to_console_title(self.ydl.evaluate_outtmpl( + progress_template.get('download-title') or 'hypervideo %(progress._default_template)s', + progress_dict)) def report_progress(self, s): if s['status'] == 'finished': - if self.params.get('noprogress', False): + if self.params.get('noprogress'): self.to_screen('[download] Download completed') - else: - msg_template = '100%%' - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template += ' of %(_total_bytes_str)s' - if s.get('elapsed') is not None: - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template += ' in %(_elapsed_str)s' - self._report_progress_status( - msg_template % s, is_last_line=True) - - if self.params.get('noprogress'): + msg_template = '100%%' + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template += ' of %(_total_bytes_str)s' + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template += ' in %(_elapsed_str)s' + s['_percent_str'] = self.format_percent(100) + s['_default_template'] = msg_template % s + self._report_progress_status(s) return if s['status'] != 'downloading': @@ -302,8 +320,8 @@ class FileDownloader(object): msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - - self._report_progress_status(msg_template % s) + s['_default_template'] = msg_template % s + self._report_progress_status(s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -312,27 +330,30 @@ class FileDownloader(object): def report_retry(self, err, count, retries): """Report retry in case of HTTP error 5xx""" self.to_screen( - '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...' + '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' % (error_to_compat_str(err), count, self.format_retries(retries))) - def report_file_already_downloaded(self, file_name): + def report_file_already_downloaded(self, *args, **kwargs): """Report file has already been fully downloaded.""" - try: - self.to_screen('[download] %s has already been downloaded' % file_name) - except UnicodeEncodeError: - self.to_screen('[download] The file has already been downloaded') + return self.ydl.report_file_already_downloaded(*args, **kwargs) def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') - def download(self, filename, info_dict): + @staticmethod + def supports_manifest(manifest): + """ Whether the downloader can download the fragments from the manifest. + Redefine in subclasses if needed. """ + pass + + def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) + not self.params.get('overwrites', True) and os.path.exists(encodeFilename(filename)) ) @@ -350,26 +371,43 @@ class FileDownloader(object): 'filename': filename, 'status': 'finished', 'total_bytes': os.path.getsize(encodeFilename(filename)), - }) - return True - - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen( - '[download] Sleeping %s seconds...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) - - return self.real_download(filename, info_dict) + }, info_dict) + return True, False + + if subtitle is False: + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds ...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) + time.sleep(sleep_interval) + else: + sleep_interval_sub = 0 + if type(self.params.get('sleep_interval_subtitles')) is int: + sleep_interval_sub = self.params.get('sleep_interval_subtitles') + if sleep_interval_sub > 0: + self.to_screen( + '[download] Sleeping %s seconds ...' % ( + sleep_interval_sub)) + time.sleep(sleep_interval_sub) + ret = self.real_download(filename, info_dict) + self._finish_multiline_status() + return ret, True def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" raise NotImplementedError('This method must be implemented by subclasses') - def _hook_progress(self, status): + def _hook_progress(self, status, info_dict): + if not self._progress_hooks: + return + status['info_dict'] = info_dict + # youtube-dl passes the same status object to all the hooks. + # Some third party scripts seems to be relying on this. + # So keep this behavior if possible for ph in self._progress_hooks: ph(status) @@ -387,5 +425,4 @@ class FileDownloader(object): if exe is None: exe = os.path.basename(str_args[0]) - self.to_screen('[debug] %s command line: %s' % ( - exe, shell_quote(str_args))) + self.write_debug('%s command line: %s' % (exe, shell_quote(str_args))) diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py index c6d674b..6444ad6 100644 --- a/hypervideo_dl/downloader/dash.py +++ b/hypervideo_dl/downloader/dash.py @@ -1,80 +1,62 @@ from __future__ import unicode_literals +from ..downloader import get_suitable_downloader from .fragment import FragmentFD -from ..compat import compat_urllib_error -from ..utils import ( - DownloadError, - urljoin, -) + +from ..utils import urljoin class DashSegmentsFD(FragmentFD): """ - Download segments in a DASH manifest + Download segments in a DASH manifest. External downloaders can take over + the fragment downloads by supporting the 'dash_frag_urls' protocol """ FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): + if info_dict.get('is_live'): + self.report_error('Live DASH videos are not supported') + fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) + ctx = { 'filename': filename, 'total_frags': len(fragments), } - self._prepare_and_start_frag_download(ctx) - - fragment_retries = self.params.get('fragment_retries', 0) - skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, info_dict) + fragments_to_download = [] frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 if frag_index <= ctx['fragment_index']: continue - # In DASH, the first segment contains necessary headers to - # generate a valid MP4 file, so always abort for the first segment - fatal = i == 0 or not skip_unavailable_fragments - count = 0 - while count <= fragment_retries: - try: - fragment_url = fragment.get('url') - if not fragment_url: - assert fragment_base_url - fragment_url = urljoin(fragment_base_url, fragment['path']) - success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) - if not success: - return False - self._append_fragment(ctx, frag_content) - break - except compat_urllib_error.HTTPError as err: - # YouTube may often return 404 HTTP error for a fragment causing the - # whole download to fail. However if the same fragment is immediately - # retried with the same request data this usually succeeds (1-2 attempts - # is usually enough) thus allowing to download the whole file successfully. - # To be future-proof we will retry all fragments that fail with any - # HTTP error. - count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - except DownloadError: - # Don't retry fragment if error occurred during HTTP downloading - # itself since it has own retry settings - if not fatal: - self.report_skip_fragment(frag_index) - break - raise - - if count > fragment_retries: - if not fatal: - self.report_skip_fragment(frag_index) - continue - self.report_error('giving up after %s fragment retries' % fragment_retries) - return False - - self._finish_frag_download(ctx) - - return True + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + + fragments_to_download.append({ + 'frag_index': frag_index, + 'index': i, + 'url': fragment_url, + }) + + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + info_dict['fragments'] = fragments_to_download + fd = real_downloader(self.ydl, self.params) + return fd.real_download(filename, info_dict) + + return self.download_and_append_fragments(ctx, fragments_to_download, info_dict) diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py index c31f891..74adb05 100644 --- a/hypervideo_dl/downloader/external.py +++ b/hypervideo_dl/downloader/external.py @@ -6,7 +6,7 @@ import subprocess import sys import time -from .common import FileDownloader +from .fragment import FragmentFD from ..compat import ( compat_setenv, compat_str, @@ -16,16 +16,21 @@ from ..utils import ( cli_option, cli_valueless_option, cli_bool_option, - cli_configuration_args, + _configuration_args, encodeFilename, encodeArgument, handle_youtubedl_headers, check_executable, is_outdated_version, + process_communicate_or_kill, + sanitize_open, ) -class ExternalFD(FileDownloader): +class ExternalFD(FragmentFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') + can_download_to_stdout = False + def real_download(self, filename, info_dict): self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -56,7 +61,7 @@ class ExternalFD(FileDownloader): 'downloaded_bytes': fsize, 'total_bytes': fsize, }) - self._hook_progress(status) + self._hook_progress(status, info_dict) return True else: self.to_stderr('\n') @@ -70,19 +75,25 @@ class ExternalFD(FileDownloader): @property def exe(self): - return self.params.get('external_downloader') + return self.get_basename() @classmethod - def available(cls): - return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT]) + def available(cls, path=None): + path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT]) + if path: + cls.exe = path + return path + return False @classmethod def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') + return ( + (cls.can_download_to_stdout or not info_dict.get('to_stdout')) + and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS) @classmethod - def can_download(cls, info_dict): - return cls.available() and cls.supports(info_dict) + def can_download(cls, info_dict, path=None): + return cls.available(path) and cls.supports(info_dict) def _option(self, command_option, param): return cli_option(self.params, command_option, param) @@ -93,8 +104,10 @@ class ExternalFD(FileDownloader): def _valueless_option(self, command_option, param, expected_value=True): return cli_valueless_option(self.params, command_option, param, expected_value) - def _configuration_args(self, default=[]): - return cli_configuration_args(self.params, 'external_downloader_args', default) + def _configuration_args(self, keys=None, *args, **kwargs): + return _configuration_args( + self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(), + keys, *args, **kwargs) def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ @@ -102,12 +115,56 @@ class ExternalFD(FileDownloader): self._debug_cmd(cmd) - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = p.communicate() - if p.returncode != 0: + if 'fragments' not in info_dict: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode != 0: + self.to_stderr(stderr.decode('utf-8', 'replace')) + return p.returncode + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + count = 0 + while count <= fragment_retries: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode == 0: + break + # TODO: Decide whether to retry based on error code + # https://aria2.github.io/manual/en/html/aria2c.html#exit-status self.to_stderr(stderr.decode('utf-8', 'replace')) - return p.returncode + count += 1 + if count <= fragment_retries: + self.to_screen( + '[%s] Got error. Retrying fragments (attempt %d of %s)...' + % (self.get_basename(), count, self.format_retries(fragment_retries))) + if count > fragment_retries: + if not skip_unavailable_fragments: + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return -1 + + decrypt_fragment = self.decrypter(info_dict) + dest, _ = sanitize_open(tmpfilename, 'wb') + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + try: + src, _ = sanitize_open(fragment_filename, 'rb') + except IOError: + if skip_unavailable_fragments and frag_index > 1: + self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + continue + self.report_error('Unable to open fragment %d' % frag_index) + return -1 + dest.write(decrypt_fragment(fragment, src.read())) + src.close() + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(fragment_filename)) + dest.close() + os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + return 0 class CurlFD(ExternalFD): @@ -115,8 +172,10 @@ class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') @@ -141,7 +200,7 @@ class CurlFD(ExternalFD): # curl writes the progress to stderr so don't capture it. p = subprocess.Popen(cmd) - p.communicate() + process_communicate_or_kill(p) return p.returncode @@ -150,8 +209,9 @@ class AxelFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] - for key, val in info_dict['http_headers'].items(): - cmd += ['-H', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -162,8 +222,9 @@ class WgetFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--tries', 'retries') if len(retry) == 2: @@ -180,51 +241,115 @@ class WgetFD(ExternalFD): class Aria2cFD(ExternalFD): AVAILABLE_OPT = '-v' + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls') + + @staticmethod + def supports_manifest(manifest): + UNSUPPORTED_FEATURES = [ + r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + ] + check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) + return all(check_results) def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c'] - cmd += self._configuration_args([ - '--min-split-size', '1M', '--max-connection-per-server', '4']) - dn = os.path.dirname(tmpfilename) - if dn: - cmd += ['--dir', dn] - cmd += ['--out', os.path.basename(tmpfilename)] - for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd = [self.exe, '-c', + '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', + '--file-allocation=none', '-x16', '-j16', '-s16'] + if 'fragments' in info_dict: + cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] + else: + cmd += ['--min-split-size', '1M'] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--max-overall-download-limit', 'ratelimit') cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') - cmd += ['--', info_dict['url']] + cmd += self._configuration_args() + + # aria2c strips out spaces from the beginning/end of filenames and paths. + # We work around this issue by adding a "./" to the beginning of the + # filename and relative path, and adding a "/" at the end of the path. + # See: https://github.com/hypervideo/hypervideo/issues/276 + # https://github.com/ytdl-org/youtube-dl/issues/20312 + # https://github.com/aria2/aria2/issues/1373 + dn = os.path.dirname(tmpfilename) + if dn: + if not os.path.isabs(dn): + dn = '.%s%s' % (os.path.sep, dn) + cmd += ['--dir', dn + os.path.sep] + if 'fragments' not in info_dict: + cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))] + cmd += ['--auto-file-renaming=false'] + + if 'fragments' in info_dict: + cmd += ['--file-allocation=none', '--uri-selector=inorder'] + url_list_file = '%s.frag.urls' % tmpfilename + url_list = [] + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) + url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) + stream, _ = sanitize_open(url_list_file, 'wb') + stream.write('\n'.join(url_list).encode('utf-8')) + stream.close() + cmd += ['-i', url_list_file] + else: + cmd += ['--', info_dict['url']] return cmd class HttpieFD(ExternalFD): + AVAILABLE_OPT = '--version' + @classmethod - def available(cls): - return check_executable('http', ['--version']) + def available(cls, path=None): + return ExternalFD.available(cls, path or 'http') def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] - for key, val in info_dict['http_headers'].items(): - cmd += ['%s:%s' % (key, val)] + + if info_dict.get('http_headers') is not None: + for key, val in info_dict['http_headers'].items(): + cmd += ['%s:%s' % (key, val)] return cmd class FFmpegFD(ExternalFD): + SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') + can_download_to_stdout = True + + @classmethod + def available(cls, path=None): + # TODO: Fix path for ffmpeg + # Fixme: This may be wrong when --ffmpeg-location is used + return FFmpegPostProcessor().available + @classmethod def supports(cls, info_dict): - return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms') + return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')) + + def on_process_started(self, proc, stdin): + """ Override this in subclasses """ + pass @classmethod - def available(cls): - return FFmpegPostProcessor().available + def can_merge_formats(cls, info_dict, params): + return ( + info_dict.get('requested_formats') + and info_dict.get('protocol') + and not params.get('allow_unplayable_formats') + and 'no-direct-merge' not in params.get('compat_opts', []) + and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): - url = info_dict['url'] + urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']] ffpp = FFmpegPostProcessor(downloader=self) if not ffpp.available: - self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') + self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') return False ffpp.check_version() @@ -234,7 +359,12 @@ class FFmpegFD(ExternalFD): if self.params.get(log_level, False): args += ['-loglevel', log_level] break + if not self.params.get('verbose'): + args += ['-hide_banner'] + args += info_dict.get('_ffmpeg_args', []) + + # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server @@ -244,8 +374,6 @@ class FFmpegFD(ExternalFD): # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] - args += self._configuration_args() - # start_time = info_dict.get('start_time') or 0 # if start_time: # args += ['-ss', compat_str(start_time)] @@ -253,7 +381,7 @@ class FFmpegFD(ExternalFD): # if end_time: # args += ['-t', compat_str(end_time - start_time)] - if info_dict['http_headers'] and re.match(r'^https?://', url): + if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]): # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. headers = handle_youtubedl_headers(info_dict['http_headers']) @@ -311,13 +439,25 @@ class FFmpegFD(ExternalFD): elif isinstance(conn, compat_str): args += ['-rtmp_conn', conn] - args += ['-i', url, '-c', 'copy'] + for i, url in enumerate(urls): + args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] + + args += ['-c', 'copy'] + if info_dict.get('requested_formats') or protocol == 'http_dash_segments': + for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): + stream_number = fmt.get('manifest_stream_number', 0) + a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v' + args.extend(['-map', f'{i}:{a_or_v}:{stream_number}']) if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] + ext = info_dict['ext'] if protocol in ('m3u8', 'm3u8_native'): - if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': + use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts') + if use_mpegts is None: + use_mpegts = info_dict.get('is_live') + if use_mpegts: args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] @@ -325,25 +465,33 @@ class FFmpegFD(ExternalFD): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] + elif ext == 'mp4' and tmpfilename == '-': + args += ['-f', 'mpegts'] else: - args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])] + args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] + + args += self._configuration_args(('_o1', '_o', '')) args = [encodeArgument(opt) for opt in args] args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) - self._debug_cmd(args) proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) + if url in ('-', 'pipe:'): + self.on_process_started(proc, proc.stdin) try: retval = proc.wait() - except KeyboardInterrupt: + except BaseException as e: # subprocces.run would send the SIGKILL signal to ffmpeg and the # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). - if sys.platform != 'win32': - proc.communicate(b'q') + if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): + process_communicate_or_kill(proc, b'q') + else: + proc.kill() + proc.wait() raise return retval @@ -355,7 +503,7 @@ class AVconvFD(FFmpegFD): _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() - if name.endswith('FD') and name != 'ExternalFD' + if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') ) @@ -368,4 +516,4 @@ def get_external_downloader(external_downloader): downloader . """ # Drop .exe extension on Windows bn = os.path.splitext(os.path.basename(external_downloader))[0] - return _BY_NAME[bn] + return _BY_NAME.get(bn) diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py index 8dd3c2e..9da2776 100644 --- a/hypervideo_dl/downloader/f4m.py +++ b/hypervideo_dl/downloader/f4m.py @@ -267,13 +267,14 @@ class F4mFD(FragmentFD): media = doc.findall(_add_ns('media')) if not media: self.report_error('No media found') - for e in (doc.findall(_add_ns('drmAdditionalHeader')) - + doc.findall(_add_ns('drmAdditionalHeaderSet'))): - # If id attribute is missing it's valid for all media nodes - # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute - if 'id' not in e.attrib: - self.report_error('Missing ID in f4m DRM') - media = remove_encrypted_media(media) + if not self.params.get('allow_unplayable_formats'): + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): + # If id attribute is missing it's valid for all media nodes + # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute + if 'id' not in e.attrib: + self.report_error('Missing ID in f4m DRM') + media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media @@ -379,7 +380,7 @@ class F4mFD(FragmentFD): base_url_parsed = compat_urllib_parse_urlparse(base_url) - self._start_frag_download(ctx) + self._start_frag_download(ctx, info_dict) frag_index = 0 while fragments_list: @@ -433,6 +434,6 @@ class F4mFD(FragmentFD): msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) self.report_warning(msg) - self._finish_frag_download(ctx) + self._finish_frag_download(ctx, info_dict) return True diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py index b82e3cf..57068db 100644 --- a/hypervideo_dl/downloader/fragment.py +++ b/hypervideo_dl/downloader/fragment.py @@ -3,10 +3,23 @@ from __future__ import division, unicode_literals import os import time import json +from math import ceil + +try: + import concurrent.futures + can_threaded_download = True +except ImportError: + can_threaded_download = False from .common import FileDownloader from .http import HttpFD +from ..aes import aes_cbc_decrypt_bytes +from ..compat import ( + compat_urllib_error, + compat_struct_pack, +) from ..utils import ( + DownloadError, error_to_compat_str, encodeFilename, sanitize_open, @@ -31,6 +44,7 @@ class FragmentFD(FileDownloader): Skip unavailable fragments (DASH and hlsnative only) keep_fragments: Keep downloaded fragments on disk after downloading is finished + _no_ytdl_file: Don't use .ytdl file For each incomplete fragment download hypervideo keeps on disk a special bookkeeping file with download state and metadata (in future such files will @@ -55,29 +69,31 @@ class FragmentFD(FileDownloader): def report_retry_fragment(self, err, frag_index, count, retries): self.to_screen( - '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...' + '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) def report_skip_fragment(self, frag_index): - self.to_screen('[download] Skipping fragment %d...' % frag_index) + self.to_screen('[download] Skipping fragment %d ...' % frag_index) def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') return sanitized_Request(url, None, headers) if headers else url - def _prepare_and_start_frag_download(self, ctx): + def _prepare_and_start_frag_download(self, ctx, info_dict): self._prepare_frag_download(ctx) - self._start_frag_download(ctx) + self._start_frag_download(ctx, info_dict) - @staticmethod - def __do_ytdl_file(ctx): - return not ctx['live'] and not ctx['tmpfilename'] == '-' + def __do_ytdl_file(self, ctx): + return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file') def _read_ytdl_file(self, ctx): assert 'ytdl_corrupt' not in ctx stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') try: - ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] + ytdl_data = json.loads(stream.read()) + ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index'] + if 'extra_state' in ytdl_data['downloader']: + ctx['extra_state'] = ytdl_data['downloader']['extra_state'] except Exception: ctx['ytdl_corrupt'] = True finally: @@ -85,32 +101,42 @@ class FragmentFD(FileDownloader): def _write_ytdl_file(self, ctx): frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') - downloader = { - 'current_fragment': { - 'index': ctx['fragment_index'], - }, - } - if ctx.get('fragment_count') is not None: - downloader['fragment_count'] = ctx['fragment_count'] - frag_index_stream.write(json.dumps({'downloader': downloader})) - frag_index_stream.close() + try: + downloader = { + 'current_fragment': { + 'index': ctx['fragment_index'], + }, + } + if 'extra_state' in ctx: + downloader['extra_state'] = ctx['extra_state'] + if ctx.get('fragment_count') is not None: + downloader['fragment_count'] = ctx['fragment_count'] + frag_index_stream.write(json.dumps({'downloader': downloader})) + finally: + frag_index_stream.close() - def _download_fragment(self, ctx, frag_url, info_dict, headers=None): + def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None): fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_info_dict = { 'url': frag_url, 'http_headers': headers or info_dict.get('http_headers'), + 'request_data': request_data, + 'ctx_id': ctx.get('ctx_id'), } success = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: return False, None if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') - down, frag_sanitized = sanitize_open(fragment_filename, 'rb') + ctx['fragment_filename_sanitized'] = fragment_filename + return True, self._read_fragment(ctx) + + def _read_fragment(self, ctx): + down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb') ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() - return True, frag_content + return frag_content def _append_fragment(self, ctx, frag_content): try: @@ -173,7 +199,7 @@ class FragmentFD(FileDownloader): '.ytdl file is corrupt' if is_corrupt else 'Inconsistent state of incomplete fragment download') self.report_warning( - '%s. Restarting from the beginning...' % message) + '%s. Restarting from the beginning ...' % message) ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] @@ -192,9 +218,10 @@ class FragmentFD(FileDownloader): 'complete_frags_downloaded_bytes': resume_len, }) - def _start_frag_download(self, ctx): + def _start_frag_download(self, ctx, info_dict): resume_len = ctx['complete_frags_downloaded_bytes'] total_frags = ctx['total_frags'] + ctx_id = ctx.get('ctx_id') # This dict stores the download progress, it's updated by the progress # hook state = { @@ -218,9 +245,16 @@ class FragmentFD(FileDownloader): if s['status'] not in ('downloading', 'finished'): return + if ctx_id is not None and s.get('ctx_id') != ctx_id: + return + + state['max_progress'] = ctx.get('max_progress') + state['progress_idx'] = ctx.get('progress_idx') + time_now = time.time() state['elapsed'] = time_now - start frag_total_bytes = s.get('total_bytes') or 0 + s['fragment_info_dict'] = s.pop('info_dict', {}) if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) @@ -243,13 +277,13 @@ class FragmentFD(FileDownloader): state['speed'] = s.get('speed') or ctx.get('speed') ctx['speed'] = state['speed'] ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes - self._hook_progress(state) + self._hook_progress(state, info_dict) ctx['dl'].add_progress_hook(frag_progress_hook) return start - def _finish_frag_download(self, ctx): + def _finish_frag_download(self, ctx, info_dict): ctx['dest_stream'].close() if self.__do_ytdl_file(ctx): ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) @@ -276,4 +310,177 @@ class FragmentFD(FileDownloader): 'filename': ctx['filename'], 'status': 'finished', 'elapsed': elapsed, + 'ctx_id': ctx.get('ctx_id'), + 'max_progress': ctx.get('max_progress'), + 'progress_idx': ctx.get('progress_idx'), + }, info_dict) + + def _prepare_external_frag_download(self, ctx): + if 'live' not in ctx: + ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' + self.to_screen( + '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + + tmpfilename = self.temp_name(ctx['filename']) + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, }) + + def decrypter(self, info_dict): + _key_cache = {} + + def _get_key(url): + if url not in _key_cache: + _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read() + return _key_cache[url] + + def decrypt_fragment(fragment, frag_content): + decrypt_info = fragment.get('decrypt_info') + if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': + return frag_content + iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence']) + decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI']) + # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block + # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, + # not what it decrypts to. + if self.params.get('test', False): + return frag_content + padding_len = 16 - (len(frag_content) % 16) + decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv) + return decrypted_data[:-decrypted_data[-1]] + + return decrypt_fragment + + def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None): + ''' + @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... + all args must be either tuple or list + ''' + max_progress = len(args) + if max_progress == 1: + return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) + max_workers = self.params.get('concurrent_fragment_downloads', max_progress) + self._prepare_multiline_status(max_progress) + + def thread_func(idx, ctx, fragments, info_dict, tpe): + ctx['max_progress'] = max_progress + ctx['progress_idx'] = idx + return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe) + + class FTPE(concurrent.futures.ThreadPoolExecutor): + # has to stop this or it's going to wait on the worker thread itself + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + spins = [] + for idx, (ctx, fragments, info_dict) in enumerate(args): + tpe = FTPE(ceil(max_workers / max_progress)) + job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) + spins.append((tpe, job)) + + result = True + for tpe, job in spins: + try: + result = result and job.result() + finally: + tpe.shutdown(wait=True) + return result + + def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None): + fragment_retries = self.params.get('fragment_retries', 0) + is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True) + if not pack_func: + pack_func = lambda frag_content, _: frag_content + + def download_fragment(fragment, ctx): + frag_index = ctx['fragment_index'] = fragment['frag_index'] + headers = info_dict.get('http_headers', {}).copy() + byte_range = fragment.get('byte_range') + if byte_range: + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) + + # Never skip the first fragment + fatal = is_fatal(fragment.get('index') or (frag_index - 1)) + count, frag_content = 0, None + while count <= fragment_retries: + try: + success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers) + if not success: + return False, frag_index + break + except compat_urllib_error.HTTPError as err: + # Unavailable (possibly temporary) fragments may be served. + # First we try to retry then either skip or abort. + # See https://github.com/ytdl-org/youtube-dl/issues/10165, + # https://github.com/ytdl-org/youtube-dl/issues/10448). + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + except DownloadError: + # Don't retry fragment if error occurred during HTTP downloading + # itself since it has own retry settings + if not fatal: + break + raise + + if count > fragment_retries: + if not fatal: + return False, frag_index + ctx['dest_stream'].close() + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return False, frag_index + return frag_content, frag_index + + def append_fragment(frag_content, frag_index, ctx): + if not frag_content: + if not is_fatal(frag_index - 1): + self.report_skip_fragment(frag_index) + return True + else: + ctx['dest_stream'].close() + self.report_error( + 'fragment %s not found, unable to continue' % frag_index) + return False + self._append_fragment(ctx, pack_func(frag_content, frag_index)) + return True + + decrypt_fragment = self.decrypter(info_dict) + + max_workers = self.params.get('concurrent_fragment_downloads', 1) + if can_threaded_download and max_workers > 1: + + def _download_fragment(fragment): + ctx_copy = ctx.copy() + frag_content, frag_index = download_fragment(fragment, ctx_copy) + return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') + + self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') + with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: + for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): + ctx['fragment_filename_sanitized'] = frag_filename + ctx['fragment_index'] = frag_index + result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + if not result: + return False + else: + for fragment in fragments: + frag_content, frag_index = download_fragment(fragment, ctx) + result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) + if not result: + return False + + if finish_func is not None: + ctx['dest_stream'].write(finish_func()) + ctx['dest_stream'].flush() + self._finish_frag_download(ctx, info_dict) + return True diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py index 7aaebc9..61312c5 100644 --- a/hypervideo_dl/downloader/hls.py +++ b/hypervideo_dl/downloader/hls.py @@ -1,36 +1,37 @@ from __future__ import unicode_literals import re +import io import binascii -try: - from Crypto.Cipher import AES - can_decrypt_frag = True -except ImportError: - can_decrypt_frag = False +from ..downloader import get_suitable_downloader from .fragment import FragmentFD from .external import FFmpegFD from ..compat import ( - compat_urllib_error, + compat_pycrypto_AES, compat_urlparse, - compat_struct_pack, ) from ..utils import ( parse_m3u8_attributes, update_url_query, + bug_reports_message, ) +from .. import webvtt class HlsFD(FragmentFD): - """ A limited implementation that does not require ffmpeg """ + """ + Download segments in a m3u8 manifest. External downloaders can take over + the fragment downloads by supporting the 'm3u8_frag_urls' protocol and + re-defining 'supports_manifest' function + """ FD_NAME = 'hlsnative' @staticmethod - def can_download(manifest, info_dict): - UNSUPPORTED_FEATURES = ( - r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + def can_download(manifest, info_dict, allow_unplayable_formats=False): + UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. geo restricted to Germany @@ -42,20 +43,23 @@ class HlsFD(FragmentFD): # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] - r'#EXT-X-MAP:', # media initialization [5] - + # r'#EXT-X-MAP:', # media initialization [5] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 - ) - check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] - is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest - check_results.append(can_decrypt_frag or not is_aes128_enc) - check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)) - check_results.append(not info_dict.get('is_live')) - return all(check_results) + ] + if not allow_unplayable_formats: + UNSUPPORTED_FEATURES += [ + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + ] + + def check_results(): + yield not info_dict.get('is_live') + for feature in UNSUPPORTED_FEATURES: + yield not re.search(feature, manifest) + return all(check_results()) def real_download(self, filename, info_dict): man_url = info_dict['url'] @@ -65,17 +69,32 @@ class HlsFD(FragmentFD): man_url = urlh.geturl() s = urlh.read().decode('utf-8', 'ignore') - if not self.can_download(s, info_dict): - if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'): - self.report_error('pycrypto not found. Please install it.') - return False - self.report_warning( - 'hlsnative has detected features it does not support, ' - 'extraction will be delegated to ffmpeg') + can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None + if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: + if FFmpegFD.available(): + can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' + else: + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' + 'Decryption will be performed natively, but will be extremely slow') + if not can_download: + message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) + self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') return fd.real_download(filename, info_dict) + elif message: + self.report_warning(message) + + is_webvtt = info_dict['ext'] == 'vtt' + if is_webvtt: + real_downloader = None # Packing the fragments is not currently supported for external downloader + else: + real_downloader = get_suitable_downloader( + info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-')) + if real_downloader and not real_downloader.supports_manifest(s): + real_downloader = None + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s @@ -85,6 +104,8 @@ class HlsFD(FragmentFD): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + fragments = [] + media_frags = 0 ad_frags = 0 ad_frag_next = False @@ -109,12 +130,14 @@ class HlsFD(FragmentFD): 'ad_frags': ad_frags, } - self._prepare_and_start_frag_download(ctx) + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, info_dict) - fragment_retries = self.params.get('fragment_retries', 0) - skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - test = self.params.get('test', False) + extra_state = ctx.setdefault('extra_state', {}) + format_index = info_dict.get('format_index') extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: @@ -123,12 +146,15 @@ class HlsFD(FragmentFD): media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} byte_range = {} + discontinuity_count = 0 frag_index = 0 ad_frag_next = False for line in s.splitlines(): line = line.strip() if line: if not line.startswith('#'): + if format_index and discontinuity_count != format_index: + continue if ad_frag_next: continue frag_index += 1 @@ -140,50 +166,49 @@ class HlsFD(FragmentFD): else compat_urlparse.urljoin(man_url, line)) if extra_query: frag_url = update_url_query(frag_url, extra_query) - count = 0 - headers = info_dict.get('http_headers', {}) - if byte_range: - headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) - while count <= fragment_retries: - try: - success, frag_content = self._download_fragment( - ctx, frag_url, info_dict, headers) - if not success: - return False - break - except compat_urllib_error.HTTPError as err: - # Unavailable (possibly temporary) fragments may be served. - # First we try to retry then either skip or abort. - # See https://github.com/ytdl-org/youtube-dl/issues/10165, - # https://github.com/ytdl-org/youtube-dl/issues/10448). - count += 1 - if count <= fragment_retries: - self.report_retry_fragment(err, frag_index, count, fragment_retries) - if count > fragment_retries: - if skip_unavailable_fragments: - i += 1 - media_sequence += 1 - self.report_skip_fragment(frag_index) - continue + + fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence, + }) + media_sequence += 1 + + elif line.startswith('#EXT-X-MAP'): + if format_index and discontinuity_count != format_index: + continue + if frag_index > 0: self.report_error( - 'giving up after %s fragment retries' % fragment_retries) + 'Initialization fragment found after media fragments, unable to download') return False - if decrypt_info['METHOD'] == 'AES-128': - iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) - decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( - self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read() - # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block - # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, - # not what it decrypts to. - if not test: - frag_content = AES.new( - decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) - self._append_fragment(ctx, frag_content) - # We only download the first fragment during the test - if test: - break - i += 1 + frag_index += 1 + map_info = parse_m3u8_attributes(line[11:]) + frag_url = ( + map_info.get('URI') + if re.match(r'^https?://', map_info.get('URI')) + else compat_urlparse.urljoin(man_url, map_info.get('URI'))) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) + + fragments.append({ + 'frag_index': frag_index, + 'url': frag_url, + 'decrypt_info': decrypt_info, + 'byte_range': byte_range, + 'media_sequence': media_sequence + }) media_sequence += 1 + + if map_info.get('BYTERANGE'): + splitted_byte_range = map_info.get('BYTERANGE').split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + elif line.startswith('#EXT-X-KEY'): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) @@ -197,6 +222,7 @@ class HlsFD(FragmentFD): decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None + elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) elif line.startswith('#EXT-X-BYTERANGE'): @@ -210,7 +236,114 @@ class HlsFD(FragmentFD): ad_frag_next = True elif is_ad_fragment_end(line): ad_frag_next = False + elif line.startswith('#EXT-X-DISCONTINUITY'): + discontinuity_count += 1 + i += 1 + + # We only download the first fragment during the test + if self.params.get('test', False): + fragments = [fragments[0] if fragments else None] + + if real_downloader: + info_dict['fragments'] = fragments + fd = real_downloader(self.ydl, self.params) + # TODO: Make progress updates work without hooking twice + # for ph in self._progress_hooks: + # fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) + + if is_webvtt: + def pack_fragment(frag_content, frag_index): + output = io.StringIO() + adjust = 0 + overflow = False + mpegts_last = None + for block in webvtt.parse_fragment(frag_content): + if isinstance(block, webvtt.CueBlock): + extra_state['webvtt_mpegts_last'] = mpegts_last + if overflow: + extra_state['webvtt_mpegts_adjust'] += 1 + overflow = False + block.start += adjust + block.end += adjust + + dedup_window = extra_state.setdefault('webvtt_dedup_window', []) + + ready = [] + + i = 0 + is_new = True + while i < len(dedup_window): + wcue = dedup_window[i] + wblock = webvtt.CueBlock.from_json(wcue) + i += 1 + if wblock.hinges(block): + wcue['end'] = block.end + is_new = False + continue + if wblock == block: + is_new = False + continue + if wblock.end > block.start: + continue + ready.append(wblock) + i -= 1 + del dedup_window[i] + + if is_new: + dedup_window.append(block.as_json) + for block in ready: + block.write_into(output) + + # we only emit cues once they fall out of the duplicate window + continue + elif isinstance(block, webvtt.Magic): + # take care of MPEG PES timestamp overflow + if block.mpegts is None: + block.mpegts = 0 + extra_state.setdefault('webvtt_mpegts_adjust', 0) + block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33 + if block.mpegts < extra_state.get('webvtt_mpegts_last', 0): + overflow = True + block.mpegts += 1 << 33 + mpegts_last = block.mpegts + + if frag_index == 1: + extra_state['webvtt_mpegts'] = block.mpegts or 0 + extra_state['webvtt_local'] = block.local or 0 + # XXX: block.local = block.mpegts = None ? + else: + if block.mpegts is not None and block.local is not None: + adjust = ( + (block.mpegts - extra_state.get('webvtt_mpegts', 0)) + - (block.local - extra_state.get('webvtt_local', 0)) + ) + continue + elif isinstance(block, webvtt.HeaderBlock): + if frag_index != 1: + # XXX: this should probably be silent as well + # or verify that all segments contain the same data + self.report_warning(bug_reports_message( + 'Discarding a %s block found in the middle of the stream; ' + 'if the subtitles display incorrectly,' + % (type(block).__name__))) + continue + block.write_into(output) + + return output.getvalue().encode('utf-8') + + def fin_fragments(): + dedup_window = extra_state.get('webvtt_dedup_window') + if not dedup_window: + return b'' + + output = io.StringIO() + for cue in dedup_window: + webvtt.CueBlock.from_json(cue).write_into(output) - self._finish_frag_download(ctx) + return output.getvalue().encode('utf-8') - return True + self.download_and_append_fragments( + ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) + else: + return self.download_and_append_fragments(ctx, fragments, info_dict) diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py index d8ac41d..2e95bb9 100644 --- a/hypervideo_dl/downloader/http.py +++ b/hypervideo_dl/downloader/http.py @@ -18,6 +18,7 @@ from ..utils import ( int_or_none, sanitize_open, sanitized_Request, + ThrottledDownload, write_xattr, XAttrMetadataError, XAttrUnavailableError, @@ -27,6 +28,7 @@ from ..utils import ( class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] + request_data = info_dict.get('request_data', None) class DownloadContext(dict): __getattr__ = dict.get @@ -46,8 +48,9 @@ class HttpFD(FileDownloader): is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( - info_dict.get('downloader_options', {}).get('http_chunk_size') - or self.params.get('http_chunk_size') or 0) + self.params.get('http_chunk_size') + or info_dict.get('downloader_options', {}).get('http_chunk_size') + or 0) ctx.open_mode = 'wb' ctx.resume_len = 0 @@ -55,6 +58,7 @@ class HttpFD(FileDownloader): ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() ctx.chunk_size = None + throttle_start = None if self.params.get('continuedl', True): # Establish possible resume length @@ -101,7 +105,7 @@ class HttpFD(FileDownloader): range_end = ctx.data_len - 1 has_range = range_start is not None ctx.has_range = has_range - request = sanitized_Request(url, None, headers) + request = sanitized_Request(url, request_data, headers) if has_range: set_range(request, range_start, range_end) # Establish connection @@ -152,7 +156,7 @@ class HttpFD(FileDownloader): try: # Open the connection again without the range header ctx.data = self.ydl.urlopen( - sanitized_Request(url, None, headers)) + sanitized_Request(url, request_data, headers)) content_length = ctx.data.info()['Content-Length'] except (compat_urllib_error.HTTPError, ) as err: if err.code < 500 or err.code >= 600: @@ -175,7 +179,7 @@ class HttpFD(FileDownloader): 'status': 'finished', 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, - }) + }, info_dict) raise SucceedDownload() else: # The length does not match, we start the download over @@ -194,6 +198,7 @@ class HttpFD(FileDownloader): raise RetryDownload(err) def download(): + nonlocal throttle_start data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver @@ -235,7 +240,7 @@ class HttpFD(FileDownloader): while True: try: # Download and write - data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter)) + data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) # socket.timeout is a subclass of socket.error but may not have # errno set except socket.timeout as e: @@ -307,11 +312,24 @@ class HttpFD(FileDownloader): 'eta': eta, 'speed': speed, 'elapsed': now - ctx.start_time, - }) + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) if data_len is not None and byte_counter == data_len: break + if speed and speed < (self.params.get('throttledratelimit') or 0): + # The speed must stay below the limit for 3 seconds + # This prevents raising error when the speed temporarily goes down + if throttle_start is None: + throttle_start = now + elif now - throttle_start > 3: + if ctx.stream is not None and ctx.tmpfilename != '-': + ctx.stream.close() + raise ThrottledDownload() + elif speed: + throttle_start = None + if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: ctx.resume_len = byte_counter # ctx.block_size = block_size @@ -342,7 +360,8 @@ class HttpFD(FileDownloader): 'filename': ctx.filename, 'status': 'finished', 'elapsed': time.time() - ctx.start_time, - }) + 'ctx_id': info_dict.get('ctx_id'), + }, info_dict) return True @@ -354,6 +373,8 @@ class HttpFD(FileDownloader): count += 1 if count <= retries: self.report_retry(e.source_error, count, retries) + else: + self.to_screen(f'[download] Got server HTTP error: {e.source_error}') continue except NextFragment: continue diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py index 1ca666b..09516ab 100644 --- a/hypervideo_dl/downloader/ism.py +++ b/hypervideo_dl/downloader/ism.py @@ -48,7 +48,7 @@ def write_piff_header(stream, params): language = params.get('language', 'und') height = params.get('height', 0) width = params.get('width', 0) - is_audio = width == 0 and height == 0 + stream_type = params['stream_type'] creation_time = modification_time = int(time.time()) ftyp_payload = b'isml' # major brand @@ -77,7 +77,7 @@ def write_piff_header(stream, params): tkhd_payload += u32.pack(0) * 2 # reserved tkhd_payload += s16.pack(0) # layer tkhd_payload += s16.pack(0) # alternate group - tkhd_payload += s88.pack(1 if is_audio else 0) # volume + tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume tkhd_payload += u16.pack(0) # reserved tkhd_payload += unity_matrix tkhd_payload += u1616.pack(width) @@ -93,19 +93,34 @@ def write_piff_header(stream, params): mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box hdlr_payload = u32.pack(0) # pre defined - hdlr_payload += b'soun' if is_audio else b'vide' # handler type - hdlr_payload += u32.pack(0) * 3 # reserved - hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name + if stream_type == 'audio': # handler type + hdlr_payload += b'soun' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SoundHandler\0' # name + elif stream_type == 'video': + hdlr_payload += b'vide' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'VideoHandler\0' # name + elif stream_type == 'text': + hdlr_payload += b'subt' + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += b'SubtitleHandler\0' # name + else: + assert False mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box - if is_audio: + if stream_type == 'audio': smhd_payload = s88.pack(0) # balance smhd_payload += u16.pack(0) # reserved media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header - else: + elif stream_type == 'video': vmhd_payload = u16.pack(0) # graphics mode vmhd_payload += u16.pack(0) * 3 # opcolor media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header + elif stream_type == 'text': + media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header + else: + assert False minf_payload = media_header_box dref_payload = u32.pack(1) # entry count @@ -117,7 +132,7 @@ def write_piff_header(stream, params): sample_entry_payload = u8.pack(0) * 6 # reserved sample_entry_payload += u16.pack(1) # data reference index - if is_audio: + if stream_type == 'audio': sample_entry_payload += u32.pack(0) * 2 # reserved sample_entry_payload += u16.pack(params.get('channels', 2)) sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) @@ -127,7 +142,7 @@ def write_piff_header(stream, params): if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) - else: + elif stream_type == 'video': sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u32.pack(0) * 3 # pre defined @@ -155,6 +170,18 @@ def write_piff_header(stream, params): avcc_payload += pps sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + else: + assert False + elif stream_type == 'text': + if fourcc == 'TTML': + sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace + sample_entry_payload += b'\0' # schema location + sample_entry_payload += b'\0' # auxilary mime types(??) + sample_entry_box = box(b'stpp', sample_entry_payload) + else: + assert False + else: + assert False stsd_payload += sample_entry_box stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box @@ -219,12 +246,15 @@ class IsmFD(FragmentFD): 'total_frags': len(segments), } - self._prepare_and_start_frag_download(ctx) + self._prepare_and_start_frag_download(ctx, info_dict) + + extra_state = ctx.setdefault('extra_state', { + 'ism_track_written': False, + }) fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - track_written = False frag_index = 0 for i, segment in enumerate(segments): frag_index += 1 @@ -236,11 +266,11 @@ class IsmFD(FragmentFD): success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False - if not track_written: + if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] write_piff_header(ctx['dest_stream'], info_dict['_download_params']) - track_written = True + extra_state['ism_track_written'] = True self._append_fragment(ctx, frag_content) break except compat_urllib_error.HTTPError as err: @@ -254,6 +284,6 @@ class IsmFD(FragmentFD): self.report_error('giving up after %s fragment retries' % fragment_retries) return False - self._finish_frag_download(ctx) + self._finish_frag_download(ctx, info_dict) return True diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py new file mode 100644 index 0000000..f0f4dc6 --- /dev/null +++ b/hypervideo_dl/downloader/mhtml.py @@ -0,0 +1,202 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import io +import quopri +import re +import uuid + +from .fragment import FragmentFD +from ..utils import ( + escapeHTML, + formatSeconds, + srt_subtitles_timecode, + urljoin, +) +from ..version import __version__ as YT_DLP_VERSION + + +class MhtmlFD(FragmentFD): + FD_NAME = 'mhtml' + + _STYLESHEET = """\ +html, body { + margin: 0; + padding: 0; + height: 100vh; +} + +html { + overflow-y: scroll; + scroll-snap-type: y mandatory; +} + +body { + scroll-snap-type: y mandatory; + display: flex; + flex-flow: column; +} + +body > figure { + max-width: 100vw; + max-height: 100vh; + scroll-snap-align: center; +} + +body > figure > figcaption { + text-align: center; + height: 2.5em; +} + +body > figure > img { + display: block; + margin: auto; + max-width: 100%; + max-height: calc(100vh - 5em); +} +""" + _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) + _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) + + @staticmethod + def _escape_mime(s): + return '=?utf-8?Q?' + (b''.join( + bytes((b,)) if b >= 0x20 else b'=%02X' % b + for b in quopri.encodestring(s.encode('utf-8'), header=True) + )).decode('us-ascii') + '?=' + + def _gen_cid(self, i, fragment, frag_boundary): + return '%u.%s@hypervideo.github.io.invalid' % (i, frag_boundary) + + def _gen_stub(self, *, fragments, frag_boundary, title): + output = io.StringIO() + + output.write(( + '<!DOCTYPE html>' + '<html>' + '<head>' + '' '<meta name="generator" content="hypervideo {version}">' + '' '<title>{title}</title>' + '' '<style>{styles}</style>' + '<body>' + ).format( + version=escapeHTML(YT_DLP_VERSION), + styles=self._STYLESHEET, + title=escapeHTML(title) + )) + + t0 = 0 + for i, frag in enumerate(fragments): + output.write('<figure>') + try: + t1 = t0 + frag['duration'] + output.write(( + '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>' + ).format( + num=i + 1, + t0=srt_subtitles_timecode(t0), + t1=srt_subtitles_timecode(t1), + duration=formatSeconds(frag['duration'], msec=True) + )) + except (KeyError, ValueError, TypeError): + t1 = None + output.write(( + '<figcaption>Slide #{num}</figcaption>' + ).format(num=i + 1)) + output.write('<img src="cid:{cid}">'.format( + cid=self._gen_cid(i, frag, frag_boundary))) + output.write('</figure>') + t0 = t1 + + return output.getvalue() + + def real_download(self, filename, info_dict): + fragment_base_url = info_dict.get('fragment_base_url') + fragments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + title = info_dict['title'] + origin = info_dict['webpage_url'] + + ctx = { + 'filename': filename, + 'total_frags': len(fragments), + } + + self._prepare_and_start_frag_download(ctx, info_dict) + + extra_state = ctx.setdefault('extra_state', { + 'header_written': False, + 'mime_boundary': str(uuid.uuid4()).replace('-', ''), + }) + + frag_boundary = extra_state['mime_boundary'] + + if not extra_state['header_written']: + stub = self._gen_stub( + fragments=fragments, + frag_boundary=frag_boundary, + title=title + ) + + ctx['dest_stream'].write(( + 'MIME-Version: 1.0\r\n' + 'From: <nowhere@hypervideo.github.io.invalid>\r\n' + 'To: <nowhere@hypervideo.github.io.invalid>\r\n' + 'Subject: {title}\r\n' + 'Content-type: multipart/related; ' + '' 'boundary="{boundary}"; ' + '' 'type="text/html"\r\n' + 'X.hypervideo.Origin: {origin}\r\n' + '\r\n' + '--{boundary}\r\n' + 'Content-Type: text/html; charset=utf-8\r\n' + 'Content-Length: {length}\r\n' + '\r\n' + '{stub}\r\n' + ).format( + origin=origin, + boundary=frag_boundary, + length=len(stub), + title=self._escape_mime(title), + stub=stub + ).encode('utf-8')) + extra_state['header_written'] = True + + for i, fragment in enumerate(fragments): + if (i + 1) <= ctx['fragment_index']: + continue + + fragment_url = urljoin(fragment_base_url, fragment['path']) + success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + if not success: + continue + + mime_type = b'image/jpeg' + if frag_content.startswith(b'\x89PNG\r\n\x1a\n'): + mime_type = b'image/png' + if frag_content.startswith((b'GIF87a', b'GIF89a')): + mime_type = b'image/gif' + if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP': + mime_type = b'image/webp' + + frag_header = io.BytesIO() + frag_header.write( + b'--%b\r\n' % frag_boundary.encode('us-ascii')) + frag_header.write( + b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii')) + frag_header.write( + b'Content-type: %b\r\n' % mime_type) + frag_header.write( + b'Content-length: %u\r\n' % len(frag_content)) + frag_header.write( + b'Content-location: %b\r\n' % fragment_url.encode('us-ascii')) + frag_header.write( + b'X.hypervideo.Duration: %f\r\n' % fragment['duration']) + frag_header.write(b'\r\n') + self._append_fragment( + ctx, frag_header.getvalue() + frag_content + b'\r\n') + + ctx['dest_stream'].write( + b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) + self._finish_frag_download(ctx, info_dict) + return True diff --git a/hypervideo_dl/downloader/niconico.py b/hypervideo_dl/downloader/niconico.py new file mode 100644 index 0000000..521dfec --- /dev/null +++ b/hypervideo_dl/downloader/niconico.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import threading + +from .common import FileDownloader +from ..downloader import get_suitable_downloader +from ..extractor.niconico import NiconicoIE +from ..utils import sanitized_Request + + +class NiconicoDmcFD(FileDownloader): + """ Downloading niconico douga from DMC with heartbeat """ + + FD_NAME = 'niconico_dmc' + + def real_download(self, filename, info_dict): + self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + + ie = NiconicoIE(self.ydl) + info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) + + fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params) + + success = download_complete = False + timer = [None] + heartbeat_lock = threading.Lock() + heartbeat_url = heartbeat_info_dict['url'] + heartbeat_data = heartbeat_info_dict['data'].encode() + heartbeat_interval = heartbeat_info_dict.get('interval', 30) + + request = sanitized_Request(heartbeat_url, heartbeat_data) + + def heartbeat(): + try: + self.ydl.urlopen(request).read() + except Exception: + self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + + with heartbeat_lock: + if not download_complete: + timer[0] = threading.Timer(heartbeat_interval, heartbeat) + timer[0].start() + + heartbeat_info_dict['ping']() + self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval)) + try: + heartbeat() + if type(fd).__name__ == 'HlsFD': + info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) + success = fd.real_download(filename, info_dict) + finally: + if heartbeat_lock: + with heartbeat_lock: + timer[0].cancel() + download_complete = True + return success diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py index fbb7f51..6dca647 100644 --- a/hypervideo_dl/downloader/rtmp.py +++ b/hypervideo_dl/downloader/rtmp.py @@ -66,7 +66,7 @@ class RtmpFD(FileDownloader): 'eta': eta, 'elapsed': time_now - start, 'speed': speed, - }) + }, info_dict) cursor_in_new_line = False else: # no percent for live streams @@ -82,18 +82,20 @@ class RtmpFD(FileDownloader): 'status': 'downloading', 'elapsed': time_now - start, 'speed': speed, - }) + }, info_dict) cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('') cursor_in_new_line = True self.to_screen('[rtmpdump] ' + line) - finally: + if not cursor_in_new_line: + self.to_screen('') + return proc.wait() + except BaseException: # Including KeyboardInterrupt + proc.kill() proc.wait() - if not cursor_in_new_line: - self.to_screen('') - return proc.returncode + raise url = info_dict['url'] player_url = info_dict.get('player_url') @@ -115,7 +117,7 @@ class RtmpFD(FileDownloader): # Check for rtmpdump first if not check_executable('rtmpdump', ['-h']): - self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.') + self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install') return False # Download using rtmpdump. rtmpdump returns exit code 2 when @@ -206,7 +208,7 @@ class RtmpFD(FileDownloader): 'filename': filename, 'status': 'finished', 'elapsed': time.time() - started, - }) + }, info_dict) return True else: self.to_stderr('\n') diff --git a/hypervideo_dl/downloader/rtsp.py b/hypervideo_dl/downloader/rtsp.py index 939358b..7815d59 100644 --- a/hypervideo_dl/downloader/rtsp.py +++ b/hypervideo_dl/downloader/rtsp.py @@ -24,7 +24,7 @@ class RtspFD(FileDownloader): args = [ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] else: - self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one') return False self._debug_cmd(args) @@ -39,7 +39,7 @@ class RtspFD(FileDownloader): 'total_bytes': fsize, 'filename': filename, 'status': 'finished', - }) + }, info_dict) return True else: self.to_stderr('\n') diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py new file mode 100644 index 0000000..0882220 --- /dev/null +++ b/hypervideo_dl/downloader/websocket.py @@ -0,0 +1,59 @@ +import os +import signal +import asyncio +import threading + +try: + import websockets + has_websockets = True +except ImportError: + has_websockets = False + +from .common import FileDownloader +from .external import FFmpegFD + + +class FFmpegSinkFD(FileDownloader): + """ A sink to ffmpeg for downloading fragments in any form """ + + def real_download(self, filename, info_dict): + info_copy = info_dict.copy() + info_copy['url'] = '-' + + async def call_conn(proc, stdin): + try: + await self.real_connection(stdin, info_dict) + except (BrokenPipeError, OSError): + pass + finally: + try: + stdin.flush() + stdin.close() + except OSError: + pass + os.kill(os.getpid(), signal.SIGINT) + + class FFmpegStdinFD(FFmpegFD): + @classmethod + def get_basename(cls): + return FFmpegFD.get_basename() + + def on_process_started(self, proc, stdin): + thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), )) + thread.start() + + return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy) + + async def real_connection(self, sink, info_dict): + """ Override this in subclasses """ + raise NotImplementedError('This method must be implemented by subclasses') + + +class WebSocketFragmentFD(FFmpegSinkFD): + async def real_connection(self, sink, info_dict): + async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws: + while True: + recv = await ws.recv() + if isinstance(recv, str): + recv = recv.encode('utf8') + sink.write(recv) diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py new file mode 100644 index 0000000..ef4205e --- /dev/null +++ b/hypervideo_dl/downloader/youtube_live_chat.py @@ -0,0 +1,236 @@ +from __future__ import division, unicode_literals + +import json +import time + +from .fragment import FragmentFD +from ..compat import compat_urllib_error +from ..utils import ( + try_get, + dict_get, + int_or_none, + RegexNotFoundError, +) +from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE + + +class YoutubeLiveChatFD(FragmentFD): + """ Downloads YouTube live chats fragment by fragment """ + + FD_NAME = 'youtube_live_chat' + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + + fragment_retries = self.params.get('fragment_retries', 0) + test = self.params.get('test', False) + + ctx = { + 'filename': filename, + 'live': True, + 'total_frags': None, + } + + ie = YT_BaseIE(self.ydl) + + start_time = int(time.time() * 1000) + + def dl_fragment(url, data=None, headers=None): + http_headers = info_dict.get('http_headers', {}) + if headers: + http_headers = http_headers.copy() + http_headers.update(headers) + return self._download_fragment(ctx, url, info_dict, http_headers, data) + + def parse_actions_replay(live_chat_continuation): + offset = continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + if 'replayChatItemAction' in action: + replay_chat_item_action = action['replayChatItemAction'] + offset = int(replay_chat_item_action['videoOffsetTimeMsec']) + processed_fragment.extend( + json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + if offset is not None: + continuation = try_get( + live_chat_continuation, + lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict) + if continuation: + continuation_id = continuation.get('continuation') + click_tracking_params = continuation.get('clickTrackingParams') + self._append_fragment(ctx, processed_fragment) + return continuation_id, offset, click_tracking_params + + def try_refresh_replay_beginning(live_chat_continuation): + # choose the second option that contains the unfiltered live chat replay + refresh_continuation = try_get( + live_chat_continuation, + lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict) + if refresh_continuation: + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + refresh_continuation_id = refresh_continuation.get('continuation') + offset = 0 + click_tracking_params = refresh_continuation.get('trackingParams') + return refresh_continuation_id, offset, click_tracking_params + return parse_actions_replay(live_chat_continuation) + + live_offset = 0 + + def parse_actions_live(live_chat_continuation): + nonlocal live_offset + continuation_id = click_tracking_params = None + processed_fragment = bytearray() + for action in live_chat_continuation.get('actions', []): + timestamp = self.parse_live_timestamp(action) + if timestamp is not None: + live_offset = timestamp - start_time + # compatibility with replay format + pseudo_action = { + 'replayChatItemAction': {'actions': [action]}, + 'videoOffsetTimeMsec': str(live_offset), + 'isLive': True, + } + processed_fragment.extend( + json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n') + continuation_data_getters = [ + lambda x: x['continuations'][0]['invalidationContinuationData'], + lambda x: x['continuations'][0]['timedContinuationData'], + ] + continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict) + if continuation_data: + continuation_id = continuation_data.get('continuation') + click_tracking_params = continuation_data.get('clickTrackingParams') + timeout_ms = int_or_none(continuation_data.get('timeoutMs')) + if timeout_ms is not None: + time.sleep(timeout_ms / 1000) + self._append_fragment(ctx, processed_fragment) + return continuation_id, live_offset, click_tracking_params + + def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): + count = 0 + while count <= fragment_retries: + try: + success, raw_fragment = dl_fragment(url, request_data, headers) + if not success: + return False, None, None, None + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + data = None + if not data: + data = json.loads(raw_fragment) + live_chat_continuation = try_get( + data, + lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} + if info_dict['protocol'] == 'youtube_live_chat_replay': + if frag_index == 1: + continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation) + else: + continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation) + elif info_dict['protocol'] == 'youtube_live_chat': + continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation) + return True, continuation_id, offset, click_tracking_params + except compat_urllib_error.HTTPError as err: + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + if count > fragment_retries: + self.report_error('giving up after %s fragment retries' % fragment_retries) + return False, None, None, None + + self._prepare_and_start_frag_download(ctx, info_dict) + + success, raw_fragment = dl_fragment(info_dict['url']) + if not success: + return False + try: + data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) + except RegexNotFoundError: + return False + continuation_id = try_get( + data, + lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + + ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace')) + + if not ytcfg: + return False + api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY']) + innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT']) + if not api_key or not innertube_context: + return False + visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) + if info_dict['protocol'] == 'youtube_live_chat_replay': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id + elif info_dict['protocol'] == 'youtube_live_chat': + url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key + chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id + + frag_index = offset = 0 + click_tracking_params = None + while continuation_id is not None: + frag_index += 1 + request_data = { + 'context': innertube_context, + 'continuation': continuation_id, + } + if frag_index > 1: + request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} + if click_tracking_params: + request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} + headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) + headers.update({'content-type': 'application/json'}) + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + url, frag_index, fragment_request_data, headers) + else: + success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( + chat_page_url, frag_index) + if not success: + return False + if test: + break + + self._finish_frag_download(ctx, info_dict) + return True + + @staticmethod + def parse_live_timestamp(action): + action_content = dict_get( + action, + ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand']) + if not isinstance(action_content, dict): + return None + item = dict_get(action_content, ['item', 'bannerRenderer']) + if not isinstance(item, dict): + return None + renderer = dict_get(item, [ + # text + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + # ticker + 'liveChatTickerPaidMessageItemRenderer', + 'liveChatTickerSponsorItemRenderer', + # banner + 'liveChatBannerRenderer', + ]) + if not isinstance(renderer, dict): + return None + parent_item_getters = [ + lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'], + lambda x: x['contents'], + ] + parent_item = try_get(renderer, parent_item_getters, dict) + if parent_item: + renderer = dict_get(parent_item, [ + 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', + 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', + ]) + if not isinstance(renderer, dict): + return None + return int_or_none(renderer.get('timestampUsec'), 1000) |