aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/downloader
diff options
context:
space:
mode:
authorJesús <heckyel@hyperbola.info>2021-10-18 15:24:21 -0500
committerJesús <heckyel@hyperbola.info>2021-10-18 15:24:21 -0500
commit5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e (patch)
tree65209bc739db35e31f1c9b5b868eb5df4fe12ae3 /hypervideo_dl/downloader
parent27fe903c511691c078942bef5ee9a05a43b15c8f (diff)
downloadhypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.lz
hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.xz
hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.zip
update from upstream
Diffstat (limited to 'hypervideo_dl/downloader')
-rw-r--r--hypervideo_dl/downloader/__init__.py105
-rw-r--r--hypervideo_dl/downloader/common.py171
-rw-r--r--hypervideo_dl/downloader/dash.py88
-rw-r--r--hypervideo_dl/downloader/external.py258
-rw-r--r--hypervideo_dl/downloader/f4m.py19
-rw-r--r--hypervideo_dl/downloader/fragment.py255
-rw-r--r--hypervideo_dl/downloader/hls.py285
-rw-r--r--hypervideo_dl/downloader/http.py37
-rw-r--r--hypervideo_dl/downloader/ism.py58
-rw-r--r--hypervideo_dl/downloader/mhtml.py202
-rw-r--r--hypervideo_dl/downloader/niconico.py57
-rw-r--r--hypervideo_dl/downloader/rtmp.py18
-rw-r--r--hypervideo_dl/downloader/rtsp.py4
-rw-r--r--hypervideo_dl/downloader/websocket.py59
-rw-r--r--hypervideo_dl/downloader/youtube_live_chat.py236
15 files changed, 1516 insertions, 336 deletions
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py
index 2e485df..2449c74 100644
--- a/hypervideo_dl/downloader/__init__.py
+++ b/hypervideo_dl/downloader/__init__.py
@@ -1,24 +1,47 @@
from __future__ import unicode_literals
+from ..compat import compat_str
+from ..utils import (
+ determine_protocol,
+ NO_DEFAULT
+)
+
+
+def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
+ info_dict['protocol'] = determine_protocol(info_dict)
+ info_copy = info_dict.copy()
+ info_copy['to_stdout'] = to_stdout
+
+ downloaders = [_get_suitable_downloader(info_copy, proto, params, default)
+ for proto in (protocol or info_copy['protocol']).split('+')]
+ if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params):
+ return FFmpegFD
+ elif len(downloaders) == 1:
+ return downloaders[0]
+ return None
+
+
+# Some of these require get_suitable_downloader
from .common import FileDownloader
+from .dash import DashSegmentsFD
from .f4m import F4mFD
from .hls import HlsFD
from .http import HttpFD
from .rtmp import RtmpFD
-from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
+from .mhtml import MhtmlFD
+from .niconico import NiconicoDmcFD
+from .websocket import WebSocketFragmentFD
+from .youtube_live_chat import YoutubeLiveChatFD
from .external import (
get_external_downloader,
FFmpegFD,
)
-from ..utils import (
- determine_protocol,
-)
-
PROTOCOL_MAP = {
'rtmp': RtmpFD,
+ 'rtmp_ffmpeg': FFmpegFD,
'm3u8_native': HlsFD,
'm3u8': FFmpegFD,
'mms': RtspFD,
@@ -26,36 +49,78 @@ PROTOCOL_MAP = {
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
+ 'mhtml': MhtmlFD,
+ 'niconico_dmc': NiconicoDmcFD,
+ 'websocket_frag': WebSocketFragmentFD,
+ 'youtube_live_chat': YoutubeLiveChatFD,
+ 'youtube_live_chat_replay': YoutubeLiveChatFD,
}
-def get_suitable_downloader(info_dict, params={}):
+def shorten_protocol_name(proto, simplify=False):
+ short_protocol_names = {
+ 'm3u8_native': 'm3u8_n',
+ 'rtmp_ffmpeg': 'rtmp_f',
+ 'http_dash_segments': 'dash',
+ 'niconico_dmc': 'dmc',
+ 'websocket_frag': 'WSfrag',
+ }
+ if simplify:
+ short_protocol_names.update({
+ 'https': 'http',
+ 'ftps': 'ftp',
+ 'm3u8_native': 'm3u8',
+ 'rtmp_ffmpeg': 'rtmp',
+ 'm3u8_frag_urls': 'm3u8',
+ 'dash_frag_urls': 'dash',
+ })
+ return short_protocol_names.get(proto, proto)
+
+
+def _get_suitable_downloader(info_dict, protocol, params, default):
"""Get the downloader class that can handle the info dict."""
- protocol = determine_protocol(info_dict)
- info_dict['protocol'] = protocol
+ if default is NO_DEFAULT:
+ default = HttpFD
# if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
# return FFmpegFD
- external_downloader = params.get('external_downloader')
- if external_downloader is not None:
+ info_dict['protocol'] = protocol
+ downloaders = params.get('external_downloader')
+ external_downloader = (
+ downloaders if isinstance(downloaders, compat_str) or downloaders is None
+ else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
+
+ if external_downloader is None:
+ if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params):
+ return FFmpegFD
+ elif external_downloader.lower() != 'native':
ed = get_external_downloader(external_downloader)
- if ed.can_download(info_dict):
+ if ed.can_download(info_dict, external_downloader):
return ed
- if protocol.startswith('m3u8') and info_dict.get('is_live'):
- return FFmpegFD
-
- if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
- return HlsFD
+ if protocol == 'http_dash_segments':
+ if info_dict.get('is_live') and (external_downloader or '').lower() != 'native':
+ return FFmpegFD
- if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
- return FFmpegFD
+ if protocol in ('m3u8', 'm3u8_native'):
+ if info_dict.get('is_live'):
+ return FFmpegFD
+ elif (external_downloader or '').lower() == 'native':
+ return HlsFD
+ elif get_suitable_downloader(
+ info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
+ return HlsFD
+ elif params.get('hls_prefer_native') is True:
+ return HlsFD
+ elif params.get('hls_prefer_native') is False:
+ return FFmpegFD
- return PROTOCOL_MAP.get(protocol, HttpFD)
+ return PROTOCOL_MAP.get(protocol, default)
__all__ = [
- 'get_suitable_downloader',
'FileDownloader',
+ 'get_suitable_downloader',
+ 'shorten_protocol_name',
]
diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py
index d023168..27ca2cd 100644
--- a/hypervideo_dl/downloader/common.py
+++ b/hypervideo_dl/downloader/common.py
@@ -2,11 +2,9 @@ from __future__ import division, unicode_literals
import os
import re
-import sys
import time
import random
-from ..compat import compat_os_name
from ..utils import (
decodeArgument,
encodeFilename,
@@ -15,6 +13,12 @@ from ..utils import (
shell_quote,
timeconvert,
)
+from ..minicurses import (
+ MultilineLogger,
+ MultilinePrinter,
+ QuietMultilinePrinter,
+ BreaklineStatusPrinter
+)
class FileDownloader(object):
@@ -32,25 +36,28 @@ class FileDownloader(object):
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
+ throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
noprogress: Do not print the progress bar.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
updatetime: Use the Last-modified header to set output file timestamps.
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
- external_downloader_args: A list of additional command-line arguments for the
- external downloader.
+ external_downloader_args: A dictionary of downloader keys (in lower case)
+ and a list of additional command-line arguments for the
+ executable. Use 'default' as the name for arguments to be
+ passed to all downloaders. For compatibility with youtube-dl,
+ a single list of args can also be used
hls_use_mpegts: Use the mpegts container for HLS videos.
http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
+ progress_template: See YoutubeDL.py
Subclasses of this one must re-define the real_download method.
"""
@@ -63,6 +70,7 @@ class FileDownloader(object):
self.ydl = ydl
self._progress_hooks = []
self.params = params
+ self._prepare_multiline_status()
self.add_progress_hook(self.report_progress)
@staticmethod
@@ -147,10 +155,10 @@ class FileDownloader(object):
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
- self.ydl.to_screen(*args, **kargs)
+ self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)
def to_stderr(self, message):
- self.ydl.to_screen(message)
+ self.ydl.to_stderr(message)
def to_console_title(self, message):
self.ydl.to_console_title(message)
@@ -164,6 +172,9 @@ class FileDownloader(object):
def report_error(self, *args, **kargs):
self.ydl.report_error(*args, **kargs)
+ def write_debug(self, *args, **kargs):
+ self.ydl.write_debug(*args, **kargs)
+
def slow_down(self, start_time, now, byte_counter):
"""Sleep if the download speed is over the rate limit."""
rate_limit = self.params.get('ratelimit')
@@ -196,12 +207,12 @@ class FileDownloader(object):
return filename + '.ytdl'
def try_rename(self, old_filename, new_filename):
+ if old_filename == new_filename:
+ return
try:
- if old_filename == new_filename:
- return
- os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+ os.replace(old_filename, new_filename)
except (IOError, OSError) as err:
- self.report_error('unable to rename file: %s' % error_to_compat_str(err))
+ self.report_error(f'unable to rename file: {err}')
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
@@ -228,39 +239,46 @@ class FileDownloader(object):
"""Report destination filename."""
self.to_screen('[download] Destination: ' + filename)
- def _report_progress_status(self, msg, is_last_line=False):
- fullmsg = '[download] ' + msg
- if self.params.get('progress_with_newline', False):
- self.to_screen(fullmsg)
+ def _prepare_multiline_status(self, lines=1):
+ if self.params.get('noprogress'):
+ self._multiline = QuietMultilinePrinter()
+ elif self.ydl.params.get('logger'):
+ self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
+ elif self.params.get('progress_with_newline'):
+ self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
else:
- if compat_os_name == 'nt':
- prev_len = getattr(self, '_report_progress_prev_line_length',
- 0)
- if prev_len > len(fullmsg):
- fullmsg += ' ' * (prev_len - len(fullmsg))
- self._report_progress_prev_line_length = len(fullmsg)
- clear_line = '\r'
- else:
- clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
- self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
- self.to_console_title('hypervideo ' + msg)
+ self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
+
+ def _finish_multiline_status(self):
+ self._multiline.end()
+
+ def _report_progress_status(self, s):
+ progress_dict = s.copy()
+ progress_dict.pop('info_dict')
+ progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
+
+ progress_template = self.params.get('progress_template', {})
+ self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
+ progress_template.get('download') or '[download] %(progress._default_template)s',
+ progress_dict), s.get('progress_idx') or 0)
+ self.to_console_title(self.ydl.evaluate_outtmpl(
+ progress_template.get('download-title') or 'hypervideo %(progress._default_template)s',
+ progress_dict))
def report_progress(self, s):
if s['status'] == 'finished':
- if self.params.get('noprogress', False):
+ if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
- else:
- msg_template = '100%%'
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template += ' of %(_total_bytes_str)s'
- if s.get('elapsed') is not None:
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template += ' in %(_elapsed_str)s'
- self._report_progress_status(
- msg_template % s, is_last_line=True)
-
- if self.params.get('noprogress'):
+ msg_template = '100%%'
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template += ' of %(_total_bytes_str)s'
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template += ' in %(_elapsed_str)s'
+ s['_percent_str'] = self.format_percent(100)
+ s['_default_template'] = msg_template % s
+ self._report_progress_status(s)
return
if s['status'] != 'downloading':
@@ -302,8 +320,8 @@ class FileDownloader(object):
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
else:
msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
-
- self._report_progress_status(msg_template % s)
+ s['_default_template'] = msg_template % s
+ self._report_progress_status(s)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@@ -312,27 +330,30 @@ class FileDownloader(object):
def report_retry(self, err, count, retries):
"""Report retry in case of HTTP error 5xx"""
self.to_screen(
- '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+ '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
% (error_to_compat_str(err), count, self.format_retries(retries)))
- def report_file_already_downloaded(self, file_name):
+ def report_file_already_downloaded(self, *args, **kwargs):
"""Report file has already been fully downloaded."""
- try:
- self.to_screen('[download] %s has already been downloaded' % file_name)
- except UnicodeEncodeError:
- self.to_screen('[download] The file has already been downloaded')
+ return self.ydl.report_file_already_downloaded(*args, **kwargs)
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
self.to_screen('[download] Unable to resume')
- def download(self, filename, info_dict):
+ @staticmethod
+ def supports_manifest(manifest):
+ """ Whether the downloader can download the fragments from the manifest.
+ Redefine in subclasses if needed. """
+ pass
+
+ def download(self, filename, info_dict, subtitle=False):
"""Download to a filename using the info from info_dict
Return True on success and False otherwise
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False)
+ not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename))
)
@@ -350,26 +371,43 @@ class FileDownloader(object):
'filename': filename,
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
- })
- return True
-
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
- self.to_screen(
- '[download] Sleeping %s seconds...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
-
- return self.real_download(filename, info_dict)
+ }, info_dict)
+ return True, False
+
+ if subtitle is False:
+ min_sleep_interval = self.params.get('sleep_interval')
+ if min_sleep_interval:
+ max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+ sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds ...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
+ time.sleep(sleep_interval)
+ else:
+ sleep_interval_sub = 0
+ if type(self.params.get('sleep_interval_subtitles')) is int:
+ sleep_interval_sub = self.params.get('sleep_interval_subtitles')
+ if sleep_interval_sub > 0:
+ self.to_screen(
+ '[download] Sleeping %s seconds ...' % (
+ sleep_interval_sub))
+ time.sleep(sleep_interval_sub)
+ ret = self.real_download(filename, info_dict)
+ self._finish_multiline_status()
+ return ret, True
def real_download(self, filename, info_dict):
"""Real download process. Redefine in subclasses."""
raise NotImplementedError('This method must be implemented by subclasses')
- def _hook_progress(self, status):
+ def _hook_progress(self, status, info_dict):
+ if not self._progress_hooks:
+ return
+ status['info_dict'] = info_dict
+ # youtube-dl passes the same status object to all the hooks.
+ # Some third party scripts seems to be relying on this.
+ # So keep this behavior if possible
for ph in self._progress_hooks:
ph(status)
@@ -387,5 +425,4 @@ class FileDownloader(object):
if exe is None:
exe = os.path.basename(str_args[0])
- self.to_screen('[debug] %s command line: %s' % (
- exe, shell_quote(str_args)))
+ self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))
diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py
index c6d674b..6444ad6 100644
--- a/hypervideo_dl/downloader/dash.py
+++ b/hypervideo_dl/downloader/dash.py
@@ -1,80 +1,62 @@
from __future__ import unicode_literals
+from ..downloader import get_suitable_downloader
from .fragment import FragmentFD
-from ..compat import compat_urllib_error
-from ..utils import (
- DownloadError,
- urljoin,
-)
+
+from ..utils import urljoin
class DashSegmentsFD(FragmentFD):
"""
- Download segments in a DASH manifest
+ Download segments in a DASH manifest. External downloaders can take over
+ the fragment downloads by supporting the 'dash_frag_urls' protocol
"""
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
+ if info_dict.get('is_live'):
+ self.report_error('Live DASH videos are not supported')
+
fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
+ real_downloader = get_suitable_downloader(
+ info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
+
ctx = {
'filename': filename,
'total_frags': len(fragments),
}
- self._prepare_and_start_frag_download(ctx)
-
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+ if real_downloader:
+ self._prepare_external_frag_download(ctx)
+ else:
+ self._prepare_and_start_frag_download(ctx, info_dict)
+ fragments_to_download = []
frag_index = 0
for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
- # In DASH, the first segment contains necessary headers to
- # generate a valid MP4 file, so always abort for the first segment
- fatal = i == 0 or not skip_unavailable_fragments
- count = 0
- while count <= fragment_retries:
- try:
- fragment_url = fragment.get('url')
- if not fragment_url:
- assert fragment_base_url
- fragment_url = urljoin(fragment_base_url, fragment['path'])
- success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
- if not success:
- return False
- self._append_fragment(ctx, frag_content)
- break
- except compat_urllib_error.HTTPError as err:
- # YouTube may often return 404 HTTP error for a fragment causing the
- # whole download to fail. However if the same fragment is immediately
- # retried with the same request data this usually succeeds (1-2 attempts
- # is usually enough) thus allowing to download the whole file successfully.
- # To be future-proof we will retry all fragments that fail with any
- # HTTP error.
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- except DownloadError:
- # Don't retry fragment if error occurred during HTTP downloading
- # itself since it has own retry settings
- if not fatal:
- self.report_skip_fragment(frag_index)
- break
- raise
-
- if count > fragment_retries:
- if not fatal:
- self.report_skip_fragment(frag_index)
- continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False
-
- self._finish_frag_download(ctx)
-
- return True
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+
+ fragments_to_download.append({
+ 'frag_index': frag_index,
+ 'index': i,
+ 'url': fragment_url,
+ })
+
+ if real_downloader:
+ self.to_screen(
+ '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+ info_dict['fragments'] = fragments_to_download
+ fd = real_downloader(self.ydl, self.params)
+ return fd.real_download(filename, info_dict)
+
+ return self.download_and_append_fragments(ctx, fragments_to_download, info_dict)
diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py
index c31f891..74adb05 100644
--- a/hypervideo_dl/downloader/external.py
+++ b/hypervideo_dl/downloader/external.py
@@ -6,7 +6,7 @@ import subprocess
import sys
import time
-from .common import FileDownloader
+from .fragment import FragmentFD
from ..compat import (
compat_setenv,
compat_str,
@@ -16,16 +16,21 @@ from ..utils import (
cli_option,
cli_valueless_option,
cli_bool_option,
- cli_configuration_args,
+ _configuration_args,
encodeFilename,
encodeArgument,
handle_youtubedl_headers,
check_executable,
is_outdated_version,
+ process_communicate_or_kill,
+ sanitize_open,
)
-class ExternalFD(FileDownloader):
+class ExternalFD(FragmentFD):
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
+ can_download_to_stdout = False
+
def real_download(self, filename, info_dict):
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
@@ -56,7 +61,7 @@ class ExternalFD(FileDownloader):
'downloaded_bytes': fsize,
'total_bytes': fsize,
})
- self._hook_progress(status)
+ self._hook_progress(status, info_dict)
return True
else:
self.to_stderr('\n')
@@ -70,19 +75,25 @@ class ExternalFD(FileDownloader):
@property
def exe(self):
- return self.params.get('external_downloader')
+ return self.get_basename()
@classmethod
- def available(cls):
- return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+ def available(cls, path=None):
+ path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
+ if path:
+ cls.exe = path
+ return path
+ return False
@classmethod
def supports(cls, info_dict):
- return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
+ return (
+ (cls.can_download_to_stdout or not info_dict.get('to_stdout'))
+ and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
@classmethod
- def can_download(cls, info_dict):
- return cls.available() and cls.supports(info_dict)
+ def can_download(cls, info_dict, path=None):
+ return cls.available(path) and cls.supports(info_dict)
def _option(self, command_option, param):
return cli_option(self.params, command_option, param)
@@ -93,8 +104,10 @@ class ExternalFD(FileDownloader):
def _valueless_option(self, command_option, param, expected_value=True):
return cli_valueless_option(self.params, command_option, param, expected_value)
- def _configuration_args(self, default=[]):
- return cli_configuration_args(self.params, 'external_downloader_args', default)
+ def _configuration_args(self, keys=None, *args, **kwargs):
+ return _configuration_args(
+ self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
+ keys, *args, **kwargs)
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
@@ -102,12 +115,56 @@ class ExternalFD(FileDownloader):
self._debug_cmd(cmd)
- p = subprocess.Popen(
- cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate()
- if p.returncode != 0:
+ if 'fragments' not in info_dict:
+ p = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE)
+ _, stderr = process_communicate_or_kill(p)
+ if p.returncode != 0:
+ self.to_stderr(stderr.decode('utf-8', 'replace'))
+ return p.returncode
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+ count = 0
+ while count <= fragment_retries:
+ p = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE)
+ _, stderr = process_communicate_or_kill(p)
+ if p.returncode == 0:
+ break
+ # TODO: Decide whether to retry based on error code
+ # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
self.to_stderr(stderr.decode('utf-8', 'replace'))
- return p.returncode
+ count += 1
+ if count <= fragment_retries:
+ self.to_screen(
+ '[%s] Got error. Retrying fragments (attempt %d of %s)...'
+ % (self.get_basename(), count, self.format_retries(fragment_retries)))
+ if count > fragment_retries:
+ if not skip_unavailable_fragments:
+ self.report_error('Giving up after %s fragment retries' % fragment_retries)
+ return -1
+
+ decrypt_fragment = self.decrypter(info_dict)
+ dest, _ = sanitize_open(tmpfilename, 'wb')
+ for frag_index, fragment in enumerate(info_dict['fragments']):
+ fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
+ try:
+ src, _ = sanitize_open(fragment_filename, 'rb')
+ except IOError:
+ if skip_unavailable_fragments and frag_index > 1:
+ self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index))
+ continue
+ self.report_error('Unable to open fragment %d' % frag_index)
+ return -1
+ dest.write(decrypt_fragment(fragment, src.read()))
+ src.close()
+ if not self.params.get('keep_fragments', False):
+ os.remove(encodeFilename(fragment_filename))
+ dest.close()
+ os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
+ return 0
class CurlFD(ExternalFD):
@@ -115,8 +172,10 @@ class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
cmd += self._valueless_option('--verbose', 'verbose')
@@ -141,7 +200,7 @@ class CurlFD(ExternalFD):
# curl writes the progress to stderr so don't capture it.
p = subprocess.Popen(cmd)
- p.communicate()
+ process_communicate_or_kill(p)
return p.returncode
@@ -150,8 +209,9 @@ class AxelFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
- cmd += ['-H', '%s: %s' % (key, val)]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['-H', '%s: %s' % (key, val)]
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -162,8 +222,9 @@ class WgetFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
@@ -180,51 +241,115 @@ class WgetFD(ExternalFD):
class Aria2cFD(ExternalFD):
AVAILABLE_OPT = '-v'
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
+
+ @staticmethod
+ def supports_manifest(manifest):
+ UNSUPPORTED_FEATURES = [
+ r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1]
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ ]
+ check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+ return all(check_results)
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-c']
- cmd += self._configuration_args([
- '--min-split-size', '1M', '--max-connection-per-server', '4'])
- dn = os.path.dirname(tmpfilename)
- if dn:
- cmd += ['--dir', dn]
- cmd += ['--out', os.path.basename(tmpfilename)]
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd = [self.exe, '-c',
+ '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+ '--file-allocation=none', '-x16', '-j16', '-s16']
+ if 'fragments' in info_dict:
+ cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+ else:
+ cmd += ['--min-split-size', '1M']
+
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
- cmd += ['--', info_dict['url']]
+ cmd += self._configuration_args()
+
+ # aria2c strips out spaces from the beginning/end of filenames and paths.
+ # We work around this issue by adding a "./" to the beginning of the
+ # filename and relative path, and adding a "/" at the end of the path.
+ # See: https://github.com/hypervideo/hypervideo/issues/276
+ # https://github.com/ytdl-org/youtube-dl/issues/20312
+ # https://github.com/aria2/aria2/issues/1373
+ dn = os.path.dirname(tmpfilename)
+ if dn:
+ if not os.path.isabs(dn):
+ dn = '.%s%s' % (os.path.sep, dn)
+ cmd += ['--dir', dn + os.path.sep]
+ if 'fragments' not in info_dict:
+ cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))]
+ cmd += ['--auto-file-renaming=false']
+
+ if 'fragments' in info_dict:
+ cmd += ['--file-allocation=none', '--uri-selector=inorder']
+ url_list_file = '%s.frag.urls' % tmpfilename
+ url_list = []
+ for frag_index, fragment in enumerate(info_dict['fragments']):
+ fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+ url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
+ stream, _ = sanitize_open(url_list_file, 'wb')
+ stream.write('\n'.join(url_list).encode('utf-8'))
+ stream.close()
+ cmd += ['-i', url_list_file]
+ else:
+ cmd += ['--', info_dict['url']]
return cmd
class HttpieFD(ExternalFD):
+ AVAILABLE_OPT = '--version'
+
@classmethod
- def available(cls):
- return check_executable('http', ['--version'])
+ def available(cls, path=None):
+ return ExternalFD.available(cls, path or 'http')
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
- for key, val in info_dict['http_headers'].items():
- cmd += ['%s:%s' % (key, val)]
+
+ if info_dict.get('http_headers') is not None:
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['%s:%s' % (key, val)]
return cmd
class FFmpegFD(ExternalFD):
+ SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
+ can_download_to_stdout = True
+
+ @classmethod
+ def available(cls, path=None):
+ # TODO: Fix path for ffmpeg
+ # Fixme: This may be wrong when --ffmpeg-location is used
+ return FFmpegPostProcessor().available
+
@classmethod
def supports(cls, info_dict):
- return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+ return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
+
+ def on_process_started(self, proc, stdin):
+ """ Override this in subclasses """
+ pass
@classmethod
- def available(cls):
- return FFmpegPostProcessor().available
+ def can_merge_formats(cls, info_dict, params):
+ return (
+ info_dict.get('requested_formats')
+ and info_dict.get('protocol')
+ and not params.get('allow_unplayable_formats')
+ and 'no-direct-merge' not in params.get('compat_opts', [])
+ and cls.can_download(info_dict))
def _call_downloader(self, tmpfilename, info_dict):
- url = info_dict['url']
+ urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
ffpp = FFmpegPostProcessor(downloader=self)
if not ffpp.available:
- self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+ self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
return False
ffpp.check_version()
@@ -234,7 +359,12 @@ class FFmpegFD(ExternalFD):
if self.params.get(log_level, False):
args += ['-loglevel', log_level]
break
+ if not self.params.get('verbose'):
+ args += ['-hide_banner']
+ args += info_dict.get('_ffmpeg_args', [])
+
+ # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
@@ -244,8 +374,6 @@ class FFmpegFD(ExternalFD):
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
- args += self._configuration_args()
-
# start_time = info_dict.get('start_time') or 0
# if start_time:
# args += ['-ss', compat_str(start_time)]
@@ -253,7 +381,7 @@ class FFmpegFD(ExternalFD):
# if end_time:
# args += ['-t', compat_str(end_time - start_time)]
- if info_dict['http_headers'] and re.match(r'^https?://', url):
+ if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]):
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
headers = handle_youtubedl_headers(info_dict['http_headers'])
@@ -311,13 +439,25 @@ class FFmpegFD(ExternalFD):
elif isinstance(conn, compat_str):
args += ['-rtmp_conn', conn]
- args += ['-i', url, '-c', 'copy']
+ for i, url in enumerate(urls):
+ args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
+
+ args += ['-c', 'copy']
+ if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
+ for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
+ stream_number = fmt.get('manifest_stream_number', 0)
+ a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v'
+ args.extend(['-map', f'{i}:{a_or_v}:{stream_number}'])
if self.params.get('test', False):
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+ ext = info_dict['ext']
if protocol in ('m3u8', 'm3u8_native'):
- if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
+ use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
+ if use_mpegts is None:
+ use_mpegts = info_dict.get('is_live')
+ if use_mpegts:
args += ['-f', 'mpegts']
else:
args += ['-f', 'mp4']
@@ -325,25 +465,33 @@ class FFmpegFD(ExternalFD):
args += ['-bsf:a', 'aac_adtstoasc']
elif protocol == 'rtmp':
args += ['-f', 'flv']
+ elif ext == 'mp4' and tmpfilename == '-':
+ args += ['-f', 'mpegts']
else:
- args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+ args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+
+ args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
-
self._debug_cmd(args)
proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
+ if url in ('-', 'pipe:'):
+ self.on_process_started(proc, proc.stdin)
try:
retval = proc.wait()
- except KeyboardInterrupt:
+ except BaseException as e:
# subprocces.run would send the SIGKILL signal to ffmpeg and the
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
# produces a file that is playable (this is mostly useful for live
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
- if sys.platform != 'win32':
- proc.communicate(b'q')
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
+ process_communicate_or_kill(proc, b'q')
+ else:
+ proc.kill()
+ proc.wait()
raise
return retval
@@ -355,7 +503,7 @@ class AVconvFD(FFmpegFD):
_BY_NAME = dict(
(klass.get_basename(), klass)
for name, klass in globals().items()
- if name.endswith('FD') and name != 'ExternalFD'
+ if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
)
@@ -368,4 +516,4 @@ def get_external_downloader(external_downloader):
downloader . """
# Drop .exe extension on Windows
bn = os.path.splitext(os.path.basename(external_downloader))[0]
- return _BY_NAME[bn]
+ return _BY_NAME.get(bn)
diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py
index 8dd3c2e..9da2776 100644
--- a/hypervideo_dl/downloader/f4m.py
+++ b/hypervideo_dl/downloader/f4m.py
@@ -267,13 +267,14 @@ class F4mFD(FragmentFD):
media = doc.findall(_add_ns('media'))
if not media:
self.report_error('No media found')
- for e in (doc.findall(_add_ns('drmAdditionalHeader'))
- + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
- # If id attribute is missing it's valid for all media nodes
- # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
- if 'id' not in e.attrib:
- self.report_error('Missing ID in f4m DRM')
- media = remove_encrypted_media(media)
+ if not self.params.get('allow_unplayable_formats'):
+ for e in (doc.findall(_add_ns('drmAdditionalHeader'))
+ + doc.findall(_add_ns('drmAdditionalHeaderSet'))):
+ # If id attribute is missing it's valid for all media nodes
+ # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
+ if 'id' not in e.attrib:
+ self.report_error('Missing ID in f4m DRM')
+ media = remove_encrypted_media(media)
if not media:
self.report_error('Unsupported DRM')
return media
@@ -379,7 +380,7 @@ class F4mFD(FragmentFD):
base_url_parsed = compat_urllib_parse_urlparse(base_url)
- self._start_frag_download(ctx)
+ self._start_frag_download(ctx, info_dict)
frag_index = 0
while fragments_list:
@@ -433,6 +434,6 @@ class F4mFD(FragmentFD):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
- self._finish_frag_download(ctx)
+ self._finish_frag_download(ctx, info_dict)
return True
diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py
index b82e3cf..57068db 100644
--- a/hypervideo_dl/downloader/fragment.py
+++ b/hypervideo_dl/downloader/fragment.py
@@ -3,10 +3,23 @@ from __future__ import division, unicode_literals
import os
import time
import json
+from math import ceil
+
+try:
+ import concurrent.futures
+ can_threaded_download = True
+except ImportError:
+ can_threaded_download = False
from .common import FileDownloader
from .http import HttpFD
+from ..aes import aes_cbc_decrypt_bytes
+from ..compat import (
+ compat_urllib_error,
+ compat_struct_pack,
+)
from ..utils import (
+ DownloadError,
error_to_compat_str,
encodeFilename,
sanitize_open,
@@ -31,6 +44,7 @@ class FragmentFD(FileDownloader):
Skip unavailable fragments (DASH and hlsnative only)
keep_fragments: Keep downloaded fragments on disk after downloading is
finished
+ _no_ytdl_file: Don't use .ytdl file
For each incomplete fragment download hypervideo keeps on disk a special
bookkeeping file with download state and metadata (in future such files will
@@ -55,29 +69,31 @@ class FragmentFD(FileDownloader):
def report_retry_fragment(self, err, frag_index, count, retries):
self.to_screen(
- '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
+ '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
def report_skip_fragment(self, frag_index):
- self.to_screen('[download] Skipping fragment %d...' % frag_index)
+ self.to_screen('[download] Skipping fragment %d ...' % frag_index)
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url
- def _prepare_and_start_frag_download(self, ctx):
+ def _prepare_and_start_frag_download(self, ctx, info_dict):
self._prepare_frag_download(ctx)
- self._start_frag_download(ctx)
+ self._start_frag_download(ctx, info_dict)
- @staticmethod
- def __do_ytdl_file(ctx):
- return not ctx['live'] and not ctx['tmpfilename'] == '-'
+ def __do_ytdl_file(self, ctx):
+ return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file')
def _read_ytdl_file(self, ctx):
assert 'ytdl_corrupt' not in ctx
stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
try:
- ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+ ytdl_data = json.loads(stream.read())
+ ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
+ if 'extra_state' in ytdl_data['downloader']:
+ ctx['extra_state'] = ytdl_data['downloader']['extra_state']
except Exception:
ctx['ytdl_corrupt'] = True
finally:
@@ -85,32 +101,42 @@ class FragmentFD(FileDownloader):
def _write_ytdl_file(self, ctx):
frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
- downloader = {
- 'current_fragment': {
- 'index': ctx['fragment_index'],
- },
- }
- if ctx.get('fragment_count') is not None:
- downloader['fragment_count'] = ctx['fragment_count']
- frag_index_stream.write(json.dumps({'downloader': downloader}))
- frag_index_stream.close()
+ try:
+ downloader = {
+ 'current_fragment': {
+ 'index': ctx['fragment_index'],
+ },
+ }
+ if 'extra_state' in ctx:
+ downloader['extra_state'] = ctx['extra_state']
+ if ctx.get('fragment_count') is not None:
+ downloader['fragment_count'] = ctx['fragment_count']
+ frag_index_stream.write(json.dumps({'downloader': downloader}))
+ finally:
+ frag_index_stream.close()
- def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
fragment_info_dict = {
'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'),
+ 'request_data': request_data,
+ 'ctx_id': ctx.get('ctx_id'),
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False, None
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
- down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+ ctx['fragment_filename_sanitized'] = fragment_filename
+ return True, self._read_fragment(ctx)
+
+ def _read_fragment(self, ctx):
+ down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
- return True, frag_content
+ return frag_content
def _append_fragment(self, ctx, frag_content):
try:
@@ -173,7 +199,7 @@ class FragmentFD(FileDownloader):
'.ytdl file is corrupt' if is_corrupt else
'Inconsistent state of incomplete fragment download')
self.report_warning(
- '%s. Restarting from the beginning...' % message)
+ '%s. Restarting from the beginning ...' % message)
ctx['fragment_index'] = resume_len = 0
if 'ytdl_corrupt' in ctx:
del ctx['ytdl_corrupt']
@@ -192,9 +218,10 @@ class FragmentFD(FileDownloader):
'complete_frags_downloaded_bytes': resume_len,
})
- def _start_frag_download(self, ctx):
+ def _start_frag_download(self, ctx, info_dict):
resume_len = ctx['complete_frags_downloaded_bytes']
total_frags = ctx['total_frags']
+ ctx_id = ctx.get('ctx_id')
# This dict stores the download progress, it's updated by the progress
# hook
state = {
@@ -218,9 +245,16 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'):
return
+ if ctx_id is not None and s.get('ctx_id') != ctx_id:
+ return
+
+ state['max_progress'] = ctx.get('max_progress')
+ state['progress_idx'] = ctx.get('progress_idx')
+
time_now = time.time()
state['elapsed'] = time_now - start
frag_total_bytes = s.get('total_bytes') or 0
+ s['fragment_info_dict'] = s.pop('info_dict', {})
if not ctx['live']:
estimated_size = (
(ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
@@ -243,13 +277,13 @@ class FragmentFD(FileDownloader):
state['speed'] = s.get('speed') or ctx.get('speed')
ctx['speed'] = state['speed']
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
- self._hook_progress(state)
+ self._hook_progress(state, info_dict)
ctx['dl'].add_progress_hook(frag_progress_hook)
return start
- def _finish_frag_download(self, ctx):
+ def _finish_frag_download(self, ctx, info_dict):
ctx['dest_stream'].close()
if self.__do_ytdl_file(ctx):
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
@@ -276,4 +310,177 @@ class FragmentFD(FileDownloader):
'filename': ctx['filename'],
'status': 'finished',
'elapsed': elapsed,
+ 'ctx_id': ctx.get('ctx_id'),
+ 'max_progress': ctx.get('max_progress'),
+ 'progress_idx': ctx.get('progress_idx'),
+ }, info_dict)
+
+ def _prepare_external_frag_download(self, ctx):
+ if 'live' not in ctx:
+ ctx['live'] = False
+ if not ctx['live']:
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
+ self.to_screen(
+ '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+
+ tmpfilename = self.temp_name(ctx['filename'])
+
+ # Should be initialized before ytdl file check
+ ctx.update({
+ 'tmpfilename': tmpfilename,
+ 'fragment_index': 0,
})
+
+ def decrypter(self, info_dict):
+ _key_cache = {}
+
+ def _get_key(url):
+ if url not in _key_cache:
+ _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
+ return _key_cache[url]
+
+ def decrypt_fragment(fragment, frag_content):
+ decrypt_info = fragment.get('decrypt_info')
+ if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
+ return frag_content
+ iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
+ decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
+ # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
+ # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
+ # not what it decrypts to.
+ if self.params.get('test', False):
+ return frag_content
+ padding_len = 16 - (len(frag_content) % 16)
+ decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv)
+ return decrypted_data[:-decrypted_data[-1]]
+
+ return decrypt_fragment
+
+ def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None):
+ '''
+ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
+ all args must be either tuple or list
+ '''
+ max_progress = len(args)
+ if max_progress == 1:
+ return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
+ max_workers = self.params.get('concurrent_fragment_downloads', max_progress)
+ self._prepare_multiline_status(max_progress)
+
+ def thread_func(idx, ctx, fragments, info_dict, tpe):
+ ctx['max_progress'] = max_progress
+ ctx['progress_idx'] = idx
+ return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe)
+
+ class FTPE(concurrent.futures.ThreadPoolExecutor):
+ # has to stop this or it's going to wait on the worker thread itself
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ pass
+
+ spins = []
+ for idx, (ctx, fragments, info_dict) in enumerate(args):
+ tpe = FTPE(ceil(max_workers / max_progress))
+ job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe)
+ spins.append((tpe, job))
+
+ result = True
+ for tpe, job in spins:
+ try:
+ result = result and job.result()
+ finally:
+ tpe.shutdown(wait=True)
+ return result
+
+ def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None):
+ fragment_retries = self.params.get('fragment_retries', 0)
+ is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
+ if not pack_func:
+ pack_func = lambda frag_content, _: frag_content
+
+ def download_fragment(fragment, ctx):
+ frag_index = ctx['fragment_index'] = fragment['frag_index']
+ headers = info_dict.get('http_headers', {}).copy()
+ byte_range = fragment.get('byte_range')
+ if byte_range:
+ headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
+
+ # Never skip the first fragment
+ fatal = is_fatal(fragment.get('index') or (frag_index - 1))
+ count, frag_content = 0, None
+ while count <= fragment_retries:
+ try:
+ success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
+ if not success:
+ return False, frag_index
+ break
+ except compat_urllib_error.HTTPError as err:
+ # Unavailable (possibly temporary) fragments may be served.
+ # First we try to retry then either skip or abort.
+ # See https://github.com/ytdl-org/youtube-dl/issues/10165,
+ # https://github.com/ytdl-org/youtube-dl/issues/10448).
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ except DownloadError:
+ # Don't retry fragment if error occurred during HTTP downloading
+ # itself since it has own retry settings
+ if not fatal:
+ break
+ raise
+
+ if count > fragment_retries:
+ if not fatal:
+ return False, frag_index
+ ctx['dest_stream'].close()
+ self.report_error('Giving up after %s fragment retries' % fragment_retries)
+ return False, frag_index
+ return frag_content, frag_index
+
+ def append_fragment(frag_content, frag_index, ctx):
+ if not frag_content:
+ if not is_fatal(frag_index - 1):
+ self.report_skip_fragment(frag_index)
+ return True
+ else:
+ ctx['dest_stream'].close()
+ self.report_error(
+ 'fragment %s not found, unable to continue' % frag_index)
+ return False
+ self._append_fragment(ctx, pack_func(frag_content, frag_index))
+ return True
+
+ decrypt_fragment = self.decrypter(info_dict)
+
+ max_workers = self.params.get('concurrent_fragment_downloads', 1)
+ if can_threaded_download and max_workers > 1:
+
+ def _download_fragment(fragment):
+ ctx_copy = ctx.copy()
+ frag_content, frag_index = download_fragment(fragment, ctx_copy)
+ return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
+
+ self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
+ with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
+ for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ ctx['fragment_filename_sanitized'] = frag_filename
+ ctx['fragment_index'] = frag_index
+ result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ if not result:
+ return False
+ else:
+ for fragment in fragments:
+ frag_content, frag_index = download_fragment(fragment, ctx)
+ result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ if not result:
+ return False
+
+ if finish_func is not None:
+ ctx['dest_stream'].write(finish_func())
+ ctx['dest_stream'].flush()
+ self._finish_frag_download(ctx, info_dict)
+ return True
diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py
index 7aaebc9..61312c5 100644
--- a/hypervideo_dl/downloader/hls.py
+++ b/hypervideo_dl/downloader/hls.py
@@ -1,36 +1,37 @@
from __future__ import unicode_literals
import re
+import io
import binascii
-try:
- from Crypto.Cipher import AES
- can_decrypt_frag = True
-except ImportError:
- can_decrypt_frag = False
+from ..downloader import get_suitable_downloader
from .fragment import FragmentFD
from .external import FFmpegFD
from ..compat import (
- compat_urllib_error,
+ compat_pycrypto_AES,
compat_urlparse,
- compat_struct_pack,
)
from ..utils import (
parse_m3u8_attributes,
update_url_query,
+ bug_reports_message,
)
+from .. import webvtt
class HlsFD(FragmentFD):
- """ A limited implementation that does not require ffmpeg """
+ """
+ Download segments in a m3u8 manifest. External downloaders can take over
+ the fragment downloads by supporting the 'm3u8_frag_urls' protocol and
+ re-defining 'supports_manifest' function
+ """
FD_NAME = 'hlsnative'
@staticmethod
- def can_download(manifest, info_dict):
- UNSUPPORTED_FEATURES = (
- r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ def can_download(manifest, info_dict, allow_unplayable_formats=False):
+ UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
# Live streams heuristic does not always work (e.g. geo restricted to Germany
@@ -42,20 +43,23 @@ class HlsFD(FragmentFD):
# no segments will definitely be appended to the end of the playlist.
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
# # event media playlists [4]
- r'#EXT-X-MAP:', # media initialization [5]
-
+ # r'#EXT-X-MAP:', # media initialization [5]
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
- )
- check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
- is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
- check_results.append(can_decrypt_frag or not is_aes128_enc)
- check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
- check_results.append(not info_dict.get('is_live'))
- return all(check_results)
+ ]
+ if not allow_unplayable_formats:
+ UNSUPPORTED_FEATURES += [
+ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ ]
+
+ def check_results():
+ yield not info_dict.get('is_live')
+ for feature in UNSUPPORTED_FEATURES:
+ yield not re.search(feature, manifest)
+ return all(check_results())
def real_download(self, filename, info_dict):
man_url = info_dict['url']
@@ -65,17 +69,32 @@ class HlsFD(FragmentFD):
man_url = urlh.geturl()
s = urlh.read().decode('utf-8', 'ignore')
- if not self.can_download(s, info_dict):
- if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
- self.report_error('pycrypto not found. Please install it.')
- return False
- self.report_warning(
- 'hlsnative has detected features it does not support, '
- 'extraction will be delegated to ffmpeg')
+ can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
+ if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s:
+ if FFmpegFD.available():
+ can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available'
+ else:
+ message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
+ 'Decryption will be performed natively, but will be extremely slow')
+ if not can_download:
+ message = message or 'Unsupported features have been detected'
fd = FFmpegFD(self.ydl, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
+ self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
return fd.real_download(filename, info_dict)
+ elif message:
+ self.report_warning(message)
+
+ is_webvtt = info_dict['ext'] == 'vtt'
+ if is_webvtt:
+ real_downloader = None # Packing the fragments is not currently supported for external downloader
+ else:
+ real_downloader = get_suitable_downloader(
+ info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-'))
+ if real_downloader and not real_downloader.supports_manifest(s):
+ real_downloader = None
+ if real_downloader:
+ self.to_screen(
+ '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
@@ -85,6 +104,8 @@ class HlsFD(FragmentFD):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+ fragments = []
+
media_frags = 0
ad_frags = 0
ad_frag_next = False
@@ -109,12 +130,14 @@ class HlsFD(FragmentFD):
'ad_frags': ad_frags,
}
- self._prepare_and_start_frag_download(ctx)
+ if real_downloader:
+ self._prepare_external_frag_download(ctx)
+ else:
+ self._prepare_and_start_frag_download(ctx, info_dict)
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- test = self.params.get('test', False)
+ extra_state = ctx.setdefault('extra_state', {})
+ format_index = info_dict.get('format_index')
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
@@ -123,12 +146,15 @@ class HlsFD(FragmentFD):
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
byte_range = {}
+ discontinuity_count = 0
frag_index = 0
ad_frag_next = False
for line in s.splitlines():
line = line.strip()
if line:
if not line.startswith('#'):
+ if format_index and discontinuity_count != format_index:
+ continue
if ad_frag_next:
continue
frag_index += 1
@@ -140,50 +166,49 @@ class HlsFD(FragmentFD):
else compat_urlparse.urljoin(man_url, line))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
- count = 0
- headers = info_dict.get('http_headers', {})
- if byte_range:
- headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
- while count <= fragment_retries:
- try:
- success, frag_content = self._download_fragment(
- ctx, frag_url, info_dict, headers)
- if not success:
- return False
- break
- except compat_urllib_error.HTTPError as err:
- # Unavailable (possibly temporary) fragments may be served.
- # First we try to retry then either skip or abort.
- # See https://github.com/ytdl-org/youtube-dl/issues/10165,
- # https://github.com/ytdl-org/youtube-dl/issues/10448).
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- if skip_unavailable_fragments:
- i += 1
- media_sequence += 1
- self.report_skip_fragment(frag_index)
- continue
+
+ fragments.append({
+ 'frag_index': frag_index,
+ 'url': frag_url,
+ 'decrypt_info': decrypt_info,
+ 'byte_range': byte_range,
+ 'media_sequence': media_sequence,
+ })
+ media_sequence += 1
+
+ elif line.startswith('#EXT-X-MAP'):
+ if format_index and discontinuity_count != format_index:
+ continue
+ if frag_index > 0:
self.report_error(
- 'giving up after %s fragment retries' % fragment_retries)
+ 'Initialization fragment found after media fragments, unable to download')
return False
- if decrypt_info['METHOD'] == 'AES-128':
- iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
- decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
- self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
- # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
- # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
- # not what it decrypts to.
- if not test:
- frag_content = AES.new(
- decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
- self._append_fragment(ctx, frag_content)
- # We only download the first fragment during the test
- if test:
- break
- i += 1
+ frag_index += 1
+ map_info = parse_m3u8_attributes(line[11:])
+ frag_url = (
+ map_info.get('URI')
+ if re.match(r'^https?://', map_info.get('URI'))
+ else compat_urlparse.urljoin(man_url, map_info.get('URI')))
+ if extra_query:
+ frag_url = update_url_query(frag_url, extra_query)
+
+ fragments.append({
+ 'frag_index': frag_index,
+ 'url': frag_url,
+ 'decrypt_info': decrypt_info,
+ 'byte_range': byte_range,
+ 'media_sequence': media_sequence
+ })
media_sequence += 1
+
+ if map_info.get('BYTERANGE'):
+ splitted_byte_range = map_info.get('BYTERANGE').split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+
elif line.startswith('#EXT-X-KEY'):
decrypt_url = decrypt_info.get('URI')
decrypt_info = parse_m3u8_attributes(line[11:])
@@ -197,6 +222,7 @@ class HlsFD(FragmentFD):
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None
+
elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
media_sequence = int(line[22:])
elif line.startswith('#EXT-X-BYTERANGE'):
@@ -210,7 +236,114 @@ class HlsFD(FragmentFD):
ad_frag_next = True
elif is_ad_fragment_end(line):
ad_frag_next = False
+ elif line.startswith('#EXT-X-DISCONTINUITY'):
+ discontinuity_count += 1
+ i += 1
+
+ # We only download the first fragment during the test
+ if self.params.get('test', False):
+ fragments = [fragments[0] if fragments else None]
+
+ if real_downloader:
+ info_dict['fragments'] = fragments
+ fd = real_downloader(self.ydl, self.params)
+ # TODO: Make progress updates work without hooking twice
+ # for ph in self._progress_hooks:
+ # fd.add_progress_hook(ph)
+ return fd.real_download(filename, info_dict)
+
+ if is_webvtt:
+ def pack_fragment(frag_content, frag_index):
+ output = io.StringIO()
+ adjust = 0
+ overflow = False
+ mpegts_last = None
+ for block in webvtt.parse_fragment(frag_content):
+ if isinstance(block, webvtt.CueBlock):
+ extra_state['webvtt_mpegts_last'] = mpegts_last
+ if overflow:
+ extra_state['webvtt_mpegts_adjust'] += 1
+ overflow = False
+ block.start += adjust
+ block.end += adjust
+
+ dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
+
+ ready = []
+
+ i = 0
+ is_new = True
+ while i < len(dedup_window):
+ wcue = dedup_window[i]
+ wblock = webvtt.CueBlock.from_json(wcue)
+ i += 1
+ if wblock.hinges(block):
+ wcue['end'] = block.end
+ is_new = False
+ continue
+ if wblock == block:
+ is_new = False
+ continue
+ if wblock.end > block.start:
+ continue
+ ready.append(wblock)
+ i -= 1
+ del dedup_window[i]
+
+ if is_new:
+ dedup_window.append(block.as_json)
+ for block in ready:
+ block.write_into(output)
+
+ # we only emit cues once they fall out of the duplicate window
+ continue
+ elif isinstance(block, webvtt.Magic):
+ # take care of MPEG PES timestamp overflow
+ if block.mpegts is None:
+ block.mpegts = 0
+ extra_state.setdefault('webvtt_mpegts_adjust', 0)
+ block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33
+ if block.mpegts < extra_state.get('webvtt_mpegts_last', 0):
+ overflow = True
+ block.mpegts += 1 << 33
+ mpegts_last = block.mpegts
+
+ if frag_index == 1:
+ extra_state['webvtt_mpegts'] = block.mpegts or 0
+ extra_state['webvtt_local'] = block.local or 0
+ # XXX: block.local = block.mpegts = None ?
+ else:
+ if block.mpegts is not None and block.local is not None:
+ adjust = (
+ (block.mpegts - extra_state.get('webvtt_mpegts', 0))
+ - (block.local - extra_state.get('webvtt_local', 0))
+ )
+ continue
+ elif isinstance(block, webvtt.HeaderBlock):
+ if frag_index != 1:
+ # XXX: this should probably be silent as well
+ # or verify that all segments contain the same data
+ self.report_warning(bug_reports_message(
+ 'Discarding a %s block found in the middle of the stream; '
+ 'if the subtitles display incorrectly,'
+ % (type(block).__name__)))
+ continue
+ block.write_into(output)
+
+ return output.getvalue().encode('utf-8')
+
+ def fin_fragments():
+ dedup_window = extra_state.get('webvtt_dedup_window')
+ if not dedup_window:
+ return b''
+
+ output = io.StringIO()
+ for cue in dedup_window:
+ webvtt.CueBlock.from_json(cue).write_into(output)
- self._finish_frag_download(ctx)
+ return output.getvalue().encode('utf-8')
- return True
+ self.download_and_append_fragments(
+ ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
+ else:
+ return self.download_and_append_fragments(ctx, fragments, info_dict)
diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py
index d8ac41d..2e95bb9 100644
--- a/hypervideo_dl/downloader/http.py
+++ b/hypervideo_dl/downloader/http.py
@@ -18,6 +18,7 @@ from ..utils import (
int_or_none,
sanitize_open,
sanitized_Request,
+ ThrottledDownload,
write_xattr,
XAttrMetadataError,
XAttrUnavailableError,
@@ -27,6 +28,7 @@ from ..utils import (
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
+ request_data = info_dict.get('request_data', None)
class DownloadContext(dict):
__getattr__ = dict.get
@@ -46,8 +48,9 @@ class HttpFD(FileDownloader):
is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else (
- info_dict.get('downloader_options', {}).get('http_chunk_size')
- or self.params.get('http_chunk_size') or 0)
+ self.params.get('http_chunk_size')
+ or info_dict.get('downloader_options', {}).get('http_chunk_size')
+ or 0)
ctx.open_mode = 'wb'
ctx.resume_len = 0
@@ -55,6 +58,7 @@ class HttpFD(FileDownloader):
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
ctx.chunk_size = None
+ throttle_start = None
if self.params.get('continuedl', True):
# Establish possible resume length
@@ -101,7 +105,7 @@ class HttpFD(FileDownloader):
range_end = ctx.data_len - 1
has_range = range_start is not None
ctx.has_range = has_range
- request = sanitized_Request(url, None, headers)
+ request = sanitized_Request(url, request_data, headers)
if has_range:
set_range(request, range_start, range_end)
# Establish connection
@@ -152,7 +156,7 @@ class HttpFD(FileDownloader):
try:
# Open the connection again without the range header
ctx.data = self.ydl.urlopen(
- sanitized_Request(url, None, headers))
+ sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
except (compat_urllib_error.HTTPError, ) as err:
if err.code < 500 or err.code >= 600:
@@ -175,7 +179,7 @@ class HttpFD(FileDownloader):
'status': 'finished',
'downloaded_bytes': ctx.resume_len,
'total_bytes': ctx.resume_len,
- })
+ }, info_dict)
raise SucceedDownload()
else:
# The length does not match, we start the download over
@@ -194,6 +198,7 @@ class HttpFD(FileDownloader):
raise RetryDownload(err)
def download():
+ nonlocal throttle_start
data_len = ctx.data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
@@ -235,7 +240,7 @@ class HttpFD(FileDownloader):
while True:
try:
# Download and write
- data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
+ data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
# socket.timeout is a subclass of socket.error but may not have
# errno set
except socket.timeout as e:
@@ -307,11 +312,24 @@ class HttpFD(FileDownloader):
'eta': eta,
'speed': speed,
'elapsed': now - ctx.start_time,
- })
+ 'ctx_id': info_dict.get('ctx_id'),
+ }, info_dict)
if data_len is not None and byte_counter == data_len:
break
+ if speed and speed < (self.params.get('throttledratelimit') or 0):
+ # The speed must stay below the limit for 3 seconds
+ # This prevents raising error when the speed temporarily goes down
+ if throttle_start is None:
+ throttle_start = now
+ elif now - throttle_start > 3:
+ if ctx.stream is not None and ctx.tmpfilename != '-':
+ ctx.stream.close()
+ raise ThrottledDownload()
+ elif speed:
+ throttle_start = None
+
if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
@@ -342,7 +360,8 @@ class HttpFD(FileDownloader):
'filename': ctx.filename,
'status': 'finished',
'elapsed': time.time() - ctx.start_time,
- })
+ 'ctx_id': info_dict.get('ctx_id'),
+ }, info_dict)
return True
@@ -354,6 +373,8 @@ class HttpFD(FileDownloader):
count += 1
if count <= retries:
self.report_retry(e.source_error, count, retries)
+ else:
+ self.to_screen(f'[download] Got server HTTP error: {e.source_error}')
continue
except NextFragment:
continue
diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py
index 1ca666b..09516ab 100644
--- a/hypervideo_dl/downloader/ism.py
+++ b/hypervideo_dl/downloader/ism.py
@@ -48,7 +48,7 @@ def write_piff_header(stream, params):
language = params.get('language', 'und')
height = params.get('height', 0)
width = params.get('width', 0)
- is_audio = width == 0 and height == 0
+ stream_type = params['stream_type']
creation_time = modification_time = int(time.time())
ftyp_payload = b'isml' # major brand
@@ -77,7 +77,7 @@ def write_piff_header(stream, params):
tkhd_payload += u32.pack(0) * 2 # reserved
tkhd_payload += s16.pack(0) # layer
tkhd_payload += s16.pack(0) # alternate group
- tkhd_payload += s88.pack(1 if is_audio else 0) # volume
+ tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume
tkhd_payload += u16.pack(0) # reserved
tkhd_payload += unity_matrix
tkhd_payload += u1616.pack(width)
@@ -93,19 +93,34 @@ def write_piff_header(stream, params):
mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
hdlr_payload = u32.pack(0) # pre defined
- hdlr_payload += b'soun' if is_audio else b'vide' # handler type
- hdlr_payload += u32.pack(0) * 3 # reserved
- hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
+ if stream_type == 'audio': # handler type
+ hdlr_payload += b'soun'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'SoundHandler\0' # name
+ elif stream_type == 'video':
+ hdlr_payload += b'vide'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'VideoHandler\0' # name
+ elif stream_type == 'text':
+ hdlr_payload += b'subt'
+ hdlr_payload += u32.pack(0) * 3 # reserved
+ hdlr_payload += b'SubtitleHandler\0' # name
+ else:
+ assert False
mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
- if is_audio:
+ if stream_type == 'audio':
smhd_payload = s88.pack(0) # balance
smhd_payload += u16.pack(0) # reserved
media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
- else:
+ elif stream_type == 'video':
vmhd_payload = u16.pack(0) # graphics mode
vmhd_payload += u16.pack(0) * 3 # opcolor
media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
+ elif stream_type == 'text':
+ media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header
+ else:
+ assert False
minf_payload = media_header_box
dref_payload = u32.pack(1) # entry count
@@ -117,7 +132,7 @@ def write_piff_header(stream, params):
sample_entry_payload = u8.pack(0) * 6 # reserved
sample_entry_payload += u16.pack(1) # data reference index
- if is_audio:
+ if stream_type == 'audio':
sample_entry_payload += u32.pack(0) * 2 # reserved
sample_entry_payload += u16.pack(params.get('channels', 2))
sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
@@ -127,7 +142,7 @@ def write_piff_header(stream, params):
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
- else:
+ elif stream_type == 'video':
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
sample_entry_payload += u32.pack(0) * 3 # pre defined
@@ -155,6 +170,18 @@ def write_piff_header(stream, params):
avcc_payload += pps
sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
+ else:
+ assert False
+ elif stream_type == 'text':
+ if fourcc == 'TTML':
+ sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace
+ sample_entry_payload += b'\0' # schema location
+ sample_entry_payload += b'\0' # auxilary mime types(??)
+ sample_entry_box = box(b'stpp', sample_entry_payload)
+ else:
+ assert False
+ else:
+ assert False
stsd_payload += sample_entry_box
stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
@@ -219,12 +246,15 @@ class IsmFD(FragmentFD):
'total_frags': len(segments),
}
- self._prepare_and_start_frag_download(ctx)
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ extra_state = ctx.setdefault('extra_state', {
+ 'ism_track_written': False,
+ })
fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- track_written = False
frag_index = 0
for i, segment in enumerate(segments):
frag_index += 1
@@ -236,11 +266,11 @@ class IsmFD(FragmentFD):
success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
if not success:
return False
- if not track_written:
+ if not extra_state['ism_track_written']:
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
- track_written = True
+ extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content)
break
except compat_urllib_error.HTTPError as err:
@@ -254,6 +284,6 @@ class IsmFD(FragmentFD):
self.report_error('giving up after %s fragment retries' % fragment_retries)
return False
- self._finish_frag_download(ctx)
+ self._finish_frag_download(ctx, info_dict)
return True
diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py
new file mode 100644
index 0000000..f0f4dc6
--- /dev/null
+++ b/hypervideo_dl/downloader/mhtml.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import io
+import quopri
+import re
+import uuid
+
+from .fragment import FragmentFD
+from ..utils import (
+ escapeHTML,
+ formatSeconds,
+ srt_subtitles_timecode,
+ urljoin,
+)
+from ..version import __version__ as YT_DLP_VERSION
+
+
+class MhtmlFD(FragmentFD):
+ FD_NAME = 'mhtml'
+
+ _STYLESHEET = """\
+html, body {
+ margin: 0;
+ padding: 0;
+ height: 100vh;
+}
+
+html {
+ overflow-y: scroll;
+ scroll-snap-type: y mandatory;
+}
+
+body {
+ scroll-snap-type: y mandatory;
+ display: flex;
+ flex-flow: column;
+}
+
+body > figure {
+ max-width: 100vw;
+ max-height: 100vh;
+ scroll-snap-align: center;
+}
+
+body > figure > figcaption {
+ text-align: center;
+ height: 2.5em;
+}
+
+body > figure > img {
+ display: block;
+ margin: auto;
+ max-width: 100%;
+ max-height: calc(100vh - 5em);
+}
+"""
+ _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET)
+ _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET)
+
+ @staticmethod
+ def _escape_mime(s):
+ return '=?utf-8?Q?' + (b''.join(
+ bytes((b,)) if b >= 0x20 else b'=%02X' % b
+ for b in quopri.encodestring(s.encode('utf-8'), header=True)
+ )).decode('us-ascii') + '?='
+
+ def _gen_cid(self, i, fragment, frag_boundary):
+ return '%u.%s@hypervideo.github.io.invalid' % (i, frag_boundary)
+
+ def _gen_stub(self, *, fragments, frag_boundary, title):
+ output = io.StringIO()
+
+ output.write((
+ '<!DOCTYPE html>'
+ '<html>'
+ '<head>'
+ '' '<meta name="generator" content="hypervideo {version}">'
+ '' '<title>{title}</title>'
+ '' '<style>{styles}</style>'
+ '<body>'
+ ).format(
+ version=escapeHTML(YT_DLP_VERSION),
+ styles=self._STYLESHEET,
+ title=escapeHTML(title)
+ ))
+
+ t0 = 0
+ for i, frag in enumerate(fragments):
+ output.write('<figure>')
+ try:
+ t1 = t0 + frag['duration']
+ output.write((
+ '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>'
+ ).format(
+ num=i + 1,
+ t0=srt_subtitles_timecode(t0),
+ t1=srt_subtitles_timecode(t1),
+ duration=formatSeconds(frag['duration'], msec=True)
+ ))
+ except (KeyError, ValueError, TypeError):
+ t1 = None
+ output.write((
+ '<figcaption>Slide #{num}</figcaption>'
+ ).format(num=i + 1))
+ output.write('<img src="cid:{cid}">'.format(
+ cid=self._gen_cid(i, frag, frag_boundary)))
+ output.write('</figure>')
+ t0 = t1
+
+ return output.getvalue()
+
+ def real_download(self, filename, info_dict):
+ fragment_base_url = info_dict.get('fragment_base_url')
+ fragments = info_dict['fragments'][:1] if self.params.get(
+ 'test', False) else info_dict['fragments']
+ title = info_dict['title']
+ origin = info_dict['webpage_url']
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': len(fragments),
+ }
+
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ extra_state = ctx.setdefault('extra_state', {
+ 'header_written': False,
+ 'mime_boundary': str(uuid.uuid4()).replace('-', ''),
+ })
+
+ frag_boundary = extra_state['mime_boundary']
+
+ if not extra_state['header_written']:
+ stub = self._gen_stub(
+ fragments=fragments,
+ frag_boundary=frag_boundary,
+ title=title
+ )
+
+ ctx['dest_stream'].write((
+ 'MIME-Version: 1.0\r\n'
+ 'From: <nowhere@hypervideo.github.io.invalid>\r\n'
+ 'To: <nowhere@hypervideo.github.io.invalid>\r\n'
+ 'Subject: {title}\r\n'
+ 'Content-type: multipart/related; '
+ '' 'boundary="{boundary}"; '
+ '' 'type="text/html"\r\n'
+ 'X.hypervideo.Origin: {origin}\r\n'
+ '\r\n'
+ '--{boundary}\r\n'
+ 'Content-Type: text/html; charset=utf-8\r\n'
+ 'Content-Length: {length}\r\n'
+ '\r\n'
+ '{stub}\r\n'
+ ).format(
+ origin=origin,
+ boundary=frag_boundary,
+ length=len(stub),
+ title=self._escape_mime(title),
+ stub=stub
+ ).encode('utf-8'))
+ extra_state['header_written'] = True
+
+ for i, fragment in enumerate(fragments):
+ if (i + 1) <= ctx['fragment_index']:
+ continue
+
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+ if not success:
+ continue
+
+ mime_type = b'image/jpeg'
+ if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
+ mime_type = b'image/png'
+ if frag_content.startswith((b'GIF87a', b'GIF89a')):
+ mime_type = b'image/gif'
+ if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP':
+ mime_type = b'image/webp'
+
+ frag_header = io.BytesIO()
+ frag_header.write(
+ b'--%b\r\n' % frag_boundary.encode('us-ascii'))
+ frag_header.write(
+ b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
+ frag_header.write(
+ b'Content-type: %b\r\n' % mime_type)
+ frag_header.write(
+ b'Content-length: %u\r\n' % len(frag_content))
+ frag_header.write(
+ b'Content-location: %b\r\n' % fragment_url.encode('us-ascii'))
+ frag_header.write(
+ b'X.hypervideo.Duration: %f\r\n' % fragment['duration'])
+ frag_header.write(b'\r\n')
+ self._append_fragment(
+ ctx, frag_header.getvalue() + frag_content + b'\r\n')
+
+ ctx['dest_stream'].write(
+ b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
+ self._finish_frag_download(ctx, info_dict)
+ return True
diff --git a/hypervideo_dl/downloader/niconico.py b/hypervideo_dl/downloader/niconico.py
new file mode 100644
index 0000000..521dfec
--- /dev/null
+++ b/hypervideo_dl/downloader/niconico.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import threading
+
+from .common import FileDownloader
+from ..downloader import get_suitable_downloader
+from ..extractor.niconico import NiconicoIE
+from ..utils import sanitized_Request
+
+
+class NiconicoDmcFD(FileDownloader):
+ """ Downloading niconico douga from DMC with heartbeat """
+
+ FD_NAME = 'niconico_dmc'
+
+ def real_download(self, filename, info_dict):
+ self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+
+ ie = NiconicoIE(self.ydl)
+ info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
+
+ fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params)
+
+ success = download_complete = False
+ timer = [None]
+ heartbeat_lock = threading.Lock()
+ heartbeat_url = heartbeat_info_dict['url']
+ heartbeat_data = heartbeat_info_dict['data'].encode()
+ heartbeat_interval = heartbeat_info_dict.get('interval', 30)
+
+ request = sanitized_Request(heartbeat_url, heartbeat_data)
+
+ def heartbeat():
+ try:
+ self.ydl.urlopen(request).read()
+ except Exception:
+ self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
+
+ with heartbeat_lock:
+ if not download_complete:
+ timer[0] = threading.Timer(heartbeat_interval, heartbeat)
+ timer[0].start()
+
+ heartbeat_info_dict['ping']()
+ self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
+ try:
+ heartbeat()
+ if type(fd).__name__ == 'HlsFD':
+ info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
+ success = fd.real_download(filename, info_dict)
+ finally:
+ if heartbeat_lock:
+ with heartbeat_lock:
+ timer[0].cancel()
+ download_complete = True
+ return success
diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py
index fbb7f51..6dca647 100644
--- a/hypervideo_dl/downloader/rtmp.py
+++ b/hypervideo_dl/downloader/rtmp.py
@@ -66,7 +66,7 @@ class RtmpFD(FileDownloader):
'eta': eta,
'elapsed': time_now - start,
'speed': speed,
- })
+ }, info_dict)
cursor_in_new_line = False
else:
# no percent for live streams
@@ -82,18 +82,20 @@ class RtmpFD(FileDownloader):
'status': 'downloading',
'elapsed': time_now - start,
'speed': speed,
- })
+ }, info_dict)
cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
cursor_in_new_line = True
self.to_screen('[rtmpdump] ' + line)
- finally:
+ if not cursor_in_new_line:
+ self.to_screen('')
+ return proc.wait()
+ except BaseException: # Including KeyboardInterrupt
+ proc.kill()
proc.wait()
- if not cursor_in_new_line:
- self.to_screen('')
- return proc.returncode
+ raise
url = info_dict['url']
player_url = info_dict.get('player_url')
@@ -115,7 +117,7 @@ class RtmpFD(FileDownloader):
# Check for rtmpdump first
if not check_executable('rtmpdump', ['-h']):
- self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
+ self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install')
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
@@ -206,7 +208,7 @@ class RtmpFD(FileDownloader):
'filename': filename,
'status': 'finished',
'elapsed': time.time() - started,
- })
+ }, info_dict)
return True
else:
self.to_stderr('\n')
diff --git a/hypervideo_dl/downloader/rtsp.py b/hypervideo_dl/downloader/rtsp.py
index 939358b..7815d59 100644
--- a/hypervideo_dl/downloader/rtsp.py
+++ b/hypervideo_dl/downloader/rtsp.py
@@ -24,7 +24,7 @@ class RtspFD(FileDownloader):
args = [
'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
else:
- self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one')
return False
self._debug_cmd(args)
@@ -39,7 +39,7 @@ class RtspFD(FileDownloader):
'total_bytes': fsize,
'filename': filename,
'status': 'finished',
- })
+ }, info_dict)
return True
else:
self.to_stderr('\n')
diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py
new file mode 100644
index 0000000..0882220
--- /dev/null
+++ b/hypervideo_dl/downloader/websocket.py
@@ -0,0 +1,59 @@
+import os
+import signal
+import asyncio
+import threading
+
+try:
+ import websockets
+ has_websockets = True
+except ImportError:
+ has_websockets = False
+
+from .common import FileDownloader
+from .external import FFmpegFD
+
+
+class FFmpegSinkFD(FileDownloader):
+ """ A sink to ffmpeg for downloading fragments in any form """
+
+ def real_download(self, filename, info_dict):
+ info_copy = info_dict.copy()
+ info_copy['url'] = '-'
+
+ async def call_conn(proc, stdin):
+ try:
+ await self.real_connection(stdin, info_dict)
+ except (BrokenPipeError, OSError):
+ pass
+ finally:
+ try:
+ stdin.flush()
+ stdin.close()
+ except OSError:
+ pass
+ os.kill(os.getpid(), signal.SIGINT)
+
+ class FFmpegStdinFD(FFmpegFD):
+ @classmethod
+ def get_basename(cls):
+ return FFmpegFD.get_basename()
+
+ def on_process_started(self, proc, stdin):
+ thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), ))
+ thread.start()
+
+ return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy)
+
+ async def real_connection(self, sink, info_dict):
+ """ Override this in subclasses """
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+
+class WebSocketFragmentFD(FFmpegSinkFD):
+ async def real_connection(self, sink, info_dict):
+ async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws:
+ while True:
+ recv = await ws.recv()
+ if isinstance(recv, str):
+ recv = recv.encode('utf8')
+ sink.write(recv)
diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py
new file mode 100644
index 0000000..ef4205e
--- /dev/null
+++ b/hypervideo_dl/downloader/youtube_live_chat.py
@@ -0,0 +1,236 @@
+from __future__ import division, unicode_literals
+
+import json
+import time
+
+from .fragment import FragmentFD
+from ..compat import compat_urllib_error
+from ..utils import (
+ try_get,
+ dict_get,
+ int_or_none,
+ RegexNotFoundError,
+)
+from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
+
+
+class YoutubeLiveChatFD(FragmentFD):
+ """ Downloads YouTube live chats fragment by fragment """
+
+ FD_NAME = 'youtube_live_chat'
+
+ def real_download(self, filename, info_dict):
+ video_id = info_dict['video_id']
+ self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ test = self.params.get('test', False)
+
+ ctx = {
+ 'filename': filename,
+ 'live': True,
+ 'total_frags': None,
+ }
+
+ ie = YT_BaseIE(self.ydl)
+
+ start_time = int(time.time() * 1000)
+
+ def dl_fragment(url, data=None, headers=None):
+ http_headers = info_dict.get('http_headers', {})
+ if headers:
+ http_headers = http_headers.copy()
+ http_headers.update(headers)
+ return self._download_fragment(ctx, url, info_dict, http_headers, data)
+
+ def parse_actions_replay(live_chat_continuation):
+ offset = continuation_id = click_tracking_params = None
+ processed_fragment = bytearray()
+ for action in live_chat_continuation.get('actions', []):
+ if 'replayChatItemAction' in action:
+ replay_chat_item_action = action['replayChatItemAction']
+ offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+ processed_fragment.extend(
+ json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+ if offset is not None:
+ continuation = try_get(
+ live_chat_continuation,
+ lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
+ if continuation:
+ continuation_id = continuation.get('continuation')
+ click_tracking_params = continuation.get('clickTrackingParams')
+ self._append_fragment(ctx, processed_fragment)
+ return continuation_id, offset, click_tracking_params
+
+ def try_refresh_replay_beginning(live_chat_continuation):
+ # choose the second option that contains the unfiltered live chat replay
+ refresh_continuation = try_get(
+ live_chat_continuation,
+ lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
+ if refresh_continuation:
+ # no data yet but required to call _append_fragment
+ self._append_fragment(ctx, b'')
+ refresh_continuation_id = refresh_continuation.get('continuation')
+ offset = 0
+ click_tracking_params = refresh_continuation.get('trackingParams')
+ return refresh_continuation_id, offset, click_tracking_params
+ return parse_actions_replay(live_chat_continuation)
+
+ live_offset = 0
+
+ def parse_actions_live(live_chat_continuation):
+ nonlocal live_offset
+ continuation_id = click_tracking_params = None
+ processed_fragment = bytearray()
+ for action in live_chat_continuation.get('actions', []):
+ timestamp = self.parse_live_timestamp(action)
+ if timestamp is not None:
+ live_offset = timestamp - start_time
+ # compatibility with replay format
+ pseudo_action = {
+ 'replayChatItemAction': {'actions': [action]},
+ 'videoOffsetTimeMsec': str(live_offset),
+ 'isLive': True,
+ }
+ processed_fragment.extend(
+ json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+ continuation_data_getters = [
+ lambda x: x['continuations'][0]['invalidationContinuationData'],
+ lambda x: x['continuations'][0]['timedContinuationData'],
+ ]
+ continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
+ if continuation_data:
+ continuation_id = continuation_data.get('continuation')
+ click_tracking_params = continuation_data.get('clickTrackingParams')
+ timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
+ if timeout_ms is not None:
+ time.sleep(timeout_ms / 1000)
+ self._append_fragment(ctx, processed_fragment)
+ return continuation_id, live_offset, click_tracking_params
+
+ def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
+ count = 0
+ while count <= fragment_retries:
+ try:
+ success, raw_fragment = dl_fragment(url, request_data, headers)
+ if not success:
+ return False, None, None, None
+ try:
+ data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ data = None
+ if not data:
+ data = json.loads(raw_fragment)
+ live_chat_continuation = try_get(
+ data,
+ lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
+ if info_dict['protocol'] == 'youtube_live_chat_replay':
+ if frag_index == 1:
+ continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
+ else:
+ continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
+ elif info_dict['protocol'] == 'youtube_live_chat':
+ continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
+ return True, continuation_id, offset, click_tracking_params
+ except compat_urllib_error.HTTPError as err:
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ self.report_error('giving up after %s fragment retries' % fragment_retries)
+ return False, None, None, None
+
+ self._prepare_and_start_frag_download(ctx, info_dict)
+
+ success, raw_fragment = dl_fragment(info_dict['url'])
+ if not success:
+ return False
+ try:
+ data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
+ except RegexNotFoundError:
+ return False
+ continuation_id = try_get(
+ data,
+ lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
+ # no data yet but required to call _append_fragment
+ self._append_fragment(ctx, b'')
+
+ ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
+
+ if not ytcfg:
+ return False
+ api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
+ innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
+ if not api_key or not innertube_context:
+ return False
+ visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
+ if info_dict['protocol'] == 'youtube_live_chat_replay':
+ url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
+ chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
+ elif info_dict['protocol'] == 'youtube_live_chat':
+ url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
+ chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
+
+ frag_index = offset = 0
+ click_tracking_params = None
+ while continuation_id is not None:
+ frag_index += 1
+ request_data = {
+ 'context': innertube_context,
+ 'continuation': continuation_id,
+ }
+ if frag_index > 1:
+ request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
+ if click_tracking_params:
+ request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
+ headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
+ headers.update({'content-type': 'application/json'})
+ fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+ success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+ url, frag_index, fragment_request_data, headers)
+ else:
+ success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
+ chat_page_url, frag_index)
+ if not success:
+ return False
+ if test:
+ break
+
+ self._finish_frag_download(ctx, info_dict)
+ return True
+
+ @staticmethod
+ def parse_live_timestamp(action):
+ action_content = dict_get(
+ action,
+ ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
+ if not isinstance(action_content, dict):
+ return None
+ item = dict_get(action_content, ['item', 'bannerRenderer'])
+ if not isinstance(item, dict):
+ return None
+ renderer = dict_get(item, [
+ # text
+ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+ 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+ # ticker
+ 'liveChatTickerPaidMessageItemRenderer',
+ 'liveChatTickerSponsorItemRenderer',
+ # banner
+ 'liveChatBannerRenderer',
+ ])
+ if not isinstance(renderer, dict):
+ return None
+ parent_item_getters = [
+ lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
+ lambda x: x['contents'],
+ ]
+ parent_item = try_get(renderer, parent_item_getters, dict)
+ if parent_item:
+ renderer = dict_get(parent_item, [
+ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
+ 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
+ ])
+ if not isinstance(renderer, dict):
+ return None
+ return int_or_none(renderer.get('timestampUsec'), 1000)