aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/canvas.py
diff options
context:
space:
mode:
authorpukkandan <pukkandan.ytdlp@gmail.com>2021-04-28 19:02:43 +0530
committerGitHub <noreply@github.com>2021-04-28 19:02:43 +0530
commitbe6202f12b97858b9d716e608394b51065d0419f (patch)
tree71f920777c24d9d81c990f0bf57d66e9d5bbaff7 /yt_dlp/extractor/canvas.py
parentdb9a564b6a5c31472f8298969584eead0b59fa1c (diff)
parente8f834cd8dfc07011d1080321e42bc130e7201bb (diff)
downloadhypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.tar.lz
hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.tar.xz
hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.zip
Subtitle extraction from streaming media manifests #247
Authored by fstirlitz. Modified from: https://github.com/ytdl-org/youtube-dl/pull/6144 Closes: #73 Fixes: https://github.com/ytdl-org/youtube-dl/issues/6106 https://github.com/ytdl-org/youtube-dl/issues/14977 https://github.com/ytdl-org/youtube-dl/issues/21438 https://github.com/ytdl-org/youtube-dl/issues/23609 https://github.com/ytdl-org/youtube-dl/issues/28132 Might also fix (untested): https://github.com/ytdl-org/youtube-dl/issues/15424 https://github.com/ytdl-org/youtube-dl/issues/18267 https://github.com/ytdl-org/youtube-dl/issues/23899 https://github.com/ytdl-org/youtube-dl/issues/24375 https://github.com/ytdl-org/youtube-dl/issues/24595 https://github.com/ytdl-org/youtube-dl/issues/27899 Related: https://github.com/ytdl-org/youtube-dl/issues/22379 https://github.com/ytdl-org/youtube-dl/pull/24517 https://github.com/ytdl-org/youtube-dl/pull/24886 https://github.com/ytdl-org/youtube-dl/pull/27215 Notes: * The functions `extractor.common._extract_..._formats` are still kept for compatibility * Only some extractors have currently been moved to using `_extract_..._formats_and_subtitles` * Direct subtitle manifests (without a master) are not supported and are wrongly identified as containing video formats * AES support is untested * The fragmented TTML subtitles extracted from DASH/ISM are valid, but are unsupported by `ffmpeg` and most video players * Their XML fragments can be dumped using `ffmpeg -i in.mp4 -f data -map 0 -c copy out.ttml`. Once the unnecessary headers are stripped out of this, it becomes a valid self-contained TTML file * The TTML subs downloaded from DASH manifests can also be directly opened with <https://github.com/SubtitleEdit> * Fragmented WebVTT files extracted from DASH/ISM are also unsupported by most tools * Unlike the TTML files, the XML fragments of these cannot be dumped using `ffmpeg` * The WebVTT subs extracted from DASH can be parsed by <https://github.com/gpac/gpac> * But the validity of those extracted from ISM is untested
Diffstat (limited to 'yt_dlp/extractor/canvas.py')
-rw-r--r--yt_dlp/extractor/canvas.py20
1 files changed, 13 insertions, 7 deletions
diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py
index eefbab241..1b7c1d2ff 100644
--- a/yt_dlp/extractor/canvas.py
+++ b/yt_dlp/extractor/canvas.py
@@ -83,24 +83,31 @@ class CanvasIE(InfoExtractor):
description = data.get('description')
formats = []
+ subtitles = {}
for target in data['targetUrls']:
format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
if not format_url or not format_type:
continue
format_type = format_type.upper()
if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
- formats.extend(self._extract_m3u8_formats(
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
- m3u8_id=format_type, fatal=False))
+ m3u8_id=format_type, fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HDS':
formats.extend(self._extract_f4m_formats(
format_url, video_id, f4m_id=format_type, fatal=False))
elif format_type == 'MPEG_DASH':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id=format_type, fatal=False))
+ fmts, subs = self._extract_mpd_formats_and_subtitles(
+ format_url, video_id, mpd_id=format_type, fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
elif format_type == 'HSS':
- formats.extend(self._extract_ism_formats(
- format_url, video_id, ism_id='mss', fatal=False))
+ fmts, subs = self._extract_ism_formats_and_subtitles(
+ format_url, video_id, ism_id='mss', fatal=False)
+ formats.extend(fmts)
+ subtitles = self._merge_subtitles(subtitles, subs)
else:
formats.append({
'format_id': format_type,
@@ -108,7 +115,6 @@ class CanvasIE(InfoExtractor):
})
self._sort_formats(formats)
- subtitles = {}
subtitle_urls = data.get('subtitleUrls')
if isinstance(subtitle_urls, list):
for subtitle in subtitle_urls: