about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Felix S <felix.von.s@posteo.de> 2021-10-02 18:43:42 +0000
committer GitHub <noreply@github.com> 2021-10-03 00:13:42 +0530
commit 9359f3d4f02856128f5626e754c7f64e2232b02f (patch)
tree 601dd118dfa2c5b8226086bf5f303656986dc735
parent 0eaec13ba6abe18d6ddf35f2ebffdcaf3937e485 (diff)
download hypervideo-pre-9359f3d4f02856128f5626e754c7f64e2232b02f.tar.lz
hypervideo-pre-9359f3d4f02856128f5626e754c7f64e2232b02f.tar.xz
hypervideo-pre-9359f3d4f02856128f5626e754c7f64e2232b02f.zip
[extractor] Extract storyboards from SMIL manifests (#1128)
Authored by: fstirlitz
-rw-r--r-- yt_dlp/YoutubeDL.py 6
-rw-r--r-- yt_dlp/extractor/common.py 23
-rw-r--r-- yt_dlp/utils.py 39
3 files changed, 56 insertions, 12 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index c42a29ee3..9c4dd3ec5 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3029,9 +3029,7 @@ class YoutubeDL(object):
@staticmethod
def format_resolution(format, default='unknown'):
- if format.get('vcodec') == 'none':
- if format.get('acodec') == 'none':
- return 'images'
+ if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
return 'audio only'
if format.get('resolution') is not None:
return format['resolution']
@@ -3043,6 +3041,8 @@ class YoutubeDL(object):
res = '%dx?' % format['width']
else:
res = default
+ if format.get('vcodec') == 'none' and format.get('acodec') == 'none':
+ res += ' (images)'
return res
def _format_note(self, fdict):
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 5da29dc63..f65a098d7 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2346,14 +2346,15 @@ class InfoExtractor(object):
rtmp_count = 0
http_count = 0
m3u8_count = 0
+ imgs_count = 0
- srcs = []
+ srcs = set()
media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
for medium in media:
src = medium.get('src')
if not src or src in srcs:
continue
- srcs.append(src)
+ srcs.add(src)
bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
@@ -2427,6 +2428,24 @@ class InfoExtractor(object):
'height': height,
})
+ for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)):
+ src = medium.get('src')
+ if not src or src in srcs:
+ continue
+ srcs.add(src)
+
+ imgs_count += 1
+ formats.append({
+ 'format_id': 'imagestream-%d' % (imgs_count),
+ 'url': src,
+ 'ext': mimetype2ext(medium.get('type')),
+ 'acodec': 'none',
+ 'vcodec': 'none',
+ 'width': int_or_none(medium.get('width')),
+ 'height': int_or_none(medium.get('height')),
+ 'format_note': 'SMIL storyboards',
+ })
+
return formats
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 1bc0ac767..7a77edf4c 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4546,20 +4546,24 @@ def mimetype2ext(mt):
if mt is None:
return None
- ext = {
+ mt, _, params = mt.partition(';')
+ mt = mt.strip()
+
+ FULL_MAP = {
'audio/mp4': 'm4a',
# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
# it's the most popular one
'audio/mpeg': 'mp3',
'audio/x-wav': 'wav',
- }.get(mt)
+ 'audio/wav': 'wav',
+ 'audio/wave': 'wav',
+ }
+
+ ext = FULL_MAP.get(mt)
if ext is not None:
return ext
- _, _, res = mt.rpartition('/')
- res = res.split(';')[0].strip().lower()
-
- return {
+ SUBTYPE_MAP = {
'3gpp': '3gp',
'smptett+xml': 'tt',
'ttaf+xml': 'dfxp',
@@ -4578,7 +4582,28 @@ def mimetype2ext(mt):
'quicktime': 'mov',
'mp2t': 'ts',
'x-wav': 'wav',
- }.get(res, res)
+ 'filmstrip+json': 'fs',
+ 'svg+xml': 'svg',
+ }
+
+ _, _, subtype = mt.rpartition('/')
+ ext = SUBTYPE_MAP.get(subtype.lower())
+ if ext is not None:
+ return ext
+
+ SUFFIX_MAP = {
+ 'json': 'json',
+ 'xml': 'xml',
+ 'zip': 'zip',
+ 'gzip': 'gz',
+ }
+
+ _, _, suffix = subtype.partition('+')
+ ext = SUFFIX_MAP.get(suffix)
+ if ext is not None:
+ return ext
+
+ return subtype.replace('+', '.')
def parse_codecs(codecs_str):