diff options
| author | Felix S <felix.von.s@posteo.de> | 2021-10-02 18:43:42 +0000 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-10-03 00:13:42 +0530 | 
| commit | 9359f3d4f02856128f5626e754c7f64e2232b02f (patch) | |
| tree | 601dd118dfa2c5b8226086bf5f303656986dc735 | |
| parent | 0eaec13ba6abe18d6ddf35f2ebffdcaf3937e485 (diff) | |
| download | hypervideo-pre-9359f3d4f02856128f5626e754c7f64e2232b02f.tar.lz hypervideo-pre-9359f3d4f02856128f5626e754c7f64e2232b02f.tar.xz hypervideo-pre-9359f3d4f02856128f5626e754c7f64e2232b02f.zip | |
[extractor] Extract storyboards from SMIL manifests (#1128)
Authored by: fstirlitz
| -rw-r--r-- | yt_dlp/YoutubeDL.py | 6 |
| -rw-r--r-- | yt_dlp/extractor/common.py | 23 |
| -rw-r--r-- | yt_dlp/utils.py | 39 | 
3 files changed, 56 insertions, 12 deletions
| diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c42a29ee3..9c4dd3ec5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3029,9 +3029,7 @@ class YoutubeDL(object):      @staticmethod      def format_resolution(format, default='unknown'): -        if format.get('vcodec') == 'none': -            if format.get('acodec') == 'none': -                return 'images' +        if format.get('vcodec') == 'none' and format.get('acodec') != 'none':              return 'audio only'          if format.get('resolution') is not None:              return format['resolution'] @@ -3043,6 +3041,8 @@ class YoutubeDL(object):              res = '%dx?' % format['width']          else:              res = default +        if format.get('vcodec') == 'none' and format.get('acodec') == 'none': +            res += ' (images)'          return res      def _format_note(self, fdict): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5da29dc63..f65a098d7 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2346,14 +2346,15 @@ class InfoExtractor(object):          rtmp_count = 0          http_count = 0          m3u8_count = 0 +        imgs_count = 0 -        srcs = [] +        srcs = set()          media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))          for medium in media:              src = medium.get('src')              if not src or src in srcs:                  continue -            srcs.append(src) +            srcs.add(src)              bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)              filesize = int_or_none(medium.get('size') or medium.get('fileSize')) @@ -2427,6 +2428,24 @@ class InfoExtractor(object):                      'height': height,                  }) +        for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)): +            src = medium.get('src') +            if not 
src or src in srcs: +                continue +            srcs.add(src) + +            imgs_count += 1 +            formats.append({ +                'format_id': 'imagestream-%d' % (imgs_count), +                'url': src, +                'ext': mimetype2ext(medium.get('type')), +                'acodec': 'none', +                'vcodec': 'none', +                'width': int_or_none(medium.get('width')), +                'height': int_or_none(medium.get('height')), +                'format_note': 'SMIL storyboards', +            }) +          return formats      def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 1bc0ac767..7a77edf4c 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4546,20 +4546,24 @@ def mimetype2ext(mt):      if mt is None:          return None -    ext = { +    mt, _, params = mt.partition(';') +    mt = mt.strip() + +    FULL_MAP = {          'audio/mp4': 'm4a',          # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. 
Here use .mp3 as          # it's the most popular one          'audio/mpeg': 'mp3',          'audio/x-wav': 'wav', -    }.get(mt) +        'audio/wav': 'wav', +        'audio/wave': 'wav', +    } + +    ext = FULL_MAP.get(mt)      if ext is not None:          return ext -    _, _, res = mt.rpartition('/') -    res = res.split(';')[0].strip().lower() - -    return { +    SUBTYPE_MAP = {          '3gpp': '3gp',          'smptett+xml': 'tt',          'ttaf+xml': 'dfxp', @@ -4578,7 +4582,28 @@ def mimetype2ext(mt):          'quicktime': 'mov',          'mp2t': 'ts',          'x-wav': 'wav', -    }.get(res, res) +        'filmstrip+json': 'fs', +        'svg+xml': 'svg', +    } + +    _, _, subtype = mt.rpartition('/') +    ext = SUBTYPE_MAP.get(subtype.lower()) +    if ext is not None: +        return ext + +    SUFFIX_MAP = { +        'json': 'json', +        'xml': 'xml', +        'zip': 'zip', +        'gzip': 'gz', +    } + +    _, _, suffix = subtype.partition('+') +    ext = SUFFIX_MAP.get(suffix) +    if ext is not None: +        return ext + +    return subtype.replace('+', '.')  def parse_codecs(codecs_str): | 
