13 files changed, 713 insertions, 155 deletions
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index 090c7b47b..bbe998993 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -124,11 +124,11 @@ class TestModifyChaptersPP(unittest.TestCase):
         chapters = self._chapters([70], ['c']) + [
             self._sponsor_chapter(10, 20, 'sponsor'),
             self._sponsor_chapter(30, 40, 'preview'),
-            self._sponsor_chapter(50, 60, 'sponsor')]
+            self._sponsor_chapter(50, 60, 'filler')]
         expected = self._chapters(
             [10, 20, 30, 40, 50, 60, 70],
             ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap',
-             'c', '[SponsorBlock]: Sponsor', 'c'])
+             'c', '[SponsorBlock]: Filler Tangent', 'c'])
         self._remove_marked_arrange_sponsors_test_impl(chapters, expected, [])
 
     def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self):
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index d90fcb13a..467fe4875 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -3,33 +3,36 @@ from __future__ import unicode_literals
 
 import re
 import json
-
 from .common import InfoExtractor
-from .youtube import YoutubeIE
+from .youtube import YoutubeIE, YoutubeBaseInfoExtractor
 from ..compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_HTTPError
 )
 from ..utils import (
+    bug_reports_message,
     clean_html,
-    determine_ext,
     dict_get,
     extract_attributes,
     ExtractorError,
+    get_element_by_id,
     HEADRequest,
     int_or_none,
     KNOWN_EXTENSIONS,
     merge_dicts,
     mimetype2ext,
+    orderedSet,
     parse_duration,
     parse_qs,
-    RegexNotFoundError,
     str_to_int,
     str_or_none,
+    traverse_obj,
     try_get,
     unified_strdate,
     unified_timestamp,
+    urlhandle_detect_ext,
+    url_or_none
 )
 
 
@@ -262,12 +265,12 @@ class YoutubeWebArchiveIE(InfoExtractor):
     _VALID_URL = r"""(?x)^
                 (?:https?://)?web\.archive\.org/
                     (?:web/)?
-                    (?:[0-9A-Za-z_*]+/)?  # /web and the version index is optional
+                    (?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # /web and the version index is optional
 
                 (?:https?(?::|%3[Aa])//)?
                 (?:
-                    (?:\w+\.)?youtube\.com/watch(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD])  # Youtube URL
-                    |(wayback-fakeurl\.archive\.org/yt/)  # Or the internal fake url
+                    (?:\w+\.)?youtube\.com(?::(?:80|443))?/watch(?:\.php)?(?:\?|%3[fF])(?:[^\#]+(?:&|%26))?v(?:=|%3[dD])  # Youtube URL
+                    |(?:wayback-fakeurl\.archive\.org/yt/)  # Or the internal fake url
                 )
                 (?P<id>[0-9A-Za-z_-]{11})(?:%26|\#|&|$)
                 """
@@ -278,141 +281,391 @@ class YoutubeWebArchiveIE(InfoExtractor):
             'info_dict': {
                 'id': 'aYAGB11YrSs',
                 'ext': 'webm',
-                'title': 'Team Fortress 2 - Sandviches!'
+                'title': 'Team Fortress 2 - Sandviches!',
+                'description': 'md5:4984c0f9a07f349fc5d8e82ab7af4eaf',
+                'upload_date': '20110926',
+                'uploader': 'Zeurel',
+                'channel_id': 'UCukCyHaD-bK3in_pKpfH9Eg',
+                'duration': 32,
+                'uploader_id': 'Zeurel',
+                'uploader_url': 'http://www.youtube.com/user/Zeurel'
             }
-        },
-        {
+        }, {
             # Internal link
             'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0',
             'info_dict': {
                 'id': '97t7Xj_iBv0',
                 'ext': 'mp4',
-                'title': 'How Flexible Machines Could Save The World'
+                'title': 'Why Machines That Bend Are Better',
+                'description': 'md5:00404df2c632d16a674ff8df1ecfbb6c',
+                'upload_date': '20190312',
+                'uploader': 'Veritasium',
+                'channel_id': 'UCHnyfMqiRRG1u-2MsSQLbXA',
+                'duration': 771,
+                'uploader_id': '1veritasium',
+                'uploader_url': 'http://www.youtube.com/user/1veritasium'
             }
-        },
-        {
-            # Video from 2012, webm format itag 45.
+        }, {
+            # Video from 2012, webm format itag 45. Newest capture is of a deleted video, with an invalid description.
+            # Should use the date in the link. Title ends with '- YouTube'. Capture has description in eow-description
             'url': 'https://web.archive.org/web/20120712231619/http://www.youtube.com/watch?v=AkhihxRKcrs&gl=US&hl=en',
             'info_dict': {
                 'id': 'AkhihxRKcrs',
                 'ext': 'webm',
-                'title': 'Limited Run: Mondo\'s Modern Classic 1 of 3 (SDCC 2012)'
+                'title': 'Limited Run: Mondo\'s Modern Classic 1 of 3 (SDCC 2012)',
+                'upload_date': '20120712',
+                'duration': 398,
+                'description': 'md5:ff4de6a7980cb65d951c2f6966a4f2f3',
+                'uploader_id': 'machinima',
+                'uploader_url': 'http://www.youtube.com/user/machinima'
             }
-        },
-        {
-            # Old flash-only video. Webpage title starts with "YouTube - ".
+        }, {
+            # FLV video. Video file URL does not provide itag information
             'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw',
             'info_dict': {
                 'id': 'jNQXAC9IVRw',
-                'ext': 'unknown_video',
-                'title': 'Me at the zoo'
+                'ext': 'flv',
+                'title': 'Me at the zoo',
+                'upload_date': '20050423',
+                'channel_id': 'UC4QobU6STFB0P71PMvOGN5A',
+                'duration': 19,
+                'description': 'md5:10436b12e07ac43ff8df65287a56efb4',
+                'uploader_id': 'jawed',
+                'uploader_url': 'http://www.youtube.com/user/jawed'
             }
-        },
-        {
-            # Flash video with .flv extension (itag 34). Title has prefix "YouTube         -"
-            # Title has some weird unicode characters too.
+        }, {
             'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA',
             'info_dict': {
                 'id': 'lTx3G6h2xyA',
                 'ext': 'flv',
-                'title': '‪Madeon - Pop Culture (live mashup)‬‏'
+                'title': 'Madeon - Pop Culture (live mashup)',
+                'upload_date': '20110711',
+                'uploader': 'Madeon',
+                'channel_id': 'UCqMDNf3Pn5L7pcNkuSEeO3w',
+                'duration': 204,
+                'description': 'md5:f7535343b6eda34a314eff8b85444680',
+                'uploader_id': 'itsmadeon',
+                'uploader_url': 'http://www.youtube.com/user/itsmadeon'
             }
-        },
-        {   # Some versions of Youtube have have "YouTube" as page title in html (and later rewritten by js).
-            'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
+        }, {
+            # First capture is of dead video, second is the oldest from CDX response.
+            'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E',
             'info_dict': {
-                'id': 'kH-G_aIBlFw',
+                'id': '1JYutPM8O6E',
                 'ext': 'mp4',
-                'title': 'kH-G_aIBlFw'
-            },
-            'expected_warnings': [
-                'unable to extract title',
-            ]
-        },
-        {
-            # First capture is a 302 redirect intermediary page.
-            'url': 'https://web.archive.org/web/20050214000000/http://www.youtube.com/watch?v=0altSZ96U4M',
+                'title': 'Fake Teen Doctor Strikes AGAIN! - Weekly Weird News',
+                'upload_date': '20160218',
+                'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
+                'duration': 1236,
+                'description': 'md5:21032bae736421e89c2edf36d1936947',
+                'uploader_id': 'MachinimaETC',
+                'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
+            }
+        }, {
+            # First capture of dead video, capture date in link links to dead capture.
+            'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E',
             'info_dict': {
-                'id': '0altSZ96U4M',
+                'id': '6FPhZJGvf4E',
                 'ext': 'mp4',
-                'title': '0altSZ96U4M'
+                'title': 'WTF: Video Games Still Launch BROKEN?! - T.U.G.S.',
+                'upload_date': '20160219',
+                'channel_id': 'UCdIaNUarhzLSXGoItz7BHVA',
+                'duration': 798,
+                'description': 'md5:a1dbf12d9a3bd7cb4c5e33b27d77ffe7',
+                'uploader_id': 'MachinimaETC',
+                'uploader_url': 'http://www.youtube.com/user/MachinimaETC'
             },
             'expected_warnings': [
-                'unable to extract title',
+                r'unable to download capture webpage \(it may not be archived\)'
             ]
-        },
-        {
+        }, {   # Very old YouTube page, has - YouTube in title.
+            'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg',
+            'info_dict': {
+                'id': '-06-KB9XTzg',
+                'ext': 'flv',
+                'title': 'New Coin Hack!! 100% Safe!!'
+            }
+        }, {
+            'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8',
+            'info_dict': {
+                'id': 'dWW7qP423y8',
+                'ext': 'mp4',
+                'title': 'It\'s Bootleg AirPods Time.',
+                'upload_date': '20211021',
+                'channel_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
+                'channel_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug',
+                'duration': 810,
+                'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc',
+                'uploader': 'DankPods',
+                'uploader_id': 'UC7Jwj9fkrf1adN4fMmTkpug',
+                'uploader_url': 'http://www.youtube.com/channel/UC7Jwj9fkrf1adN4fMmTkpug'
+            }
+        }, {
+            # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093
+            'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4',
+            'info_dict': {
+                'id': '6Dh-RL__uN4',
+                'ext': 'mp4',
+                'title': 'bitch lasagna',
+                'upload_date': '20181005',
+                'channel_id': 'UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+                'channel_url': 'http://www.youtube.com/channel/UC-lHJZR3Gqxm24_Vd_AJ5Yw',
+                'duration': 135,
+                'description': 'md5:2dbe4051feeff2dab5f41f82bb6d11d0',
+                'uploader': 'PewDiePie',
+                'uploader_id': 'PewDiePie',
+                'uploader_url': 'http://www.youtube.com/user/PewDiePie'
+            }
+        }, {
+            'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw',
+            'only_matching': True
+        }, {
+            'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M',
+            'only_matching': True
+        }, {
             # Video not archived, only capture is unavailable video page
             'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10',
-            'only_matching': True,
-        },
-        {   # Encoded url
+            'only_matching': True
+        }, {   # Encoded url
             'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den',
-            'only_matching': True,
-        },
-        {
+            'only_matching': True
+        }, {
             'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den',
-            'only_matching': True,
+            'only_matching': True
+        }, {
+            'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&amp;search=soccer',
+            'only_matching': True
+        }, {
+            'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg',
+            'only_matching': True
         }
     ]
+    _YT_INITIAL_DATA_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE
+    _YT_INITIAL_PLAYER_RESPONSE_RE = r'(?:(?:(?:window\s*\[\s*["\']ytInitialPlayerResponse["\']\s*\]|ytInitialPlayerResponse)\s*=[(\s]*({.+?})[)\s]*;)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_PLAYER_RESPONSE_RE
+    _YT_INITIAL_BOUNDARY_RE = r'(?:(?:var\s+meta|</script|\n)|%s)' % YoutubeBaseInfoExtractor._YT_INITIAL_BOUNDARY_RE
+
+    _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com']  # thumbnails most likely archived on these servers
+    _YT_ALL_THUMB_SERVERS = orderedSet(
+        _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]])
+
+    _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/'  # %s is filled with a capture timestamp; the archived URL is appended after it
+    _OLDEST_CAPTURE_DATE = 20050214000000  # fallback capture timestamp (also used when a CDX row has no usable timestamp)
+    _NEWEST_CAPTURE_DATE = 20500101000000  # last-resort capture timestamp, tried after every other candidate
+
+    def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note='Downloading CDX API JSON'):
+        # Query the Wayback CDX API for `url`. CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md
+        query = {
+            'url': url,
+            'output': 'json',
+            'fl': 'original,mimetype,length,timestamp',
+            'limit': 500,
+            'filter': ['statuscode:200'] + (filters or []),  # the status filter is always present; caller filters are appended
+            'collapse': collapse or [],
+            **(query or {})  # caller-supplied entries come last, so they override the defaults above
+        }
+        res = self._download_json('https://web.archive.org/cdx/search/cdx', item_id, note, query=query)
+        if isinstance(res, list) and len(res) >= 2:
+            # The first row is used as the field names; return one dict per remaining row
+            return list(dict(zip(res[0], v)) for v in res[1:])
+        elif not isinstance(res, list) or len(res) != 0:  # an empty list is silently treated as "no results" (falls through, returns None)
+            self.report_warning('Error while parsing CDX API response' + bug_reports_message())
+
+    def _extract_yt_initial_variable(self, webpage, regex, video_id, name):  # returns whatever self._parse_json makes of the matched text
+        return self._parse_json(self._search_regex(
+            (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),  # candidate 1: `regex` followed by a boundary pattern
+             regex), webpage, name, default='{}'), video_id, fatal=False)  # candidate 2: bare `regex`; '{}' is parsed when nothing matches
+
+    def _extract_webpage_title(self, webpage):  # video title taken from the page's <title>; '' when nothing usable is found
+        page_title = self._html_search_regex(
+            r'<title>([^<]*)</title>', webpage, 'title', default='')
+        # YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
+        return self._html_search_regex(
+            r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',  # one capture group per layout: text after the prefix, or text before the suffix
+            page_title, 'title', default='')
+
+    def _extract_metadata(self, video_id, webpage):
+        # Build the metadata part of the info dict from one archived watch page; `webpage` may be '' (nothing is found then)
+        search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
+        player_response = self._extract_yt_initial_variable(
+            webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') or {}
+        initial_data = self._extract_yt_initial_variable(
+            webpage, self._YT_INITIAL_DATA_RE, video_id, 'initial data') or {}
+
+        initial_data_video = traverse_obj(
+            initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
+            expected_type=dict, get_all=False, default={})
+
+        video_details = traverse_obj(
+            player_response, 'videoDetails', expected_type=dict, get_all=False, default={})
+
+        microformats = traverse_obj(
+            player_response, ('microformat', 'playerMicroformatRenderer'), expected_type=dict, get_all=False, default={})
+
+        video_title = (  # sources are tried in this order; the first truthy value wins
+            video_details.get('title')
+            or YoutubeBaseInfoExtractor._get_text(microformats, 'title')
+            or YoutubeBaseInfoExtractor._get_text(initial_data_video, 'title')
+            or self._extract_webpage_title(webpage)
+            or search_meta(['og:title', 'twitter:title', 'title']))
+
+        channel_id = str_or_none(
+            video_details.get('channelId')
+            or microformats.get('externalChannelId')
+            or search_meta('channelId')
+            or self._search_regex(
+                r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',  # @b45a9e6
+                webpage, 'channel id', default=None, group='id'))
+        channel_url = f'http://www.youtube.com/channel/{channel_id}' if channel_id else None
+
+        duration = int_or_none(
+            video_details.get('lengthSeconds')
+            or microformats.get('lengthSeconds')
+            or parse_duration(search_meta('duration')))
+        description = (
+            video_details.get('shortDescription')
+            or YoutubeBaseInfoExtractor._get_text(microformats, 'description')
+            or clean_html(get_element_by_id('eow-description', webpage))  # @9e6dd23
+            or search_meta(['description', 'og:description', 'twitter:description']))
+
+        uploader = video_details.get('author')
+
+        # Uploader ID and URL
+        uploader_mobj = re.search(
+            r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',  # @fd05024
+            webpage)
+        if uploader_mobj is not None:
+            uploader_id, uploader_url = uploader_mobj.group('uploader_id'), uploader_mobj.group('uploader_url')
+        else:
+            # @a6211d2
+            uploader_url = url_or_none(microformats.get('ownerProfileUrl'))
+            uploader_id = self._search_regex(
+                r'(?:user|channel)/([^/]+)', uploader_url or '', 'uploader id', default=None)
+
+        upload_date = unified_strdate(
+            dict_get(microformats, ('uploadDate', 'publishDate'))
+            or search_meta(['uploadDate', 'datePublished'])
+            or self._search_regex(
+                [r'(?s)id="eow-date.*?>(.*?)</span>',
+                 r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],  # @7998520
+                webpage, 'upload date', default=None))
+
+        return {
+            'title': video_title,
+            'description': description,
+            'upload_date': upload_date,
+            'uploader': uploader,
+            'channel_id': channel_id,
+            'channel_url': channel_url,
+            'duration': duration,
+            'uploader_url': uploader_url,
+            'uploader_id': uploader_id,
+        }
+
+    def _extract_thumbnails(self, video_id):  # returns a list of thumbnail dicts built from CDX API rows
+        try_all = 'thumbnails' in self._configuration_arg('check_all')  # widens the search to every known server and to webp
+        thumbnail_base_urls = ['http://{server}/vi{webp}/{video_id}'.format(
+            webp='_webp' if ext == 'webp' else '', video_id=video_id, server=server)
+            for server in (self._YT_ALL_THUMB_SERVERS if try_all else self._YT_DEFAULT_THUMB_SERVERS) for ext in (('jpg', 'webp') if try_all else ('jpg',))]
+
+        thumbnails = []
+        for url in thumbnail_base_urls:
+            response = self._call_cdx_api(
+                video_id, url, filters=['mimetype:image/(?:webp|jpeg)'],
+                collapse=['urlkey'], query={'matchType': 'prefix'})
+            if not response:
+                continue
+            thumbnails.extend(
+                {
+                    'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'),
+                    'filesize': int_or_none(thumbnail_dict.get('length')),
+                    'preference': int_or_none(thumbnail_dict.get('length'))  # the reported length doubles as the preference value
+                } for thumbnail_dict in response)
+            if not try_all:  # by default stop at the first base URL that yields any rows
+                break
+
+        self._remove_duplicate_formats(thumbnails)
+        return thumbnails
+
+    def _get_capture_dates(self, video_id, url_date):  # returns candidate capture timestamps in the order they should be tried
+        capture_dates = []
+        # Note: CDX API will not find watch pages with extra params in the url.
+        response = self._call_cdx_api(
+            video_id, f'https://www.youtube.com/watch?v={video_id}',
+            filters=['mimetype:text/html'], collapse=['timestamp:6', 'digest'], query={'matchType': 'prefix'}) or []
+        all_captures = sorted([int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None])
+
+        # Prefer the new polymer UI captures as we support extracting more metadata from them
+        # WBM captures seem to all switch to this layout ~July 2020
+        modern_captures = list(filter(lambda x: x >= 20200701000000, all_captures))
+        if modern_captures:
+            capture_dates.append(modern_captures[0])  # earliest capture at or after the cutoff
+        capture_dates.append(url_date)  # date from the requested URL; may be None (the caller skips falsy entries)
+        if all_captures:
+            capture_dates.append(all_captures[0])  # earliest capture overall
+
+        if 'captures' in self._configuration_arg('check_all'):
+            capture_dates.extend(modern_captures + all_captures)
+
+        # Fallbacks if any of the above fail
+        capture_dates.extend([self._OLDEST_CAPTURE_DATE, self._NEWEST_CAPTURE_DATE])
+        return orderedSet(capture_dates)  # presumably drops duplicates while keeping first-seen order - confirm against utils.orderedSet
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
-        title = video_id  # if we are not able get a title
-
-        def _extract_title(webpage):
-            page_title = self._html_search_regex(
-                r'<title>([^<]*)</title>', webpage, 'title', fatal=False) or ''
-            # YouTube video pages appear to always have either 'YouTube -' as suffix or '- YouTube' as prefix.
-            try:
-                page_title = self._html_search_regex(
-                    r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
-                    page_title, 'title', default='')
-            except RegexNotFoundError:
-                page_title = None
-
-            if not page_title:
-                self.report_warning('unable to extract title', video_id=video_id)
-                return
-            return page_title
-
-        # If the video is no longer available, the oldest capture may be one before it was removed.
-        # Setting the capture date in url to early date seems to redirect to earliest capture.
-        webpage = self._download_webpage(
-            'https://web.archive.org/web/20050214000000/http://www.youtube.com/watch?v=%s' % video_id,
-            video_id=video_id, fatal=False, errnote='unable to download video webpage (probably not archived).')
-        if webpage:
-            title = _extract_title(webpage) or title
-
-        # Use link translator mentioned in https://github.com/ytdl-org/youtube-dl/issues/13655
-        internal_fake_url = 'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id
+        # Probe the archived video file first, then try capture pages in priority order for metadata
+        url_date, video_id = self._match_valid_url(url).groups()  # relies on _VALID_URL having exactly two capturing groups (date, id)
+
+        urlh = None
         try:
-            video_file_webpage = self._request_webpage(
-                HEADRequest(internal_fake_url), video_id,
-                note='Fetching video file url', expected_status=True)
+            urlh = self._request_webpage(
+                HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id),
+                video_id, note='Fetching archived video file url', expected_status=True)
         except ExtractorError as e:
             # HTTP Error 404 is expected if the video is not saved.
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
-                raise ExtractorError(
-                    'HTTP Error %s. Most likely the video is not archived or issue with web.archive.org.' % e.cause.code,
+                self.raise_no_formats(  # NOTE(review): if this returns instead of raising, urlh stays None and no formats are added below
+                    'The requested video is not archived, indexed, or there is an issue with web.archive.org',
                     expected=True)
-            raise
-        video_file_url = compat_urllib_parse_unquote(video_file_webpage.url)
-        video_file_url_qs = parse_qs(video_file_url)
-
-        # Attempt to recover any ext & format info from playback url
-        format = {'url': video_file_url}
-        itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
-        if itag and itag in YoutubeIE._formats:  # Naughty access but it works
-            format.update(YoutubeIE._formats[itag])
-            format.update({'format_id': itag})
-        else:
-            mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
-            ext = mimetype2ext(mime) or determine_ext(video_file_url)
-            format.update({'ext': ext})
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': [format],
-            'duration': str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
-        }
+            else:
+                raise
+
+        capture_dates = self._get_capture_dates(video_id, int_or_none(url_date))
+        self.write_debug('Captures to try: ' + ', '.join(str(i) for i in capture_dates if i is not None))
+        info = {'id': video_id}
+        for capture in capture_dates:
+            if not capture:  # e.g. the URL carried no capture date
+                continue
+            webpage = self._download_webpage(
+                (self._WAYBACK_BASE_URL + 'http://www.youtube.com/watch?v=%s') % (capture, video_id),
+                video_id=video_id, fatal=False, errnote='unable to download capture webpage (it may not be archived)',
+                note='Downloading capture webpage')
+            current_info = self._extract_metadata(video_id, webpage or '')
+            # Try to avoid getting deleted video metadata: only captures that yield a title are used
+            if current_info.get('title'):
+                info = merge_dicts(info, current_info)
+                if 'captures' not in self._configuration_arg('check_all'):
+                    break
+
+        info['thumbnails'] = self._extract_thumbnails(video_id)
+
+        if urlh:
+            url = compat_urllib_parse_unquote(urlh.url)
+            video_file_url_qs = parse_qs(url)
+            # Attempt to recover any ext & format info from playback url & response headers
+            format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
+            itag = try_get(video_file_url_qs, lambda x: x['itag'][0])
+            if itag and itag in YoutubeIE._formats:
+                format.update(YoutubeIE._formats[itag])
+                format.update({'format_id': itag})
+            else:
+                mime = try_get(video_file_url_qs, lambda x: x['mime'][0])
+                ext = (mimetype2ext(mime)
+                       or urlhandle_detect_ext(urlh)
+                       or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type')))
+                format.update({'ext': ext})
+            info['formats'] = [format]
+            if not info.get('duration'):  # fall back to the `dur` query parameter of the file URL
+                info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0]))
+
+        if not info.get('title'):  # no capture provided a title: use the video id instead
+            info['title'] = video_id
+        return info
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py
index cd1c3f01c..171739b46 100644
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -16,6 +16,7 @@ from ..compat import (
 )
 from ..utils import (
     clean_html,
+    dict_get,
     extract_attributes,
     ExtractorError,
     find_xpath_attr,
@@ -577,11 +578,20 @@ class BrightcoveNewIE(AdobePassIE):
         if duration is not None and duration <= 0:
             is_live = True
 
+        common_res = [(160, 90), (320, 180), (480, 720), (640, 360), (768, 432), (1024, 576), (1280, 720), (1366, 768), (1920, 1080)]  # NOTE(review): (480, 720) is the only portrait entry - confirm it is not meant to be (480, 270)
+        thumb_base_url = dict_get(json_data, ('poster', 'thumbnail'))
+        thumbnails = [{
+            'url': re.sub(r'\d+x\d+', f'{w}x{h}', thumb_base_url),  # swap every WxH token in the base URL for the candidate size
+            'width': w,
+            'height': h,
+        } for w, h in common_res] if thumb_base_url else None
+
         return {
             'id': video_id,
             'title': self._live_title(title) if is_live else title,
             'description': clean_html(json_data.get('description')),
             'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
+            'thumbnails': thumbnails,
             'duration': duration,
             'timestamp': parse_iso8601(json_data.get('published_at')),
             'uploader_id': json_data.get('account_id'),
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 413053499..392c77884 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -11,11 +11,13 @@ from ..compat import (
     compat_str,
 )
 from ..utils import (
+    int_or_none,
+    join_nonempty,
     js_to_json,
-    smuggle_url,
-    try_get,
     orderedSet,
+    smuggle_url,
     strip_or_none,
+    try_get,
     ExtractorError,
 )
 
@@ -313,6 +315,37 @@ class CBCGemIE(InfoExtractor):
             return
         self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token')
 
+    def _find_secret_formats(self, formats, video_id):
+        """ Yield extra formats from the "secret" manifest variant of the first video format; yields nothing if none is found """
+        base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
+        if not base_format:
+            return
+
+        base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])  # drop the filter=... argument
+        url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)  # additionally drop format=... for the XML request
+
+        secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
+        if not secret_xml:
+            return
+
+        for child in secret_xml:
+            if child.attrib.get('Type') != 'video':
+                continue
+            for video_quality in child:
+                bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
+                if not bitrate or 'Index' not in video_quality.attrib:
+                    continue
+                height = int_or_none(video_quality.attrib.get('MaxHeight'))
+
+                yield {
+                    **base_format,
+                    'format_id': join_nonempty('sec', height),
+                    'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),  # \g<1>, not \1: the bitrate digits follow directly
+                    'width': int_or_none(video_quality.attrib.get('MaxWidth')),
+                    'tbr': bitrate / 1000.0,
+                    'height': height,
+                }
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id)
@@ -335,6 +368,7 @@ class CBCGemIE(InfoExtractor):
 
         formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
         self._remove_duplicate_formats(formats)
+        formats.extend(self._find_secret_formats(formats, video_id))
 
         for format in formats:
             if format.get('vcodec') == 'none':
diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py
index 2c9d28d2e..db4962c42 100644
--- a/yt_dlp/extractor/crackle.py
+++ b/yt_dlp/extractor/crackle.py
@@ -23,32 +23,35 @@ from ..utils import (
 class CrackleIE(InfoExtractor):
     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
     _TESTS = [{
-        # geo restricted to CA
-        'url': 'https://www.crackle.com/andromeda/2502343',
+        # Crackle is available in the United States and territories
+        'url': 'https://www.crackle.com/thanksgiving/2510064',
         'info_dict': {
-            'id': '2502343',
+            'id': '2510064',
             'ext': 'mp4',
-            'title': 'Under The Night',
-            'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
-            'duration': 2583,
+            'title': 'Touch Football',
+            'description': 'md5:cfbb513cf5de41e8b56d7ab756cff4df',
+            'duration': 1398,
             'view_count': int,
             'average_rating': 0,
-            'age_limit': 14,
-            'genre': 'Action, Sci-Fi',
-            'creator': 'Allan Kroeker',
-            'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
-            'release_year': 2000,
-            'series': 'Andromeda',
-            'episode': 'Under The Night',
+            'age_limit': 17,
+            'genre': 'Comedy',
+            'creator': 'Daniel Powell',
+            'artist': 'Chris Elliott, Amy Sedaris',
+            'release_year': 2016,
+            'series': 'Thanksgiving',
+            'episode': 'Touch Football',
             'season_number': 1,
             'episode_number': 1,
         },
         'params': {
             # m3u8 download
             'skip_download': True,
-        }
+        },
+        'expected_warnings': [
+            'Trying with a list of known countries'
+        ],
     }, {
-        'url': 'https://www.sonycrackle.com/andromeda/2502343',
+        'url': 'https://www.sonycrackle.com/thanksgiving/2510064',
         'only_matching': True,
     }]
 
@@ -129,7 +132,6 @@ class CrackleIE(InfoExtractor):
                 break
 
         ignore_no_formats = self.get_param('ignore_no_formats_error')
-        allow_unplayable_formats = self.get_param('allow_unplayable_formats')
 
         if not media or (not media.get('MediaURLs') and not ignore_no_formats):
             raise ExtractorError(
@@ -143,9 +145,9 @@ class CrackleIE(InfoExtractor):
         for e in media.get('MediaURLs') or []:
             if e.get('UseDRM'):
                 has_drm = True
-                if not allow_unplayable_formats:
-                    continue
-            format_url = url_or_none(e.get('Path'))
+                format_url = url_or_none(e.get('DRMPath'))
+            else:
+                format_url = url_or_none(e.get('Path'))
             if not format_url:
                 continue
             ext = determine_ext(format_url)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 572c32751..8d7c54ec4 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -523,6 +523,7 @@ from .globo import (
 )
 from .go import GoIE
 from .godtube import GodTubeIE
+from .gofile import GofileIE
 from .golem import GolemIE
 from .googledrive import GoogleDriveIE
 from .googlepodcasts import (
@@ -1315,6 +1316,7 @@ from .simplecast import (
 )
 from .sina import SinaIE
 from .sixplay import SixPlayIE
+from .skeb import SkebIE
 from .skyit import (
     SkyItPlayerIE,
     SkyItVideoIE,
diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py
new file mode 100644
index 000000000..62d778cfe
--- /dev/null
+++ b/yt_dlp/extractor/gofile.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    try_get
+)
+
+
+class GofileIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gofile\.io/d/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://gofile.io/d/AMZyDw',
+        'info_dict': {
+            'id': 'AMZyDw',
+        },
+        'playlist_mincount': 2,
+        'playlist': [{
+            'info_dict': {
+                'id': 'de571ac1-5edc-42e2-8ec2-bdac83ad4a31',
+                'filesize': 928116,
+                'ext': 'mp4',
+                'title': 'nuuh'
+            }
+        }]
+    }, {  # URL to test mixed file types
+        'url': 'https://gofile.io/d/avt34h',
+        'info_dict': {
+            'id': 'avt34h',
+        },
+        'playlist_mincount': 1,
+    }, {  # URL to test no video/audio error
+        'url': 'https://gofile.io/d/aB03lZ',
+        'info_dict': {
+            'id': 'aB03lZ',
+        },
+        'playlist_count': 0,
+        'skip': 'No video/audio found at provided URL.',
+    }]
+    _TOKEN = None  # account token sent with API requests; filled in by _real_initialize
+
+    def _real_initialize(self):
+        """Reuse the accountToken cookie if one is set, otherwise create a guest account"""
+        token = self._get_cookies('https://gofile.io/').get('accountToken')
+        if token:
+            self._TOKEN = token.value
+            return
+        account_data = self._download_json(
+            'https://api.gofile.io/createAccount', None, note='Getting a new guest account')
+        self._TOKEN = account_data['data']['token']
+        self._set_cookie('gofile.io', 'accountToken', self._TOKEN)
+
+    def _entries(self, file_id):
+        """Yield an entry per video/audio file that has a direct link; raise if no such file exists"""
+        files = self._download_json(
+            f'https://api.gofile.io/getContent?contentId={file_id}&token={self._TOKEN}&websiteToken=websiteToken&cache=true',
+            file_id, note='Getting filelist')  # report progress under the real content id
+        status = files['status']
+        if status != 'ok':
+            raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True)
+
+        found_files = False
+        for file in (try_get(files, lambda x: x['data']['contents'], dict) or {}).values():
+            file_type, _, file_format = (file.get('mimetype') or '').partition('/')  # tolerate a missing or slash-less mimetype
+            if file_type not in ('video', 'audio') and file_format != 'vnd.mts':
+                continue
+
+            found_files = True
+            file_url = file.get('directLink')
+            if file_url:
+                yield {
+                    'id': file['id'],
+                    'title': file['name'].rsplit('.', 1)[0],
+                    'url': file_url,
+                    'filesize': file.get('size'),
+                    'release_timestamp': file.get('createTime')
+                }
+
+        if not found_files:
+            raise ExtractorError('No video/audio found at provided URL.', expected=True)
+
+    def _real_extract(self, url):
+        file_id = self._match_id(url)
+        return self.playlist_result(self._entries(file_id), playlist_id=file_id)
diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py
new file mode 100644
index 000000000..81aecb311
--- /dev/null
+++ b/yt_dlp/extractor/skeb.py
@@ -0,0 +1,143 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
+
+
+class SkebIE(InfoExtractor):
+    _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://skeb.jp/@riiru_wm/works/10',
+        'info_dict': {
+            'id': '466853',
+            'title': '内容はおまかせします！ by 姫ノ森りぃる@一周年',
+            'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
+            'uploader': '姫ノ森りぃる@一周年',
+            'uploader_id': 'riiru_wm',
+            'age_limit': 0,
+            'tags': [],
+            'url': r're:https://skeb.+',
+            'thumbnail': r're:https://skeb.+',
+            'subtitles': {
+                'jpn': [{
+                    'url': r're:https://skeb.+',
+                    'ext': 'vtt'
+                }]
+            },
+            'width': 720,
+            'height': 405,
+            'duration': 313,
+            'fps': 30,
+            'ext': 'mp4',
+        },
+    }, {
+        'url': 'https://skeb.jp/@furukawa_nob/works/3',
+        'info_dict': {
+            'id': '489408',
+            'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
+            'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
+            'uploader': '古川ノブ@音楽とVlogのVtuber',
+            'uploader_id': 'furukawa_nob',
+            'age_limit': 0,
+            'tags': [
+                'よろしく', '大丈夫', 'お願い', 'でした',
+                '是非', 'O', 'バー', '遊び', 'おはよう',
+                'オーバ', 'ボイス',
+            ],
+            'url': r're:https://skeb.+',
+            'thumbnail': r're:https://skeb.+',
+            'subtitles': {
+                'jpn': [{
+                    'url': r're:https://skeb.+',
+                    'ext': 'vtt'
+                }]
+            },
+            'duration': 98,
+            'ext': 'mp3',
+            'vcodec': 'none',
+            'abr': 128,
+        },
+    }, {
+        'url': 'https://skeb.jp/@mollowmollow/works/6',
+        'info_dict': {
+            'id': '6',
+            'title': 'ヒロ。\n\n私のキャラク... by 諸々',
+            'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
+            '_type': 'playlist',
+            'entries': [{
+                'id': '486430',
+                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
+                'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
+            }, {
+                'id': '486431',
+                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
+            }]
+        }
+    }]
+
+    def _real_extract(self, url):
+        """Return the single mp4/mp3 preview of a work, or a playlist when it has several"""
+        video_id = self._match_id(url)
+        nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id)
+
+        parent = {
+            'id': video_id,
+            'title': nuxt_data.get('title'),
+            'description': nuxt_data.get('description'),
+            'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
+            'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
+            'age_limit': 18 if nuxt_data.get('nsfw') else 0,
+            'tags': nuxt_data.get('tag_list'),
+        }
+        entries = []
+        for item in nuxt_data.get('previews') or []:
+            vid_url = item.get('url')
+            if not vid_url or not item.get('id'):  # checked first: the URL is parsed below
+                continue
+            given_ext = traverse_obj(item, ('information', 'extension'))
+            preview_ext = determine_ext(vid_url, default_ext=None)
+            if not preview_ext:
+                content_disposition = parse_qs(vid_url).get('response-content-disposition', [''])[0]  # absent parameter -> no extension, preview skipped
+                preview_ext = self._search_regex(
+                    r'filename="[^"]+\.([^\.]+?)"', content_disposition,
+                    'preview file extension', fatal=False, group=1)
+            if preview_ext not in ('mp4', 'mp3'):
+                continue
+            width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height'))
+            if width is not None and height is not None:
+                # the longest side is at most 720px for non-client viewers
+                max_size = max(width, height)
+                width, height = list(x * 720 // max_size for x in (width, height))
+            entries.append({
+                **parent,
+                'id': str(item['id']),
+                'url': vid_url,
+                'thumbnail': item.get('poster_url'),
+                'subtitles': {
+                    'jpn': [{
+                        'url': item.get('vtt_url'),
+                        'ext': 'vtt',
+                    }]
+                } if item.get('vtt_url') else None,
+                'width': width,
+                'height': height,
+                'duration': traverse_obj(item, ('information', 'duration')),
+                'fps': traverse_obj(item, ('information', 'frame_rate')),
+                'ext': preview_ext or given_ext,
+                'vcodec': 'none' if preview_ext == 'mp3' else None,
+                # you'll always get 128kbps MP3 for non-client viewers
+                'abr': 128 if preview_ext == 'mp3' else None,
+            })
+
+        if not entries:
+            raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
+        elif len(entries) == 1:
+            return entries[0]
+        else:
+            parent.update({
+                '_type': 'playlist',
+                'entries': entries,
+            })
+            return parent
diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
index df236c050..6f7f801e1 100644
--- a/yt_dlp/extractor/zdf.py
+++ b/yt_dlp/extractor/zdf.py
@@ -15,6 +15,7 @@ from ..utils import (
     orderedSet,
     parse_codecs,
     qualities,
+    traverse_obj,
     try_get,
     unified_timestamp,
     update_url_query,
@@ -135,19 +136,6 @@ class ZDFBaseIE(InfoExtractor):
 class ZDFIE(ZDFBaseIE):
     _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
     _TESTS = [{
-        # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
-        'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
-        'md5': '34ec321e7eb34231fd88616c65c92db0',
-        'info_dict': {
-            'id': '210222_phx_nachgehakt_corona_protest',
-            'ext': 'mp4',
-            'title': 'Wohin führt der Protest in der Pandemie?',
-            'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
-            'duration': 1691,
-            'timestamp': 1613948400,
-            'upload_date': '20210221',
-        },
-    }, {
         # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
         'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
         'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
@@ -172,6 +160,18 @@ class ZDFIE(ZDFBaseIE):
             'upload_date': '20160604',
         },
     }, {
+        'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
+        'md5': '3d6f1049e9682178a11c54b91f3dd065',
+        'info_dict': {
+            'ext': 'mp4',
+            'id': 'video_funk_1770473',
+            'duration': 1278,
+            'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
+            'title': 'Alles ist verzaubert',
+            'timestamp': 1635520560,
+            'upload_date': '20211029'
+        },
+    }, {
         # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
         'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html',
         'only_matching': True,
@@ -192,6 +192,10 @@ class ZDFIE(ZDFBaseIE):
     }, {
         'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
         'only_matching': True,
+    }, {
+        # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
+        'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
+        'only_matching': True
     }]
 
     def _extract_entry(self, url, player, content, video_id):
@@ -202,8 +206,9 @@ class ZDFIE(ZDFBaseIE):
         ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
 
         if not ptmd_path:
-            ptmd_path = t[
-                'http://zdf.de/rels/streams/ptmd-template'].replace(
+            ptmd_path = traverse_obj(
+                t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
+                'http://zdf.de/rels/streams/ptmd-template').replace(
                 '{playerId}', 'ngplayer_2_4')
 
         info = self._extract_ptmd(
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 120084046..85c7c8cda 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1461,20 +1461,29 @@ def parseOpts(overrideArguments=None):
     sponsorblock.add_option(
         '--sponsorblock-mark', metavar='CATS',
         dest='sponsorblock_mark', default=set(), action='callback', type='str',
-        callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()},
-        help=(
+        callback=_set_from_options_callback, callback_kwargs={
+            'allowed_values': SponsorBlockPP.CATEGORIES.keys(),
+            'aliases': {'default': ['all']}
+        }, help=(
             'SponsorBlock categories to create chapters for, separated by commas. '
-            'Available categories are all, %s. You can prefix the category with a "-" to exempt it. '
-            'See https://wiki.sponsor.ajay.app/index.php/Segment_Categories for description of the categories. '
-            'Eg: --sponsorblock-mark all,-preview' % ', '.join(SponsorBlockPP.CATEGORIES.keys())))
+            f'Available categories are all, default(=all), {", ".join(SponsorBlockPP.CATEGORIES.keys())}. '
+            'You can prefix the category with a "-" to exempt it. See [1] for description of the categories. '
+            'Eg: --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories'))
     sponsorblock.add_option(
         '--sponsorblock-remove', metavar='CATS',
         dest='sponsorblock_remove', default=set(), action='callback', type='str',
-        callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()},
-        help=(
+        callback=_set_from_options_callback, callback_kwargs={
+            'allowed_values': set(SponsorBlockPP.CATEGORIES.keys()) - set(SponsorBlockPP.POI_CATEGORIES.keys()),
+            # Note: From https://wiki.sponsor.ajay.app/w/Types:
+            # The filler category is very aggressive.
+            # It is strongly recommended to not use this in a client by default.
+            'aliases': {'default': ['all', '-filler']}
+        }, help=(
             'SponsorBlock categories to be removed from the video file, separated by commas. '
             'If a category is present in both mark and remove, remove takes precedence. '
-            'The syntax and available categories are the same as for --sponsorblock-mark'))
+            'The syntax and available categories are the same as for --sponsorblock-mark '
+            'except that "default" refers to "all,-filler" '
+            f'and {", ".join(SponsorBlockPP.POI_CATEGORIES.keys())} is not available'))
     sponsorblock.add_option(
         '--sponsorblock-chapter-title', metavar='TEMPLATE',
         default=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, dest='sponsorblock_chapter_title',
diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py
index 0728bdcf5..91433c364 100644
--- a/yt_dlp/postprocessor/modify_chapters.py
+++ b/yt_dlp/postprocessor/modify_chapters.py
@@ -24,7 +24,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
                  *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False):
         FFmpegPostProcessor.__init__(self, downloader)
         self._remove_chapters_patterns = set(remove_chapters_patterns or [])
-        self._remove_sponsor_segments = set(remove_sponsor_segments or [])
+        self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.POI_CATEGORIES.keys())
         self._ranges_to_remove = set(remove_ranges or [])
         self._sponsorblock_chapter_title = sponsorblock_chapter_title
         self._force_keyframes = force_keyframes
@@ -302,7 +302,7 @@ class ModifyChaptersPP(FFmpegPostProcessor):
                     'name': SponsorBlockPP.CATEGORIES[category],
                     'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats]
                 })
-                c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c)
+                c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy())
                 # Merge identically named sponsors.
                 if (new_chapters and 'categories' in new_chapters[-1]
                         and new_chapters[-1]['title'] == c['title']):
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
index 70c5462d1..cd48b15ae 100644
--- a/yt_dlp/postprocessor/sponsorblock.py
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -10,18 +10,23 @@ from ..utils import PostProcessingError, network_exceptions, sanitized_Request
 
 
 class SponsorBlockPP(FFmpegPostProcessor):
-
+    # https://wiki.sponsor.ajay.app/w/Types
     EXTRACTORS = {
         'Youtube': 'YouTube',
     }
+    POI_CATEGORIES = {
+        'poi_highlight': 'Highlight',
+    }
     CATEGORIES = {
         'sponsor': 'Sponsor',
         'intro': 'Intermission/Intro Animation',
         'outro': 'Endcards/Credits',
         'selfpromo': 'Unpaid/Self Promotion',
-        'interaction': 'Interaction Reminder',
         'preview': 'Preview/Recap',
-        'music_offtopic': 'Non-Music Section'
+        'filler': 'Filler Tangent',
+        'interaction': 'Interaction Reminder',
+        'music_offtopic': 'Non-Music Section',
+        **POI_CATEGORIES,
     }
 
     def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
@@ -47,6 +52,9 @@ class SponsorBlockPP(FFmpegPostProcessor):
             # Ignore milliseconds difference at the start.
             if start_end[0] <= 1:
                 start_end[0] = 0
+            # Make POI chapters 1 sec so that we can properly mark them
+            if s['category'] in self.POI_CATEGORIES.keys():
+                start_end[1] += 1
             # Ignore milliseconds difference at the end.
             # Never allow the segment to exceed the video.
             if duration and duration - start_end[1] <= 1:
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 0ca7ed738..10c35cbb9 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -38,6 +38,7 @@ import time
 import traceback
 import xml.etree.ElementTree
 import zlib
+import mimetypes
 
 from .compat import (
     compat_HTMLParseError,
@@ -4715,6 +4716,14 @@ def mimetype2ext(mt):
     return subtype.replace('+', '.')
 
 
+def ext2mimetype(ext_or_url):
+    """Guess the MIME type of a bare extension or of a filename/URL; None for empty input"""
+    if not ext_or_url:
+        return None
+    filename = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
+    return mimetypes.guess_type(filename)[0]
+
+
 def parse_codecs(codecs_str):
     # http://tools.ietf.org/html/rfc6381
     if not codecs_str: