1 files changed, 59 insertions, 57 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1f5009399..852fbd78e 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -668,6 +668,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                     return text
 
     @staticmethod
+    def _extract_thumbnails(data, *path_list):
+        """
+        Extract thumbnails as a list of {'url', 'height', 'width'} dicts; entries that are not dicts or lack a valid URL are skipped
+        @param path_list: path list to level that contains 'thumbnails' key; when empty, 'thumbnails' is looked up directly in data
+        """
+        thumbnails = []
+        for path in path_list or [()]:
+            for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), expected_type=dict, default=[]):
+                thumbnail_url = url_or_none(thumbnail.get('url'))
+                if not thumbnail_url:
+                    continue
+                # Sometimes youtube gives a wrong thumbnail URL. See:
+                # https://github.com/yt-dlp/yt-dlp/issues/233
+                # https://github.com/ytdl-org/youtube-dl/issues/28023
+                if 'maxresdefault' in thumbnail_url:
+                    thumbnail_url = thumbnail_url.split('?')[0]  # keep only the part before the query string
+                thumbnails.append({
+                    'url': thumbnail_url,
+                    'height': int_or_none(thumbnail.get('height')),
+                    'width': int_or_none(thumbnail.get('width')),
+                })
+        return thumbnails
+
+    @staticmethod
     def extract_relative_time(relative_time_text):
         """
         Extracts a relative time from string and converts to dt object
@@ -783,6 +807,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         overlay_style = traverse_obj(
             renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
         badges = self._extract_badges(renderer)
+        thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+
         return {
             '_type': 'url',
             'ie_key': YoutubeIE.ie_key(),
@@ -794,6 +820,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             'view_count': view_count,
             'uploader': uploader,
             'channel_id': channel_id,
+            'thumbnails': thumbnails,
             'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
             'live_status': ('is_upcoming' if scheduled_timestamp is not None
                             else 'was_live' if 'streamed' in time_text.lower()
@@ -1750,16 +1777,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         self._player_cache = {}
 
     def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
-        EXPIRATION_DURATION = 18_000
         lock = threading.Lock()
 
         is_live = True
-        expiration_time = time.time() + EXPIRATION_DURATION
+        start_time = time.time()
         formats = [f for f in formats if f.get('is_from_start')]
 
-        def refetch_manifest(format_id):
-            nonlocal formats, expiration_time, is_live
-            if time.time() <= expiration_time:
+        def refetch_manifest(format_id, delay):
+            nonlocal formats, start_time, is_live
+            if time.time() <= start_time + delay:
                 return
 
             _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
@@ -1769,19 +1795,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 prs, (..., 'microformat', 'playerMicroformatRenderer'),
                 expected_type=dict, default=[])
             _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
-            expiration_time = time.time() + EXPIRATION_DURATION
+            start_time = time.time()
 
-        def mpd_feed(format_id):
+        def mpd_feed(format_id, delay):
             """
             @returns (manifest_url, manifest_stream_number, is_live) or None
             """
             with lock:
-                refetch_manifest(format_id)
+                refetch_manifest(format_id, delay)
 
             f = next((f for f in formats if f['format_id'] == format_id), None)
             if not f:
-                self.report_warning(
-                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
+                if not is_live:
+                    self.to_screen(f'{video_id}: Video is no longer live')
+                else:
+                    self.report_warning(
+                        f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
                 return None
             return f['manifest_url'], f['manifest_stream_number'], is_live
 
@@ -1812,9 +1841,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
             # Obtain from MPD's maximum seq value
             old_mpd_url = mpd_url
-            mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
-            if old_mpd_url == mpd_url and not refresh_sequence:
-                return True, last_seq
+            last_error = ctx.pop('last_error', None)
+            expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+            mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
+                                               or (mpd_url, stream_number, False))
+            if not refresh_sequence:
+                if expire_fast and not is_live:
+                    return False, last_seq
+                elif old_mpd_url == mpd_url:
+                    return True, last_seq
             try:
                 fmts, _ = self._extract_mpd_formats_and_subtitles(
                     mpd_url, None, note=False, errnote=False, fatal=False)
@@ -1848,8 +1883,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     last_segment_url = None
                     continue
             else:
-                should_retry, last_seq = _extract_sequence_from_mpd(True)
-                if not should_retry:
+                should_continue, last_seq = _extract_sequence_from_mpd(True)
+                if not should_continue:
                     continue
 
             if known_idx > last_seq:
@@ -1866,9 +1901,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             try:
                 for idx in range(known_idx, last_seq):
                     # do not update sequence here or you'll get skipped some part of it
-                    should_retry, _ = _extract_sequence_from_mpd(False)
-                    if not should_retry:
-                        # retry when it gets weird state
+                    should_continue, _ = _extract_sequence_from_mpd(False)
+                    if not should_continue:
                         known_idx = idx - 1
                         raise ExtractorError('breaking out of outer loop')
                     last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
@@ -2903,25 +2937,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                             if f.get('vcodec') != 'none':
                                 f['stretched_ratio'] = ratio
                         break
-
-        thumbnails = []
-        thumbnail_dicts = traverse_obj(
-            (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
-            expected_type=dict, default=[])
-        for thumbnail in thumbnail_dicts:
-            thumbnail_url = thumbnail.get('url')
-            if not thumbnail_url:
-                continue
-            # Sometimes youtube gives a wrong thumbnail URL. See:
-            # https://github.com/yt-dlp/yt-dlp/issues/233
-            # https://github.com/ytdl-org/youtube-dl/issues/28023
-            if 'maxresdefault' in thumbnail_url:
-                thumbnail_url = thumbnail_url.split('?')[0]
-            thumbnails.append({
-                'url': thumbnail_url,
-                'height': int_or_none(thumbnail.get('height')),
-                'width': int_or_none(thumbnail.get('width')),
-            })
+        thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
         thumbnail_url = search_meta(['og:image', 'twitter:image'])
         if thumbnail_url:
             thumbnails.append({
@@ -3584,7 +3600,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
 
     def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
         playlist_id = title = description = channel_url = channel_name = channel_id = None
-        thumbnails_list = []
         tags = []
 
         selected_tab = self._extract_selected_tab(tabs)
@@ -3603,26 +3618,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
             description = renderer.get('description', '')
             playlist_id = channel_id
             tags = renderer.get('keywords', '').split()
-            thumbnails_list = (
-                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
-                or try_get(
-                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
-                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
-                    list)
-                or [])
 
-        thumbnails = []
-        for t in thumbnails_list:
-            if not isinstance(t, dict):
-                continue
-            thumbnail_url = url_or_none(t.get('url'))
-            if not thumbnail_url:
-                continue
-            thumbnails.append({
-                'url': thumbnail_url,
-                'width': int_or_none(t.get('width')),
-                'height': int_or_none(t.get('height')),
-            })
+        thumbnails = (
+            self._extract_thumbnails(renderer, 'avatar')
+            or self._extract_thumbnails(
+                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
+                ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))
+
         if playlist_id is None:
             playlist_id = item_id
         if title is None: