diff options
Diffstat (limited to 'yt_dlp/extractor/youtube.py')
-rw-r--r-- | yt_dlp/extractor/youtube.py | 116 |
1 files changed, 59 insertions, 57 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1f5009399..852fbd78e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -668,6 +668,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return text @staticmethod + def _extract_thumbnails(data, *path_list): + """ + Extract thumbnails from thumbnails dict + @param path_list: path list to level that contains 'thumbnails' key + """ + thumbnails = [] + for path in path_list or [()]: + for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...), default=[]): + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + # Sometimes youtube gives a wrong thumbnail URL. See: + # https://github.com/yt-dlp/yt-dlp/issues/233 + # https://github.com/ytdl-org/youtube-dl/issues/28023 + if 'maxresdefault' in thumbnail_url: + thumbnail_url = thumbnail_url.split('?')[0] + thumbnails.append({ + 'url': thumbnail_url, + 'height': int_or_none(thumbnail.get('height')), + 'width': int_or_none(thumbnail.get('width')), + }) + return thumbnails + + @staticmethod def extract_relative_time(relative_time_text): """ Extracts a relative time from string and converts to dt object @@ -783,6 +807,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): overlay_style = traverse_obj( renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str) badges = self._extract_badges(renderer) + thumbnails = self._extract_thumbnails(renderer, 'thumbnail') + return { '_type': 'url', 'ie_key': YoutubeIE.ie_key(), @@ -794,6 +820,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'view_count': view_count, 'uploader': uploader, 'channel_id': channel_id, + 'thumbnails': thumbnails, 'upload_date': strftime_or_none(timestamp, '%Y%m%d'), 'live_status': ('is_upcoming' if scheduled_timestamp is not None else 'was_live' if 'streamed' in time_text.lower() @@ -1750,16 +1777,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._player_cache = {} def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data): - EXPIRATION_DURATION = 18_000 lock = threading.Lock() is_live = True - expiration_time = time.time() + EXPIRATION_DURATION + start_time = time.time() formats = [f for f in formats if f.get('is_from_start')] - def refetch_manifest(format_id): - nonlocal formats, expiration_time, is_live - if time.time() <= expiration_time: + def refetch_manifest(format_id, delay): + nonlocal formats, start_time, is_live + if time.time() <= start_time + delay: return _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url) @@ -1769,19 +1795,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): prs, (..., 'microformat', 'playerMicroformatRenderer'), expected_type=dict, default=[]) _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url) - expiration_time = time.time() + EXPIRATION_DURATION + start_time = time.time() - def mpd_feed(format_id): + def mpd_feed(format_id, delay): """ @returns (manifest_url, manifest_stream_number, is_live) or None """ with lock: - refetch_manifest(format_id) + refetch_manifest(format_id, delay) f = next((f for f in formats if f['format_id'] == format_id), None) if not f: - self.report_warning( - f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}') + if not is_live: + self.to_screen(f'{video_id}: Video is no longer live') + else: + self.report_warning( + f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}') return None return f['manifest_url'], f['manifest_stream_number'], is_live @@ -1812,9 +1841,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url # Obtain from MPD's maximum seq value old_mpd_url = mpd_url - mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False) - if old_mpd_url == mpd_url and not refresh_sequence: - return True, last_seq + last_error = ctx.pop('last_error', None) + expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403 + mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000) + or (mpd_url, stream_number, False)) + if not refresh_sequence: + if expire_fast and not is_live: + return False, last_seq + elif old_mpd_url == mpd_url: + return True, last_seq try: fmts, _ = self._extract_mpd_formats_and_subtitles( mpd_url, None, note=False, errnote=False, fatal=False) @@ -1848,8 +1883,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): last_segment_url = None continue else: - should_retry, last_seq = _extract_sequence_from_mpd(True) - if not should_retry: + should_continue, last_seq = _extract_sequence_from_mpd(True) + if not should_continue: continue if known_idx > last_seq: @@ -1866,9 +1901,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: for idx in range(known_idx, last_seq): # do not update sequence here or you'll get skipped some part of it - should_retry, _ = _extract_sequence_from_mpd(False) - if not should_retry: - # retry when it gets weird state + should_continue, _ = _extract_sequence_from_mpd(False) + if not should_continue: known_idx = idx - 1 raise ExtractorError('breaking out of outer loop') last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx) @@ -2903,25 +2937,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if f.get('vcodec') != 'none': f['stretched_ratio'] = ratio break - - thumbnails = [] - thumbnail_dicts = traverse_obj( - (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...), - expected_type=dict, default=[]) - for thumbnail in thumbnail_dicts: - thumbnail_url = thumbnail.get('url') - if not thumbnail_url: - continue - # Sometimes youtube gives a wrong thumbnail URL. See: - # https://github.com/yt-dlp/yt-dlp/issues/233 - # https://github.com/ytdl-org/youtube-dl/issues/28023 - if 'maxresdefault' in thumbnail_url: - thumbnail_url = thumbnail_url.split('?')[0] - thumbnails.append({ - 'url': thumbnail_url, - 'height': int_or_none(thumbnail.get('height')), - 'width': int_or_none(thumbnail.get('width')), - }) + thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail')) thumbnail_url = search_meta(['og:image', 'twitter:image']) if thumbnail_url: thumbnails.append({ @@ -3584,7 +3600,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): def _extract_from_tabs(self, item_id, ytcfg, data, tabs): playlist_id = title = description = channel_url = channel_name = channel_id = None - thumbnails_list = [] tags = [] selected_tab = self._extract_selected_tab(tabs) @@ -3603,26 +3618,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): description = renderer.get('description', '') playlist_id = channel_id tags = renderer.get('keywords', '').split() - thumbnails_list = ( - try_get(renderer, lambda x: x['avatar']['thumbnails'], list) - or try_get( - self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'), - lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'], - list) - or []) - thumbnails = [] - for t in thumbnails_list: - if not isinstance(t, dict): - continue - thumbnail_url = url_or_none(t.get('url')) - if not thumbnail_url: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int_or_none(t.get('width')), - 'height': int_or_none(t.get('height')), - }) + thumbnails = ( + self._extract_thumbnails(renderer, 'avatar') + or self._extract_thumbnails( + self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'), + ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))) + if playlist_id is None: playlist_id = item_id if title is None: |