aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/youtube.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/youtube.py')
-rw-r--r--yt_dlp/extractor/youtube.py116
1 files changed, 59 insertions, 57 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1f5009399..852fbd78e 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -668,6 +668,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return text
@staticmethod
+    def _extract_thumbnails(data, *path_list):
+        """
+        Extract thumbnails from thumbnails dict
+
+        Collects every entry found under the 'thumbnails' key at each given
+        path and returns them as a list of {'url', 'height', 'width'} dicts,
+        in the order the paths were given. Entries that are not dicts or that
+        have no usable URL are skipped.
+
+        @param data: object to search (passed as-is to traverse_obj)
+        @param path_list: path list to level that contains 'thumbnails' key;
+                          each path may be a single key or a tuple of keys.
+                          If omitted, 'thumbnails' is looked up directly in data
+        @returns list of thumbnail dicts (empty if nothing was found)
+        """
+        thumbnails = []
+        for path in path_list or [()]:
+            # expected_type=dict drops malformed (non-dict) entries so that
+            # thumbnail.get() below cannot raise AttributeError. The code this
+            # helper replaces had the same guard at both call sites.
+            for thumbnail in traverse_obj(
+                    data, (*variadic(path), 'thumbnails', ...), expected_type=dict, default=[]):
+                thumbnail_url = url_or_none(thumbnail.get('url'))
+                if not thumbnail_url:
+                    continue
+                # Sometimes youtube gives a wrong thumbnail URL. See:
+                # https://github.com/yt-dlp/yt-dlp/issues/233
+                # https://github.com/ytdl-org/youtube-dl/issues/28023
+                if 'maxresdefault' in thumbnail_url:
+                    # Keep only the part before the query string
+                    thumbnail_url = thumbnail_url.split('?')[0]
+                thumbnails.append({
+                    'url': thumbnail_url,
+                    'height': int_or_none(thumbnail.get('height')),
+                    'width': int_or_none(thumbnail.get('width')),
+                })
+        return thumbnails
+
+ @staticmethod
def extract_relative_time(relative_time_text):
"""
Extracts a relative time from string and converts to dt object
@@ -783,6 +807,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
overlay_style = traverse_obj(
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
badges = self._extract_badges(renderer)
+ thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
@@ -794,6 +820,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'view_count': view_count,
'uploader': uploader,
'channel_id': channel_id,
+ 'thumbnails': thumbnails,
'upload_date': strftime_or_none(timestamp, '%Y%m%d'),
'live_status': ('is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower()
@@ -1750,16 +1777,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self._player_cache = {}
def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
- EXPIRATION_DURATION = 18_000
lock = threading.Lock()
is_live = True
- expiration_time = time.time() + EXPIRATION_DURATION
+ start_time = time.time()
formats = [f for f in formats if f.get('is_from_start')]
- def refetch_manifest(format_id):
- nonlocal formats, expiration_time, is_live
- if time.time() <= expiration_time:
+ def refetch_manifest(format_id, delay):
+ nonlocal formats, start_time, is_live
+ if time.time() <= start_time + delay:
return
_, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
@@ -1769,19 +1795,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
prs, (..., 'microformat', 'playerMicroformatRenderer'),
expected_type=dict, default=[])
_, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
- expiration_time = time.time() + EXPIRATION_DURATION
+ start_time = time.time()
- def mpd_feed(format_id):
+ def mpd_feed(format_id, delay):
"""
@returns (manifest_url, manifest_stream_number, is_live) or None
"""
with lock:
- refetch_manifest(format_id)
+ refetch_manifest(format_id, delay)
f = next((f for f in formats if f['format_id'] == format_id), None)
if not f:
- self.report_warning(
- f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
+ if not is_live:
+ self.to_screen(f'{video_id}: Video is no longer live')
+ else:
+ self.report_warning(
+ f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
return None
return f['manifest_url'], f['manifest_stream_number'], is_live
@@ -1812,9 +1841,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
# Obtain from MPD's maximum seq value
old_mpd_url = mpd_url
- mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
- if old_mpd_url == mpd_url and not refresh_sequence:
- return True, last_seq
+ last_error = ctx.pop('last_error', None)
+ expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403
+ mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
+ or (mpd_url, stream_number, False))
+ if not refresh_sequence:
+ if expire_fast and not is_live:
+ return False, last_seq
+ elif old_mpd_url == mpd_url:
+ return True, last_seq
try:
fmts, _ = self._extract_mpd_formats_and_subtitles(
mpd_url, None, note=False, errnote=False, fatal=False)
@@ -1848,8 +1883,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
last_segment_url = None
continue
else:
- should_retry, last_seq = _extract_sequence_from_mpd(True)
- if not should_retry:
+ should_continue, last_seq = _extract_sequence_from_mpd(True)
+ if not should_continue:
continue
if known_idx > last_seq:
@@ -1866,9 +1901,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
for idx in range(known_idx, last_seq):
# do not update sequence here or you'll get skipped some part of it
- should_retry, _ = _extract_sequence_from_mpd(False)
- if not should_retry:
- # retry when it gets weird state
+ should_continue, _ = _extract_sequence_from_mpd(False)
+ if not should_continue:
known_idx = idx - 1
raise ExtractorError('breaking out of outer loop')
last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
@@ -2903,25 +2937,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio
break
-
- thumbnails = []
- thumbnail_dicts = traverse_obj(
- (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
- expected_type=dict, default=[])
- for thumbnail in thumbnail_dicts:
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- # Sometimes youtube gives a wrong thumbnail URL. See:
- # https://github.com/yt-dlp/yt-dlp/issues/233
- # https://github.com/ytdl-org/youtube-dl/issues/28023
- if 'maxresdefault' in thumbnail_url:
- thumbnail_url = thumbnail_url.split('?')[0]
- thumbnails.append({
- 'url': thumbnail_url,
- 'height': int_or_none(thumbnail.get('height')),
- 'width': int_or_none(thumbnail.get('width')),
- })
+ thumbnails = self._extract_thumbnails((video_details, microformats), (..., ..., 'thumbnail'))
thumbnail_url = search_meta(['og:image', 'twitter:image'])
if thumbnail_url:
thumbnails.append({
@@ -3584,7 +3600,6 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
playlist_id = title = description = channel_url = channel_name = channel_id = None
- thumbnails_list = []
tags = []
selected_tab = self._extract_selected_tab(tabs)
@@ -3603,26 +3618,13 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
description = renderer.get('description', '')
playlist_id = channel_id
tags = renderer.get('keywords', '').split()
- thumbnails_list = (
- try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
- or try_get(
- self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
- lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
- list)
- or [])
- thumbnails = []
- for t in thumbnails_list:
- if not isinstance(t, dict):
- continue
- thumbnail_url = url_or_none(t.get('url'))
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': int_or_none(t.get('width')),
- 'height': int_or_none(t.get('height')),
- })
+ thumbnails = (
+ self._extract_thumbnails(renderer, 'avatar')
+ or self._extract_thumbnails(
+ self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
+ ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail')))
+
if playlist_id is None:
playlist_id = item_id
if title is None: