about summary refs log tree commit diff stats
path: root/yt_dlp/extractor/youtube.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/youtube.py')
-rw-r--r-- yt_dlp/extractor/youtube.py 187
1 files changed, 159 insertions, 28 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 017554c88..f284487b8 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -384,6 +384,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _real_initialize(self):
self._initialize_pref()
self._initialize_consent()
+ self._check_login_required()
+
+ def _check_login_required(self):
if (self._LOGIN_REQUIRED
and self.get_param('cookiefile') is None
and self.get_param('cookiesfrombrowser') is None):
@@ -563,6 +566,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
headers['X-Origin'] = origin
return {h: v for h, v in headers.items() if v is not None}
+    def _download_ytcfg(self, client, video_id):
+        """Download the webpage of `client` and return the ytcfg extracted from it
+
+        Only the 'web', 'web_music' and 'web_embedded' clients have a page to download.
+        For any other client, an empty dict is returned without making a request.
+        The download is non-fatal; an empty dict is returned if no ytcfg is extracted.
+        """
+        config_pages = {
+            'web': 'https://www.youtube.com',
+            'web_music': 'https://music.youtube.com',
+            'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+        }
+        page_url = config_pages.get(client)
+        if not page_url:
+            return {}
+        webpage = self._download_webpage(
+            page_url, video_id, fatal=False,
+            note=f'Downloading {client.replace("_", " ").strip()} client config')
+        ytcfg = self.extract_ytcfg(video_id, webpage)
+        return ytcfg or {}
+
@staticmethod
def _build_api_continuation_query(continuation, ctp=None):
query = {
@@ -728,6 +743,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return None
def _extract_time_text(self, renderer, *path_list):
+ """@returns (timestamp, time_text)"""
text = self._get_text(renderer, *path_list) or ''
dt = self.extract_relative_time(text)
timestamp = None
@@ -2959,16 +2975,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return orderedSet(requested_clients)
- def _extract_player_ytcfg(self, client, video_id):
- url = {
- 'web_music': 'https://music.youtube.com',
- 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
- }.get(client)
- if not url:
- return {}
- webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())
- return self.extract_ytcfg(video_id, webpage) or {}
-
def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
initial_pr = None
if webpage:
@@ -3005,8 +3011,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
while clients:
client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {}
- if 'configs' not in self._configuration_arg('player_skip'):
- player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
+ if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
+ player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
@@ -4109,14 +4115,15 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
if fatal:
raise ExtractorError('Unable to find selected tab')
- @classmethod
- def _extract_uploader(cls, data):
+ def _extract_uploader(self, data):
uploader = {}
- renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
+ renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
owner = try_get(
renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
if owner:
- uploader['uploader'] = owner.get('text')
+ owner_text = owner.get('text')
+ uploader['uploader'] = self._search_regex(
+ r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text)
uploader['uploader_id'] = try_get(
owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
uploader['uploader_url'] = urljoin(
@@ -4346,6 +4353,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
check_get_keys='contents', fatal=False, ytcfg=ytcfg,
note='Downloading API JSON with unavailable videos')
+    @property
+    def skip_webpage(self):
+        """Whether "webpage" was passed in the `skip` extractor-arg of YoutubeTabIE"""
+        skipped = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
+        return 'webpage' in skipped
+
def _extract_webpage(self, url, item_id, fatal=True):
retries = self.get_param('extractor_retries', 3)
count = -1
@@ -4392,9 +4403,21 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return webpage, data
+    def _report_playlist_authcheck(self, ytcfg, fatal=True):
+        """Use if failed to extract ytcfg (and data) from initial webpage
+
+        Does nothing when `ytcfg` is available or the session is not authenticated.
+        Otherwise raises ExtractorError if `fatal` is set and "authcheck" is not in the
+        `skip` extractor-arg of YoutubeTabIE; in every other case only a warning is reported.
+        """
+        if ytcfg or not self.is_authenticated:
+            return
+        msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
+        skipped_checks = self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
+        if fatal and 'authcheck' not in skipped_checks:
+            raise ExtractorError(
+                f'{msg}. If you are not downloading private content, or '
+                'your cookies are only for the first account and channel,'
+                ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
+                expected=True)
+        self.report_warning(msg, only_once=True)
+
def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
data = None
- if 'webpage' not in self._configuration_arg('skip'):
+ if not self.skip_webpage:
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
# Reject webpage data if redirected to home page without explicitly requesting
@@ -4408,14 +4431,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
raise ExtractorError(msg, expected=True)
self.report_warning(msg, only_once=True)
if not data:
- if not ytcfg and self.is_authenticated:
- msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
- if 'authcheck' not in self._configuration_arg('skip') and fatal:
- raise ExtractorError(
- msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
- ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
- expected=True)
- self.report_warning(msg, only_once=True)
+ self._report_playlist_authcheck(ytcfg, fatal=fatal)
data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
return data, ytcfg
@@ -4453,14 +4469,20 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
('continuationContents', ),
)
+ display_id = f'query "{query}"'
check_get_keys = tuple(set(keys[0] for keys in content_keys))
+ ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
+ self._report_playlist_authcheck(ytcfg, fatal=False)
continuation_list = [None]
+ search = None
for page_num in itertools.count(1):
data.update(continuation_list[0] or {})
+ headers = self.generate_api_headers(
+ ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
search = self._extract_response(
- item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
- default_client=default_client, check_get_keys=check_get_keys)
+ item_id=f'{display_id} page {page_num}', ep='search', query=data,
+ default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
slr_contents = traverse_obj(search, *content_keys)
yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
if not continuation_list[0]:
@@ -5136,6 +5158,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'note': 'non-standard redirect to regional channel',
'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ',
'only_matching': True
+ }, {
+ 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")',
+ 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
+ 'info_dict': {
+ 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6',
+ 'modified_date': '20220407',
+ 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'tags': [],
+ 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'uploader': 'pukkandan',
+ 'availability': 'unlisted',
+ 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q',
+ 'channel': 'pukkandan',
+ 'description': 'Test for collaborative playlist',
+ 'title': 'yt-dlp test - collaborative playlist',
+ 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
+ },
+ 'playlist_mincount': 2
}]
@classmethod
@@ -5486,6 +5526,95 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
ie=YoutubeTabIE.ie_key())
+class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
+    # Extractor for the ":ytnotif" keyword. Each notification becomes a url-type entry
+    # pointing to the video or the community post that the notification links to
+    IE_NAME = 'youtube:notif'
+    IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)'
+    _VALID_URL = r':ytnotif(?:ication)?s?'
+    _LOGIN_REQUIRED = True
+    _TESTS = [{
+        'url': ':ytnotif',
+        'only_matching': True,
+    }, {
+        'url': ':ytnotifications',
+        'only_matching': True,
+    }]
+
+    def _extract_notification_menu(self, response, continuation_list):
+        """Yield an entry for every notification found in `response`
+
+        The items are read from either the initial popup menu or a continuation action.
+        `continuation_list[0]` is reset to None, and then set to the last
+        continuationItemRenderer found among the items (if any).
+        """
+        notification_list = traverse_obj(
+            response,
+            ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'),
+            ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'),
+            expected_type=list) or []
+        continuation_list[0] = None
+        for item in notification_list:
+            entry = self._extract_notification_renderer(item.get('notificationRenderer'))
+            if entry:
+                yield entry
+            continuation = item.get('continuationItemRenderer')
+            if continuation:
+                continuation_list[0] = continuation
+
+    def _extract_notification_renderer(self, notification):
+        """Build a url-type entry from a notificationRenderer
+
+        @returns  A dict pointing to the video (watch endpoint) or to the community post
+                  (browse endpoint) of the notification; None if neither can be identified
+        """
+        video_id = traverse_obj(
+            notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str)
+        url = f'https://www.youtube.com/watch?v={video_id}'
+        channel_id = None
+        if not video_id:
+            browse_ep = traverse_obj(
+                notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict)
+            channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str)
+            # The canonical url may be missing; always hand the regex helper a string
+            post_id = self._search_regex(
+                r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str) or '',
+                'post id', default=None)
+            if not channel_id or not post_id:
+                return
+            # The direct /post url redirects to this in the browser
+            url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}'
+
+        channel = traverse_obj(
+            notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'),
+            expected_type=str)
+        # Both the channel name and the message are optional in the response.
+        # re.escape(None) raises TypeError, so fall back to empty strings
+        title = self._search_regex(
+            rf'{re.escape(channel or "")} [^:]+: (.+)', self._get_text(notification, 'shortMessage') or '',
+            'video title', default=None)
+        if title:
+            title = title.replace('\xad', '')  # remove soft hyphens
+        # The date is derived from the sent-time text, so it is only given when explicitly requested
+        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
+                       if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
+                       else None)
+        return {
+            '_type': 'url',
+            'url': url,
+            'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(),
+            'video_id': video_id,
+            'title': title,
+            'channel_id': channel_id,
+            'channel': channel,
+            'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'),
+            'upload_date': upload_date,
+        }
+
+    def _notification_menu_entries(self, ytcfg):
+        """Yield the entries of all pages of the notification menu
+
+        Pages are requested until a response carries no continuation item.
+        """
+        continuation_list = [None]
+        response = None
+        for page in itertools.count(1):
+            ctoken = traverse_obj(
+                continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str)
+            # The visitor data is taken from the previous response (None for the first page)
+            response = self._extract_response(
+                item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg,
+                ep='notification/get_notification_menu', check_get_keys='actions',
+                headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)))
+            yield from self._extract_notification_menu(response, continuation_list)
+            if not continuation_list[0]:
+                break
+
+    def _real_extract(self, url):
+        """Return the notifications as a playlist
+
+        The web client config is downloaded first unless the webpage is skipped,
+        and the playlist authcheck is run (fatal by default) on the result.
+        """
+        display_id = 'notifications'
+        ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {}
+        self._report_playlist_authcheck(ytcfg)
+        return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id)
+
+
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
IE_DESC = 'YouTube search'
IE_NAME = 'youtube:search'
@@ -5615,7 +5744,9 @@ class YoutubeFeedsInfoExtractor(InfoExtractor):
Subclasses must define the _FEED_NAME property.
"""
_LOGIN_REQUIRED = True
- _TESTS = []
+
+    def _real_initialize(self):
+        """Run the login-required check of YoutubeBaseInfoExtractor on this extractor"""
+        # This class derives from InfoExtractor rather than YoutubeBaseInfoExtractor,
+        # so the check is borrowed by calling the function through its class
+        check_login_required = YoutubeBaseInfoExtractor._check_login_required
+        check_login_required(self)
@property
def IE_NAME(self):