diff options
-rw-r--r-- | yt_dlp/extractor/youtube.py | 72 |
1 file changed, 38 insertions, 34 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 4954a2d8c..7f514d35a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals +import hashlib import itertools import json import os.path @@ -274,7 +275,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'context': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20201021.03.00', + 'clientVersion': '2.20210301.08.00', } }, } @@ -283,15 +284,28 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)' - def _call_api(self, ep, query, video_id, fatal=True): + def _generate_sapisidhash_header(self): + sapisid_cookie = self._get_cookies('https://www.youtube.com').get('SAPISID') + if sapisid_cookie is None: + return + time_now = round(time.time()) + sapisidhash = hashlib.sha1((str(time_now) + " " + sapisid_cookie.value + " " + "https://www.youtube.com").encode("utf-8")).hexdigest() + return "SAPISIDHASH %s_%s" % (time_now, sapisidhash) + + def _call_api(self, ep, query, video_id, fatal=True, headers=None, + note='Downloading API JSON', errnote='Unable to download API page'): data = self._DEFAULT_API_DATA.copy() data.update(query) + headers = headers or {} + headers.update({'content-type': 'application/json'}) + auth = self._generate_sapisidhash_header() + if auth is not None: + headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'}) return self._download_json( - 'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id, - note='Downloading API JSON', errnote='Unable to download API page', - data=json.dumps(data).encode('utf8'), fatal=fatal, - headers={'content-type': 'application/json'}, + 'https://www.youtube.com/youtubei/v1/%s' % ep, + video_id=video_id, fatal=fatal, note=note, errnote=errnote, + data=json.dumps(data).encode('utf8'), headers=headers, 
query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'}) def _extract_yt_initial_data(self, video_id, webpage): @@ -2699,7 +2713,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): ctp = continuation_ep.get('clickTrackingParams') return YoutubeTabIE._build_continuation_query(continuation, ctp) - def _entries(self, tab, identity_token): + def _entries(self, tab, identity_token, item_id): def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] @@ -2770,11 +2784,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if last_error: self.report_warning('%s. Retrying ...' % last_error) try: - browse = self._download_json( - 'https://www.youtube.com/browse_ajax', None, - 'Downloading page %d%s' - % (page_num, ' (retry #%d)' % count if count else ''), - headers=headers, query=continuation) + response = self._call_api( + ep="browse", fatal=True, headers=headers, + video_id='%s page %s' % (item_id, page_num), + query={ + 'continuation': continuation['continuation'], + 'clickTracking': {'clickTrackingParams': continuation['itct']}, + }, + note='Downloading API JSON%s' % (' (retry #%d)' % count if count else '')) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404): # Downloading page may result in intermittent 5xx HTTP error @@ -2784,8 +2801,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): continue raise else: - response = try_get(browse, lambda x: x[1]['response'], dict) - # Youtube sometimes sends incomplete data # See: https://github.com/ytdl-org/youtube-dl/issues/28194 if response.get('continuationContents') or response.get('onResponseReceivedActions'): @@ -2793,7 +2808,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): last_error = 'Incomplete data recieved' if count >= retries: self._downloader.report_error(last_error) - if not browse or not response: + + if not response: break 
known_continuation_renderers = { @@ -2936,7 +2952,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'channel_id': metadata['uploader_id'], 'channel_url': metadata['uploader_url']}) return self.playlist_result( - self._entries(selected_tab, identity_token), + self._entries(selected_tab, identity_token, playlist_id), **metadata) def _extract_from_playlist(self, item_id, url, data, playlist): @@ -3223,26 +3239,14 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): _TESTS = [] def _entries(self, query, n): - data = { - 'context': { - 'client': { - 'clientName': 'WEB', - 'clientVersion': '2.20201021.03.00', - } - }, - 'query': query, - } + data = {'query': query} if self._SEARCH_PARAMS: data['params'] = self._SEARCH_PARAMS total = 0 for page_num in itertools.count(1): - search = self._download_json( - 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', - video_id='query "%s"' % query, - note='Downloading page %s' % page_num, - errnote='Unable to download API page', fatal=False, - data=json.dumps(data).encode('utf8'), - headers={'content-type': 'application/json'}) + search = self._call_api( + ep='search', video_id='query "%s"' % query, fatal=False, + note='Downloading page %s' % page_num, query=data) if not search: break slr_contents = try_get( @@ -3394,8 +3398,8 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): - IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' - _VALID_URL = r':ythistory' + IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)' + _VALID_URL = r':ythis(?:tory)?' _FEED_NAME = 'history' _TESTS = [{ 'url': ':ythistory', |