diff options
Diffstat (limited to 'yt_dlp/extractor/youtube.py')
-rw-r--r-- | yt_dlp/extractor/youtube.py | 48 |
1 files changed, 29 insertions, 19 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 804186b85..2e4ce4c12 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2762,28 +2762,36 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): for page_num in itertools.count(1): if not continuation: break - count = 0 - retries = 3 - while count <= retries: + retries = self._downloader.params.get('extractor_retries', 3) + count = -1 + last_error = None + while count < retries: + count += 1 + if last_error: + self.report_warning('%s. Retrying ...' % last_error) try: - # Downloading page may result in intermittent 5xx HTTP error - # that is usually worked around with a retry browse = self._download_json( 'https://www.youtube.com/browse_ajax', None, 'Downloading page %d%s' % (page_num, ' (retry #%d)' % count if count else ''), headers=headers, query=continuation) - break except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503): - count += 1 - if count <= retries: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404): + # Downloading page may result in intermittent 5xx HTTP error + # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + last_error = 'HTTP Error %s' % e.cause.code + if count < retries: continue raise - if not browse: - break - response = try_get(browse, lambda x: x[1]['response'], dict) - if not response: + else: + response = try_get(browse, lambda x: x[1]['response'], dict) + + # Youtube sometimes sends incomplete data + # See: https://github.com/ytdl-org/youtube-dl/issues/28194 + if response.get('continuationContents') or response.get('onResponseReceivedActions'): + break + last_error = 'Incomplete data recieved' + if not browse or not response: break known_continuation_renderers = { @@ -3004,11 +3012,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id) self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) - count = 0 - retries = 3 + retries = self._downloader.params.get('extractor_retries', 3) + count = -1 while count < retries: + count += 1 # Sometimes youtube returns a webpage with incomplete ytInitialData - webpage = self._download_webpage(url, item_id) + # See: https://github.com/yt-dlp/yt-dlp/issues/116 + if count: + self.report_warning('Incomplete yt initial data recieved. Retrying ...') + webpage = self._download_webpage(url, item_id, + 'Downloading webpage%s' % ' (retry #%d)' % count if count else '') identity_token = self._extract_identity_token(webpage, item_id) data = self._extract_yt_initial_data(item_id, webpage) err_msg = None @@ -3023,9 +3036,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): raise ExtractorError('YouTube said: %s' % err_msg, expected=True) if data.get('contents') or data.get('currentVideoEndpoint'): break - count += 1 - self.to_screen( - 'Incomplete yt initial data recieved. Retrying (attempt %d of %d)...' % (count, retries)) tabs = try_get( data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) |