diff options
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- | yt_dlp/extractor/common.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/soundcloud.py | 16 | ||||
-rw-r--r-- | yt_dlp/extractor/tiktok.py | 28 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 154 |
4 files changed, 82 insertions, 124 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a6933e738..0ae0f4301 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -32,6 +32,7 @@ from ..utils import ( GeoUtils, LenientJSONDecoder, RegexNotFoundError, + RetryManager, UnsupportedError, age_restricted, base_url, @@ -3848,6 +3849,13 @@ class InfoExtractor: self.to_screen(f'Downloading {playlist_label}{playlist_id} - add --no-playlist to download just the {video_label}{video_id}') return True + def _error_or_warning(self, err, _count=None, _retries=0, *, fatal=True): + RetryManager.report_retry(err, _count or int(fatal), _retries, info=self.to_screen, warn=self.report_warning, + sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor')) + + def RetryManager(self, **kwargs): + return RetryManager(self.get_param('extractor_retries', 3), self._error_or_warning, **kwargs) + @classmethod def extract_from_webpage(cls, ydl, url, webpage): ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index f7e125d37..2730052a0 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -19,7 +19,6 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, mimetype2ext, - remove_end, parse_qs, str_or_none, try_get, @@ -661,25 +660,20 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE): 'offset': 0, } - retries = self.get_param('extractor_retries', 3) - for i in itertools.count(): - attempt, last_error = -1, None - while attempt < retries: - attempt += 1 - if last_error: - self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'), playlist_id) + for retry in self.RetryManager(): try: response = self._download_json( url, playlist_id, query=query, headers=self._HEADERS, - note='Downloading track page %s%s' % (i + 1, f' (retry #{attempt})' if attempt else '')) + note=f'Downloading track page {i + 1}') break except ExtractorError as e: # Downloading page may result in intermittent 502 HTTP error # See https://github.com/yt-dlp/yt-dlp/issues/872 - if attempt >= retries or not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502: + if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502: raise - last_error = str(e.cause or e.msg) + retry.error = e + continue def resolve_entry(*candidates): for cand in candidates: diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 3ac765270..c58538394 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -630,19 +630,17 @@ class TikTokUserIE(TikTokBaseIE): 'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. } - max_retries = self.get_param('extractor_retries', 3) for page in itertools.count(1): - for retries in itertools.count(): + for retry in self.RetryManager(): try: - post_list = self._call_api('aweme/post', query, username, - note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''), - errnote='Unable to download user video list') + post_list = self._call_api( + 'aweme/post', query, username, note=f'Downloading user video list page {page}', + errnote='Unable to download user video list') except ExtractorError as e: - if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries: - self.report_warning('%s. Retrying...' % str(e.cause or e.msg)) + if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: + retry.error = e continue raise - break yield from post_list.get('aweme_list', []) if not post_list.get('has_more'): break @@ -680,19 +678,17 @@ class TikTokBaseListIE(TikTokBaseIE): 'device_id': ''.join(random.choice(string.digits) for i in range(19)) } - max_retries = self.get_param('extractor_retries', 3) for page in itertools.count(1): - for retries in itertools.count(): + for retry in self.RetryManager(): try: - post_list = self._call_api(self._API_ENDPOINT, query, display_id, - note='Downloading video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''), - errnote='Unable to download video list') + post_list = self._call_api( + self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}', + errnote='Unable to download video list') except ExtractorError as e: - if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries: - self.report_warning('%s. Retrying...' % str(e.cause or e.msg)) + if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: + retry.error = e continue raise - break for video in post_list.get('aweme_list', []): yield { **self._parse_aweme_video_app(video), diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f20b7321a..8b9f38307 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -28,7 +28,6 @@ from ..utils import ( clean_html, datetime_from_str, dict_get, - error_to_compat_str, float_or_none, format_field, get_first, @@ -45,7 +44,6 @@ from ..utils import ( parse_iso8601, parse_qs, qualities, - remove_end, remove_start, smuggle_url, str_or_none, @@ -763,74 +761,54 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): - response = None - last_error = None - count = -1 - retries = self.get_param('extractor_retries', 3) - if check_get_keys is None: - check_get_keys = [] - while count < retries: - count += 1 - if last_error: - self.report_warning('%s. Retrying ...' % remove_end(last_error, '.')) + for retry in self.RetryManager(): try: response = self._call_api( ep=ep, fatal=True, headers=headers, - video_id=item_id, query=query, + video_id=item_id, query=query, note=note, context=self._extract_context(ytcfg, default_client), api_key=self._extract_api_key(ytcfg, default_client), - api_hostname=api_hostname, default_client=default_client, - note='%s%s' % (note, ' (retry #%d)' % count if count else '')) + api_hostname=api_hostname, default_client=default_client) except ExtractorError as e: - if isinstance(e.cause, network_exceptions): - if isinstance(e.cause, urllib.error.HTTPError): - first_bytes = e.cause.read(512) - if not is_html(first_bytes): - yt_error = try_get( - self._parse_json( - self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), - lambda x: x['error']['message'], str) - if yt_error: - self._report_alerts([('ERROR', yt_error)], fatal=False) - # Downloading page may result in intermittent 5xx HTTP error - # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289 - # We also want to catch all other network exceptions since errors in later pages can be troublesome - # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 - if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429): - last_error = error_to_compat_str(e.cause or e.msg) - if count < retries: - continue - if fatal: - raise - else: - self.report_warning(error_to_compat_str(e)) - return + if not isinstance(e.cause, network_exceptions): + return self._error_or_warning(e, fatal=fatal) + elif not isinstance(e.cause, urllib.error.HTTPError): + retry.error = e + continue - else: - try: - self._extract_and_report_alerts(response, only_once=True) - except ExtractorError as e: - # YouTube servers may return errors we want to retry on in a 200 OK response - # See: https://github.com/yt-dlp/yt-dlp/issues/839 - if 'unknown error' in e.msg.lower(): - last_error = e.msg - continue - if fatal: - raise - self.report_warning(error_to_compat_str(e)) - return - if not check_get_keys or dict_get(response, check_get_keys): - break - # Youtube sometimes sends incomplete data - # See: https://github.com/ytdl-org/youtube-dl/issues/28194 - last_error = 'Incomplete data received' - if count >= retries: - if fatal: - raise ExtractorError(last_error) - else: - self.report_warning(last_error) - return - return response + first_bytes = e.cause.read(512) + if not is_html(first_bytes): + yt_error = try_get( + self._parse_json( + self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), + lambda x: x['error']['message'], str) + if yt_error: + self._report_alerts([('ERROR', yt_error)], fatal=False) + # Downloading page may result in intermittent 5xx HTTP error + # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + # We also want to catch all other network exceptions since errors in later pages can be troublesome + # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 + if e.cause.code not in (403, 429): + retry.error = e + continue + return self._error_or_warning(e, fatal=fatal) + + try: + self._extract_and_report_alerts(response, only_once=True) + except ExtractorError as e: + # YouTube servers may return errors we want to retry on in a 200 OK response + # See: https://github.com/yt-dlp/yt-dlp/issues/839 + if 'unknown error' in e.msg.lower(): + retry.error = e + continue + return self._error_or_warning(e, fatal=fatal) + # Youtube sometimes sends incomplete data + # See: https://github.com/ytdl-org/youtube-dl/issues/28194 + if not traverse_obj(response, *variadic(check_get_keys)): + retry.error = ExtractorError('Incomplete data received') + continue + + return response @staticmethod def is_music_url(url): @@ -4522,48 +4500,30 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) def _extract_webpage(self, url, item_id, fatal=True): - retries = self.get_param('extractor_retries', 3) - count = -1 - webpage = data = last_error = None - while count < retries: - count += 1 - # Sometimes youtube returns a webpage with incomplete ytInitialData - # See: https://github.com/yt-dlp/yt-dlp/issues/116 - if last_error: - self.report_warning('%s. Retrying ...' % last_error) + webpage, data = None, None + for retry in self.RetryManager(fatal=fatal): try: - webpage = self._download_webpage( - url, item_id, - note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',)) + webpage = self._download_webpage(url, item_id, note='Downloading webpage') data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} except ExtractorError as e: if isinstance(e.cause, network_exceptions): if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429): - last_error = error_to_compat_str(e.cause or e.msg) - if count < retries: - continue - if fatal: - raise - self.report_warning(error_to_compat_str(e)) + retry.error = e + continue + self._error_or_warning(e, fatal=fatal) break - else: - try: - self._extract_and_report_alerts(data) - except ExtractorError as e: - if fatal: - raise - self.report_warning(error_to_compat_str(e)) - break - if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')): - break + try: + self._extract_and_report_alerts(data) + except ExtractorError as e: + self._error_or_warning(e, fatal=fatal) + break - last_error = 'Incomplete yt initial data received' - if count >= retries: - if fatal: - raise ExtractorError(last_error) - self.report_warning(last_error) - break + # Sometimes youtube returns a webpage with incomplete ytInitialData + # See: https://github.com/yt-dlp/yt-dlp/issues/116 + if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'): + retry.error = ExtractorError('Incomplete yt initial data received') + continue return webpage, data |