diff options
Diffstat (limited to 'yt_dlp/extractor/youtube.py')
-rw-r--r-- | yt_dlp/extractor/youtube.py | 32 |
1 files changed, 18 insertions, 14 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index d5f9b6962..c03637f5f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -225,28 +225,28 @@ INNERTUBE_CLIENTS = { def build_innertube_clients(): - third_party = { + THIRD_PARTY = { 'embedUrl': 'https://google.com', # Can be any valid URL } - base_clients = ('android', 'web', 'ios', 'mweb') - priority = qualities(base_clients[::-1]) + BASE_CLIENTS = ('android', 'web', 'ios', 'mweb') + priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8') ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') - ytcfg['priority'] = 10 * priority(client.split('_', 1)[0]) - if client in base_clients: - INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) + base_client, *variant = client.split('_') + ytcfg['priority'] = 10 * priority(base_client) + + if variant == ['embedded']: + ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY + INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' - agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party agegate_ytcfg['priority'] -= 1 - elif client.endswith('_embedded'): - ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party ytcfg['priority'] -= 2 - else: + elif variant: ytcfg['priority'] -= 3 @@ -2413,7 +2413,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): nfunc, idx = self._search_regex( - r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)', + r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) if not idx: return nfunc @@ -2936,6 +2936,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[]) + approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None for fmt in streaming_formats: if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): @@ -2995,12 +2996,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): itags[itag] = 'https' stream_ids.append(stream_id) - tbr = float_or_none( - fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) + tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) language_preference = ( 10 if audio_track.get('audioIsDefault') and 10 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 else -1) + # Some formats may have much smaller duration than others (possibly damaged during encoding) + # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 + is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), @@ -3009,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '%s%s' % (audio_track.get('displayName') or '', ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), - throttled and 'THROTTLED', delim=', '), + throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), 'source_preference': -10 if throttled else -1, 'fps': int_or_none(fmt.get('fps')) or None, 'height': height, @@ -3020,6 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'language': join_nonempty(audio_track.get('id', '').split('.')[0], 'desc' if language_preference < -1 else ''), 'language_preference': language_preference, + 'preference': -10 if is_damaged else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') |