about summary refs log tree commit diff stats
path: root/youtube/yt_data_extract/watch_extraction.py
diff options
context:
space:
mode:
author Astounds <kirito@disroot.org> 2026-04-05 14:56:51 -0500
committer Astounds <kirito@disroot.org> 2026-04-05 14:56:51 -0500
commit f0649be5dec84ce06a3164a2d9ee90f5385ac92f (patch)
tree 6dcae30ff3e0d66c895033aab9e92a4c9e4ed513 /youtube/yt_data_extract/watch_extraction.py
parent 62a028968e6d9b4e821b6014d6658b8317328fcf (diff)
download yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.tar.lz
yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.tar.xz
yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.zip
Add HLS support to multi-audio
Diffstat (limited to 'youtube/yt_data_extract/watch_extraction.py')
-rw-r--r-- youtube/yt_data_extract/watch_extraction.py 74
1 files changed, 69 insertions, 5 deletions
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 85c8100..de87a6a 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -473,13 +473,22 @@ def _extract_formats(info, player_response):
itag = yt_fmt.get('itag')
# Translated audio track
- # Example: https://www.youtube.com/watch?v=gF9kkB0UWYQ
- # Only get the original language for now so a foreign
- # translation will not be picked just because it comes first
- if deep_get(yt_fmt, 'audioTrack', 'audioIsDefault') is False:
- continue
+ # Keep non-default tracks for multi-audio support
+ # (they will be served via local proxy)
fmt = {}
+
+ # Audio track info
+ audio_track = yt_fmt.get('audioTrack')
+ if audio_track:
+ fmt['audio_track_id'] = audio_track.get('id')
+ fmt['audio_track_name'] = audio_track.get('displayName')
+ fmt['audio_track_is_default'] = audio_track.get('audioIsDefault', True)
+ else:
+ fmt['audio_track_id'] = None
+ fmt['audio_track_name'] = None
+ fmt['audio_track_is_default'] = True
+
fmt['itag'] = itag
fmt['ext'] = None
fmt['audio_bitrate'] = None
@@ -532,6 +541,61 @@ def _extract_formats(info, player_response):
else:
info['ip_address'] = None
+
+def parse_format(yt_fmt):
+ '''Parse a single YouTube format dict into our internal format dict.'''
+ itag = yt_fmt.get('itag')
+ fmt = {}
+
+ audio_track = yt_fmt.get('audioTrack')
+ if audio_track:
+ fmt['audio_track_id'] = audio_track.get('id')
+ fmt['audio_track_name'] = audio_track.get('displayName')
+ fmt['audio_track_is_default'] = audio_track.get('audioIsDefault', True)
+ else:
+ fmt['audio_track_id'] = None
+ fmt['audio_track_name'] = None
+ fmt['audio_track_is_default'] = True
+
+ fmt['itag'] = itag
+ fmt['ext'] = None
+ fmt['audio_bitrate'] = None
+ fmt['bitrate'] = yt_fmt.get('bitrate')
+ fmt['acodec'] = None
+ fmt['vcodec'] = None
+ fmt['width'] = yt_fmt.get('width')
+ fmt['height'] = yt_fmt.get('height')
+ fmt['file_size'] = extract_int(yt_fmt.get('contentLength'))
+ fmt['audio_sample_rate'] = extract_int(yt_fmt.get('audioSampleRate'))
+ fmt['duration_ms'] = yt_fmt.get('approxDurationMs')
+ fmt['fps'] = yt_fmt.get('fps')
+ fmt['init_range'] = yt_fmt.get('initRange')
+ fmt['index_range'] = yt_fmt.get('indexRange')
+ for key in ('init_range', 'index_range'):
+ if fmt[key]:
+ fmt[key]['start'] = int(fmt[key]['start'])
+ fmt[key]['end'] = int(fmt[key]['end'])
+ update_format_with_type_info(fmt, yt_fmt)
+ cipher = dict(urllib.parse.parse_qsl(multi_get(yt_fmt,
+ 'cipher', 'signatureCipher', default='')))
+ if cipher:
+ fmt['url'] = cipher.get('url')
+ else:
+ fmt['url'] = yt_fmt.get('url')
+ fmt['s'] = cipher.get('s')
+ fmt['sp'] = cipher.get('sp')
+
+ hardcoded_itag_info = _formats.get(str(itag), {})
+ for key, value in hardcoded_itag_info.items():
+ conservative_update(fmt, key, value)
+ fmt['quality'] = hardcoded_itag_info.get('height')
+ conservative_update(fmt, 'quality',
+ extract_int(yt_fmt.get('quality'), whole_word=False))
+ conservative_update(fmt, 'quality',
+ extract_int(yt_fmt.get('qualityLabel'), whole_word=False))
+
+ return fmt
+
hls_regex = re.compile(r'[\w_-]+=(?:"[^"]+"|[^",]+),')
def extract_hls_formats(hls_manifest):
'''returns hls_formats, err'''