Add HLS support to multi-audio

author: Astounds <kirito@disroot.org> 2026-04-05 14:56:51 -0500
committer: Astounds <kirito@disroot.org> 2026-04-05 14:56:51 -0500
commit: f0649be5dec84ce06a3164a2d9ee90f5385ac92f (patch)
tree: 6dcae30ff3e0d66c895033aab9e92a4c9e4ed513 /youtube/yt_data_extract/watch_extraction.py
parent: 62a028968e6d9b4e821b6014d6658b8317328fcf (diff)
download: yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.tar.lz
yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.tar.xz
yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.zip
1 files changed, 69 insertions, 5 deletions
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 85c8100..de87a6a 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -473,13 +473,22 @@ def _extract_formats(info, player_response):
         itag = yt_fmt.get('itag')
 
         # Translated audio track
-        # Example: https://www.youtube.com/watch?v=gF9kkB0UWYQ
-        # Only get the original language for now so a foreign
-        # translation will not be picked just because it comes first
-        if deep_get(yt_fmt, 'audioTrack', 'audioIsDefault') is False:
-            continue
+        # Keep non-default tracks for multi-audio support
+        # (they will be served via local proxy)
 
         fmt = {}
+
+        # Audio track info
+        audio_track = yt_fmt.get('audioTrack')
+        if audio_track:
+            fmt['audio_track_id'] = audio_track.get('id')
+            fmt['audio_track_name'] = audio_track.get('displayName')
+            fmt['audio_track_is_default'] = audio_track.get('audioIsDefault', True)
+        else:
+            fmt['audio_track_id'] = None
+            fmt['audio_track_name'] = None
+            fmt['audio_track_is_default'] = True
+
         fmt['itag'] = itag
         fmt['ext'] = None
         fmt['audio_bitrate'] = None
@@ -532,6 +541,61 @@ def _extract_formats(info, player_response):
     else:
         info['ip_address'] = None
 
+
+def parse_format(yt_fmt):
+    '''Parse a single YouTube format dict (yt_fmt) into our internal format dict and return it.'''
+    itag = yt_fmt.get('itag')
+    fmt = {}
+    # Audio track fields; a format without an 'audioTrack' entry is marked as the default track
+    audio_track = yt_fmt.get('audioTrack')
+    if audio_track:
+        fmt['audio_track_id'] = audio_track.get('id')
+        fmt['audio_track_name'] = audio_track.get('displayName')
+        fmt['audio_track_is_default'] = audio_track.get('audioIsDefault', True)
+    else:
+        fmt['audio_track_id'] = None
+        fmt['audio_track_name'] = None
+        fmt['audio_track_is_default'] = True
+    # The fields below start as None or are copied from yt_fmt
+    fmt['itag'] = itag
+    fmt['ext'] = None
+    fmt['audio_bitrate'] = None
+    fmt['bitrate'] = yt_fmt.get('bitrate')
+    fmt['acodec'] = None
+    fmt['vcodec'] = None
+    fmt['width'] = yt_fmt.get('width')
+    fmt['height'] = yt_fmt.get('height')
+    fmt['file_size'] = extract_int(yt_fmt.get('contentLength'))
+    fmt['audio_sample_rate'] = extract_int(yt_fmt.get('audioSampleRate'))
+    fmt['duration_ms'] = yt_fmt.get('approxDurationMs')
+    fmt['fps'] = yt_fmt.get('fps')
+    fmt['init_range'] = yt_fmt.get('initRange')
+    fmt['index_range'] = yt_fmt.get('indexRange')
+    for key in ('init_range', 'index_range'):  # convert the range bounds to int
+        if fmt[key]:  # NOTE(review): this is the same object held by yt_fmt, so yt_fmt is modified too
+            fmt[key]['start'] = int(fmt[key]['start'])
+            fmt[key]['end'] = int(fmt[key]['end'])
+    update_format_with_type_info(fmt, yt_fmt)
+    cipher = dict(urllib.parse.parse_qsl(multi_get(yt_fmt,
+        'cipher', 'signatureCipher', default='')))
+    if cipher:  # cipher query string was parsed: take the url from it
+        fmt['url'] = cipher.get('url')
+    else:
+        fmt['url'] = yt_fmt.get('url')
+    fmt['s'] = cipher.get('s')  # None when cipher is empty
+    fmt['sp'] = cipher.get('sp')
+    # Apply hardcoded per-itag info via conservative_update (presumably fills only unset fields; confirm)
+    hardcoded_itag_info = _formats.get(str(itag), {})
+    for key, value in hardcoded_itag_info.items():
+        conservative_update(fmt, key, value)
+    fmt['quality'] = hardcoded_itag_info.get('height')
+    conservative_update(fmt, 'quality',
+        extract_int(yt_fmt.get('quality'), whole_word=False))
+    conservative_update(fmt, 'quality',
+        extract_int(yt_fmt.get('qualityLabel'), whole_word=False))
+
+    return fmt
+
 hls_regex = re.compile(r'[\w_-]+=(?:"[^"]+"|[^",]+),')
 def extract_hls_formats(hls_manifest):
     '''returns hls_formats, err'''
author	Astounds <kirito@disroot.org>	2026-04-05 14:56:51 -0500
committer	Astounds <kirito@disroot.org>	2026-04-05 14:56:51 -0500
commit	f0649be5dec84ce06a3164a2d9ee90f5385ac92f (patch)
tree	6dcae30ff3e0d66c895033aab9e92a4c9e4ed513 /youtube/yt_data_extract/watch_extraction.py
parent	62a028968e6d9b4e821b6014d6658b8317328fcf (diff)
download	yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.tar.lz yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.tar.xz yt-local-f0649be5dec84ce06a3164a2d9ee90f5385ac92f.zip