aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/yt_data_extract
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r--youtube/yt_data_extract/__init__.py2
-rw-r--r--youtube/yt_data_extract/watch_extraction.py74
2 files changed, 70 insertions, 6 deletions
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py
index de1812d..63b1b37 100644
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -10,4 +10,4 @@ from .watch_extraction import (extract_watch_info, get_caption_url,
update_with_new_urls, requires_decryption,
extract_decryption_function, decrypt_signatures, _formats,
update_format_with_type_info, extract_hls_formats,
- extract_watch_info_from_html, captions_available)
+ extract_watch_info_from_html, captions_available, parse_format)
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 85c8100..de87a6a 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -473,13 +473,22 @@ def _extract_formats(info, player_response):
itag = yt_fmt.get('itag')
# Translated audio track
- # Example: https://www.youtube.com/watch?v=gF9kkB0UWYQ
- # Only get the original language for now so a foreign
- # translation will not be picked just because it comes first
- if deep_get(yt_fmt, 'audioTrack', 'audioIsDefault') is False:
- continue
+ # Keep non-default tracks for multi-audio support
+ # (they will be served via local proxy)
fmt = {}
+
+ # Audio track info
+ audio_track = yt_fmt.get('audioTrack')
+ if audio_track:
+ fmt['audio_track_id'] = audio_track.get('id')
+ fmt['audio_track_name'] = audio_track.get('displayName')
+ fmt['audio_track_is_default'] = audio_track.get('audioIsDefault', True)
+ else:
+ fmt['audio_track_id'] = None
+ fmt['audio_track_name'] = None
+ fmt['audio_track_is_default'] = True
+
fmt['itag'] = itag
fmt['ext'] = None
fmt['audio_bitrate'] = None
@@ -532,6 +541,61 @@ def _extract_formats(info, player_response):
else:
info['ip_address'] = None
+
+def parse_format(yt_fmt):
+ '''Parse a single YouTube format dict into our internal format dict.'''
+ itag = yt_fmt.get('itag')
+ fmt = {}
+
+ audio_track = yt_fmt.get('audioTrack')
+ if audio_track:
+ fmt['audio_track_id'] = audio_track.get('id')
+ fmt['audio_track_name'] = audio_track.get('displayName')
+ fmt['audio_track_is_default'] = audio_track.get('audioIsDefault', True)
+ else:
+ fmt['audio_track_id'] = None
+ fmt['audio_track_name'] = None
+ fmt['audio_track_is_default'] = True
+
+ fmt['itag'] = itag
+ fmt['ext'] = None
+ fmt['audio_bitrate'] = None
+ fmt['bitrate'] = yt_fmt.get('bitrate')
+ fmt['acodec'] = None
+ fmt['vcodec'] = None
+ fmt['width'] = yt_fmt.get('width')
+ fmt['height'] = yt_fmt.get('height')
+ fmt['file_size'] = extract_int(yt_fmt.get('contentLength'))
+ fmt['audio_sample_rate'] = extract_int(yt_fmt.get('audioSampleRate'))
+ fmt['duration_ms'] = yt_fmt.get('approxDurationMs')
+ fmt['fps'] = yt_fmt.get('fps')
+ fmt['init_range'] = yt_fmt.get('initRange')
+ fmt['index_range'] = yt_fmt.get('indexRange')
+ for key in ('init_range', 'index_range'):
+ if fmt[key]:
+ fmt[key]['start'] = int(fmt[key]['start'])
+ fmt[key]['end'] = int(fmt[key]['end'])
+ update_format_with_type_info(fmt, yt_fmt)
+ cipher = dict(urllib.parse.parse_qsl(multi_get(yt_fmt,
+ 'cipher', 'signatureCipher', default='')))
+ if cipher:
+ fmt['url'] = cipher.get('url')
+ else:
+ fmt['url'] = yt_fmt.get('url')
+ fmt['s'] = cipher.get('s')
+ fmt['sp'] = cipher.get('sp')
+
+ hardcoded_itag_info = _formats.get(str(itag), {})
+ for key, value in hardcoded_itag_info.items():
+ conservative_update(fmt, key, value)
+ fmt['quality'] = hardcoded_itag_info.get('height')
+ conservative_update(fmt, 'quality',
+ extract_int(yt_fmt.get('quality'), whole_word=False))
+ conservative_update(fmt, 'quality',
+ extract_int(yt_fmt.get('qualityLabel'), whole_word=False))
+
+ return fmt
+
hls_regex = re.compile(r'[\w_-]+=(?:"[^"]+"|[^",]+),')
def extract_hls_formats(hls_manifest):
'''returns hls_formats, err'''