about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/generic.py
diff options
context:
space:
mode:
author: bashonly <88596187+bashonly@users.noreply.github.com> 2023-03-21 18:12:17 -0500
committer: GitHub <noreply@github.com> 2023-03-21 23:12:17 +0000
commit: c2e0fc40a73dd85ab3920f977f579d475e66ef59 (patch)
tree: bb9da0da662491b9ead2c2a72f6cd160ebad890e /yt_dlp/extractor/generic.py
parent: 06966cb8966b9aa4f60ab9c44c182a057d4ca3a3 (diff)
download: hypervideo-pre-c2e0fc40a73dd85ab3920f977f579d475e66ef59.tar.lz
hypervideo-pre-c2e0fc40a73dd85ab3920f977f579d475e66ef59.tar.xz
hypervideo-pre-c2e0fc40a73dd85ab3920f977f579d475e66ef59.zip
[extractor/generic] Add extractor-args `hls_key`, `variant_query` (#6567)
Authored by: bashonly
Diffstat (limited to 'yt_dlp/extractor/generic.py')
-rw-r--r-- yt_dlp/extractor/generic.py 32
1 files changed, 21 insertions, 11 deletions
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 49aa5a1f5..075bb36de 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -24,6 +24,7 @@ from ..utils import (
mimetype2ext,
orderedSet,
parse_duration,
+ parse_qs,
parse_resolution,
smuggle_url,
str_or_none,
@@ -32,6 +33,7 @@ from ..utils import (
unescapeHTML,
unified_timestamp,
unsmuggle_url,
+ update_url_query,
url_or_none,
urljoin,
variadic,
@@ -2184,12 +2186,21 @@ class GenericIE(InfoExtractor):
self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
- def _fragment_query(self, url):
+ def _extra_manifest_info(self, info, manifest_url):
if self._configuration_arg('fragment_query'):
- query_string = urllib.parse.urlparse(url).query
+ query_string = urllib.parse.urlparse(manifest_url).query
if query_string:
- return {'extra_param_to_segment_url': query_string}
- return {}
+ info['extra_param_to_segment_url'] = query_string
+
+ hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None
+ info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key'), {
+ 'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
+ }) or None
+
+ if self._configuration_arg('variant_query'):
+ query = parse_qs(manifest_url)
+ for fmt in self._downloader._get_formats(info):
+ fmt['url'] = update_url_query(fmt['url'], query)
def _extract_rss(self, url, video_id, doc):
NS_MAP = {
@@ -2397,10 +2408,8 @@ class GenericIE(InfoExtractor):
subtitles = {}
if format_id.endswith('mpegurl') or ext == 'm3u8':
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
- info_dict.update(self._fragment_query(url))
elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
- info_dict.update(self._fragment_query(url))
elif format_id == 'f4m' or ext == 'f4m':
formats = self._extract_f4m_formats(url, video_id, headers=headers)
else:
@@ -2415,6 +2424,7 @@ class GenericIE(InfoExtractor):
'subtitles': subtitles,
'http_headers': headers or None,
})
+ self._extra_manifest_info(info_dict, url)
return info_dict
if not self.get_param('test', False) and not is_intentional:
@@ -2427,7 +2437,7 @@ class GenericIE(InfoExtractor):
if first_bytes.startswith(b'#EXTM3U'):
self.report_detected('M3U playlist')
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
- info_dict.update(self._fragment_query(url))
+ self._extra_manifest_info(info_dict, url)
return info_dict
# Maybe it's a direct link to a video?
@@ -2478,7 +2488,7 @@ class GenericIE(InfoExtractor):
doc,
mpd_base_url=full_response.geturl().rpartition('/')[0],
mpd_url=url)
- info_dict.update(self._fragment_query(url))
+ self._extra_manifest_info(info_dict, url)
self.report_detected('DASH manifest')
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -2592,7 +2602,7 @@ class GenericIE(InfoExtractor):
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
for fmt in formats:
- fmt.update(self._fragment_query(src))
+ self._extra_manifest_info(fmt, src)
if not formats:
formats.append({
@@ -2795,10 +2805,10 @@ class GenericIE(InfoExtractor):
return [self._extract_xspf_playlist(video_url, video_id)]
elif ext == 'm3u8':
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
- entry_info_dict.update(self._fragment_query(video_url))
+ self._extra_manifest_info(entry_info_dict, video_url)
elif ext == 'mpd':
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
- entry_info_dict.update(self._fragment_query(video_url))
+ self._extra_manifest_info(entry_info_dict, video_url)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url: