diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-09-16 23:05:49 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-09-16 23:08:13 +0530 |
commit | 3166e6840c7f7b1ea3984f0e40a892d87e690480 (patch) | |
tree | 4c692a513089b3211410f1fcd11c7d20a489eead /yt_dlp/extractor/generic.py | |
parent | 8817a80d3ac69f2dfd12bdc41657c4a04139807c (diff) | |
download | hypervideo-pre-3166e6840c7f7b1ea3984f0e40a892d87e690480.tar.lz hypervideo-pre-3166e6840c7f7b1ea3984f0e40a892d87e690480.tar.xz hypervideo-pre-3166e6840c7f7b1ea3984f0e40a892d87e690480.zip |
[extractor/generic] Pass through referer from json-ld
Closes #4941
Diffstat (limited to 'yt_dlp/extractor/generic.py')
-rw-r--r-- | yt_dlp/extractor/generic.py | 27 |
1 file changed, 19 insertions, 8 deletions
```diff
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index af7f93b67..55b3addde 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2621,7 +2621,7 @@ class GenericIE(InfoExtractor):
                     default_search += ':'
                 return self.url_result(default_search + url)
 
-        url, smuggled_data = unsmuggle_url(url)
+        url, smuggled_data = unsmuggle_url(url, {})
         force_videoid = None
         is_intentional = smuggled_data and smuggled_data.get('to_generic')
         if smuggled_data and 'force_videoid' in smuggled_data:
@@ -2638,7 +2638,10 @@ class GenericIE(InfoExtractor):
         # to accept raw bytes and being able to download only a chunk.
         # It may probably better to solve this by checking Content-Type for application/octet-stream
         # after a HEAD request, but not sure if we can rely on this.
-        full_response = self._request_webpage(url, video_id, headers={'Accept-Encoding': '*'})
+        full_response = self._request_webpage(url, video_id, headers={
+            'Accept-Encoding': '*',
+            **smuggled_data.get('http_headers', {})
+        })
         new_url = full_response.geturl()
         if url != new_url:
             self.report_following_redirect(new_url)
@@ -2657,14 +2660,15 @@ class GenericIE(InfoExtractor):
         m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
             self.report_detected('direct video link')
+            headers = smuggled_data.get('http_headers', {})
             format_id = str(m.group('format_id'))
             subtitles = {}
             if format_id.endswith('mpegurl'):
-                formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+                formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
             elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
-                formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id)
+                formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
             elif format_id == 'f4m':
-                formats = self._extract_f4m_formats(url, video_id)
+                formats = self._extract_f4m_formats(url, video_id, headers=headers)
             else:
                 formats = [{
                     'format_id': format_id,
@@ -2673,8 +2677,11 @@ class GenericIE(InfoExtractor):
                 }]
                 info_dict['direct'] = True
             self._sort_formats(formats)
-            info_dict['formats'] = formats
-            info_dict['subtitles'] = subtitles
+            info_dict.update({
+                'formats': formats,
+                'subtitles': subtitles,
+                'http_headers': headers,
+            })
             return info_dict
 
         if not self.get_param('test', False) and not is_intentional:
@@ -2919,7 +2926,11 @@ class GenericIE(InfoExtractor):
             self.report_detected('JSON LD')
             return merge_dicts({
                 '_type': 'url_transparent',
-                'url': smuggle_url(json_ld['url'], {'force_videoid': video_id, 'to_generic': True}),
+                'url': smuggle_url(json_ld['url'], {
+                    'force_videoid': video_id,
+                    'to_generic': True,
+                    'http_headers': {'Referer': url},
+                }),
             }, json_ld, info_dict)
 
         def check_video(vurl):
```