diff options
Diffstat (limited to 'yt_dlp/YoutubeDL.py')
-rw-r--r-- | yt_dlp/YoutubeDL.py | 80 |
1 files changed, 77 insertions, 3 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index cf0122d4b..7f5571666 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1,9 +1,11 @@ import collections import contextlib +import copy import datetime import errno import fileinput import functools +import http.cookiejar import io import itertools import json @@ -25,7 +27,7 @@ import unicodedata from .cache import Cache from .compat import urllib # isort: split from .compat import compat_os_name, compat_shlex_quote -from .cookies import load_cookies +from .cookies import LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version from .extractor import gen_extractor_classes, get_info_extractor @@ -673,6 +675,9 @@ class YoutubeDL: if auto_init and auto_init != 'no_verbose_header': self.print_debug_header() + self.__header_cookies = [] + self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False)) # compat + def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: self.report_warning(f'{option} is deprecated. Use {suggestion} instead') @@ -1625,8 +1630,60 @@ class YoutubeDL: self.to_screen('') raise + def _load_cookies(self, data, *, from_headers=True): + """Loads cookies from a `Cookie` header + + This tries to work around the security vulnerability of passing cookies to every domain. + See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj + The unscoped cookies are saved for later to be stored in the jar with a limited scope. + + @param data The Cookie header as string to load the cookies from + @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required) + """ + for cookie in LenientSimpleCookie(data).values(): + if from_headers and any(cookie.values()): + raise ValueError('Invalid syntax in Cookie Header') + + domain = cookie.get('domain') or '' + expiry = cookie.get('expires') + if expiry == '': # 0 is valid + expiry = None + prepared_cookie = http.cookiejar.Cookie( + cookie.get('version') or 0, cookie.key, cookie.value, None, False, + domain, True, True, cookie.get('path') or '', bool(cookie.get('path')), + cookie.get('secure') or False, expiry, False, None, None, {}) + + if domain: + self.cookiejar.set_cookie(prepared_cookie) + elif from_headers: + self.deprecated_feature( + 'Passing cookies as a header is a potential security risk; ' + 'they will be scoped to the domain of the downloaded urls. ' + 'Please consider loading cookies from a file or browser instead.') + self.__header_cookies.append(prepared_cookie) + else: + self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping', + tb=False, is_error=False) + + def _apply_header_cookies(self, url): + """Applies stray header cookies to the provided url + + This loads header cookies and scopes them to the domain provided in `url`. + While this is not ideal, it helps reduce the risk of them being sent + to an unintended destination while mostly maintaining compatibility. + """ + parsed = urllib.parse.urlparse(url) + if not parsed.hostname: + return + + for cookie in map(copy.copy, self.__header_cookies): + cookie.domain = f'.{parsed.hostname}' + self.cookiejar.set_cookie(cookie) + @_handle_extraction_exceptions def __extract_info(self, url, ie, download, extra_info, process): + self._apply_header_cookies(url) + try: ie_result = ie.extract(url) except UserNotLive as e: @@ -2414,9 +2471,24 @@ class YoutubeDL: if 'Youtubedl-No-Compression' in res: # deprecated res.pop('Youtubedl-No-Compression', None) res['Accept-Encoding'] = 'identity' - cookies = self.cookiejar.get_cookie_header(info_dict['url']) + cookies = self.cookiejar.get_cookies_for_url(info_dict['url']) if cookies: - res['Cookie'] = cookies + encoder = LenientSimpleCookie() + values = [] + for cookie in cookies: + _, value = encoder.value_encode(cookie.value) + values.append(f'{cookie.name}={value}') + if cookie.domain: + values.append(f'Domain={cookie.domain}') + if cookie.path: + values.append(f'Path={cookie.path}') + if cookie.secure: + values.append('Secure') + if cookie.expires: + values.append(f'Expires={cookie.expires}') + if cookie.version: + values.append(f'Version={cookie.version}') + info_dict['cookies'] = '; '.join(values) if 'X-Forwarded-For' not in res: x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') @@ -3423,6 +3495,8 @@ class YoutubeDL: infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) for info in variadic(json.loads('\n'.join(f)))] for info in infos: + self._load_cookies(info.get('cookies'), from_headers=False) + self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat try: self.__download_wrapper(self.process_ie_result)(info, download=True) except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: |