aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- test/test_cookies.py 146
-rw-r--r-- yt_dlp/cookies.py 96
-rw-r--r-- yt_dlp/extractor/common.py 3
3 files changed, 244 insertions, 1 deletion
diff --git a/test/test_cookies.py b/test/test_cookies.py
index cfeb11b55..61619df29 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -3,6 +3,7 @@ from datetime import datetime, timezone
from yt_dlp import cookies
from yt_dlp.cookies import (
+ LenientSimpleCookie,
LinuxChromeCookieDecryptor,
MacChromeCookieDecryptor,
WindowsChromeCookieDecryptor,
@@ -137,3 +138,148 @@ class TestCookies(unittest.TestCase):
def test_pbkdf2_sha1(self):
    # Compare the output of pbkdf2_sha1 for fixed inputs against a fixed,
    # previously recorded byte string.
    expected_key = b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34'
    derived_key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
    self.assertEqual(derived_key, expected_key)
+
+
class TestLenientSimpleCookie(unittest.TestCase):
    """Table-driven tests for ``LenientSimpleCookie`` parsing."""

    def _run_tests(self, *cases):
        """Parse each raw cookie string and compare it with the expectation.

        Each case is a ``(message, raw_cookie, expected)`` tuple. ``expected``
        maps a cookie name either to its value, or to a ``(value, attributes)``
        tuple when the cookie is expected to carry attributes.
        """
        for message, raw_cookie, expected in cases:
            with self.subTest(message, expected=expected):
                # Parse inside the subTest so that an exception raised by the
                # parser is reported for this case only and the remaining
                # cases still run.
                cookie = LenientSimpleCookie(raw_cookie)
                self.assertEqual(cookie.keys(), expected.keys(), message)

                for key, expected_value in expected.items():
                    morsel = cookie[key]
                    if isinstance(expected_value, tuple):
                        expected_value, expected_attributes = expected_value
                    else:
                        expected_attributes = {}

                    # A morsel holds an empty string for every attribute that
                    # was not set; only compare the ones that were.
                    attributes = {
                        attr_name: attr_value
                        for attr_name, attr_value in dict(morsel).items()
                        if attr_value != ""
                    }
                    self.assertEqual(attributes, expected_attributes, message)

                    self.assertEqual(morsel.value, expected_value, message)

    def test_parsing(self):
        self._run_tests(
            # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py
            (
                "Test basic cookie",
                "chips=ahoy; vienna=finger",
                {"chips": "ahoy", "vienna": "finger"},
            ),
            (
                "Test quoted cookie",
                'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
                {"keebler": 'E=mc2; L="Loves"; fudge=\012;'},
            ),
            (
                "Allow '=' in an unquoted value",
                "keebler=E=mc2",
                {"keebler": "E=mc2"},
            ),
            (
                "Allow cookies with ':' in their name",
                "key:term=value:term",
                {"key:term": "value:term"},
            ),
            (
                "Allow '[' and ']' in cookie values",
                "a=b; c=[; d=r; f=h",
                {"a": "b", "c": "[", "d": "r", "f": "h"},
            ),
            (
                "Test basic cookie attributes",
                'Customer="WILE_E_COYOTE"; Version=1; Path=/acme',
                {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})},
            ),
            (
                "Test flag only cookie attributes",
                'Customer="WILE_E_COYOTE"; HttpOnly; Secure',
                {"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})},
            ),
            (
                "Test flag only attribute with values",
                "eggs=scrambled; httponly=foo; secure=bar; Path=/bacon",
                {"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})},
            ),
            (
                "Test special case for 'expires' attribute, 4 digit year",
                'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT',
                {"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})},
            ),
            (
                "Test special case for 'expires' attribute, 2 digit year",
                'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT',
                {"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})},
            ),
            (
                "Test extra spaces in keys and values",
                "eggs = scrambled ; secure ; path = bar ; foo=foo ",
                {"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"},
            ),
            (
                "Test quoted attributes",
                'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"',
                {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})},
            ),
            # Our own tests that CPython passes
            (
                "Allow ';' in quoted value",
                'chips="a;hoy"; vienna=finger',
                {"chips": "a;hoy", "vienna": "finger"},
            ),
            (
                "Keep only the last set value",
                "a=c; a=b",
                {"a": "b"},
            ),
        )

    def test_lenient_parsing(self):
        self._run_tests(
            (
                "Ignore and try to skip invalid cookies",
                'chips={"ahoy;": 1}; vienna="finger;"',
                {"vienna": "finger;"},
            ),
            (
                "Ignore cookies without a name",
                "a=b; unnamed; c=d",
                {"a": "b", "c": "d"},
            ),
            (
                "Ignore '\"' cookie without name",
                'a=b; "; c=d',
                {"a": "b", "c": "d"},
            ),
            (
                "Skip all space separated values",
                "x a=b c=d x; e=f",
                {"a": "b", "c": "d", "e": "f"},
            ),
            (
                "Skip all space separated values",
                'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x',
                {"a": "b", "c": "d"},
            ),
            (
                "Expect quote mending",
                'a=b; invalid="; c=d',
                {"a": "b", "c": "d"},
            ),
            (
                "Reset morsel after invalid to not capture attributes",
                "a=b; invalid; Version=1; c=d",
                {"a": "b", "c": "d"},
            ),
            (
                "Continue after non-flag attribute without value",
                "a=b; path; Version=1; c=d",
                {"a": "b", "c": "d"},
            ),
        )
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index c3b14f03b..d502e91da 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -1,6 +1,7 @@
import base64
import contextlib
import http.cookiejar
+import http.cookies
import json
import os
import re
@@ -990,3 +991,98 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta
if profile is not None and _is_path(profile):
profile = os.path.expanduser(profile)
return browser_name, profile, keyring, container
+
+
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie

    When loading a string, items that cannot be parsed or stored are skipped
    instead of aborting the whole load, so the well-formed cookies around
    them are still available.
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    _LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"

    # Names that are treated as attributes of the preceding cookie rather
    # than as cookies of their own
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from `data`, skipping anything that is invalid.

        Non-string input is handed to ``SimpleCookie.load`` unchanged.
        For strings, every item matched by ``_COOKIE_PATTERN`` is handled
        in order: reserved names become attributes of the most recently
        accepted cookie, other names with a value become cookies, and
        everything else is dropped. A dropped item also stops following
        attributes from attaching to the cookie before it.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that subsequent attributes belong to, if any
        morsel = None

        # The pattern always consumes at least one key character, so
        # iterating over the matches is equivalent to searching again from
        # the end of each previous match.
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group("bad"):
                morsel = None
                continue

            key, value = match.group("key", "val")

            if key[0] == "$":
                if morsel is not None:
                    try:
                        morsel[key[1:]] = True
                    except http.cookies.CookieError:
                        # `$name` is not an attribute Morsel knows; ignore it
                        pass
                continue

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                try:
                    morsel[key] = value
                except http.cookies.CookieError:
                    # Morsel does not accept this attribute; skip only the
                    # attribute and keep the cookie itself
                    pass

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                try:
                    morsel.set(key, real_value, coded_value)
                except http.cookies.CookieError:
                    # The pattern accepts more key characters than Morsel
                    # does (e.g. '@'); drop such a cookie instead of failing
                    morsel = None
                    continue
                self[key] = morsel

            else:
                morsel = None
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 30042d61f..e8fa8fdde 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -22,6 +22,7 @@ import xml.etree.ElementTree
from ..compat import functools # isort: split
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
+from ..cookies import LenientSimpleCookie
from ..downloader import FileDownloader
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..utils import (
@@ -3632,7 +3633,7 @@ class InfoExtractor:
def _get_cookies(self, url):
""" Return a http.cookies.SimpleCookie with the cookies for the url """
- return http.cookies.SimpleCookie(self._downloader._calc_cookies(url))
+ return LenientSimpleCookie(self._downloader._calc_cookies(url))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""