diff options
author | Simon Sawicki <37424085+Grub4K@users.noreply.github.com> | 2022-09-16 19:02:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-16 22:32:00 +0530 |
commit | 8817a80d3ac69f2dfd12bdc41657c4a04139807c (patch) | |
tree | a4303b0b928b09433651dd52c0907fc774836fe7 /yt_dlp/cookies.py | |
parent | 5736d79172c47ff84740d5720467370a560febad (diff) | |
download | hypervideo-pre-8817a80d3ac69f2dfd12bdc41657c4a04139807c.tar.lz hypervideo-pre-8817a80d3ac69f2dfd12bdc41657c4a04139807c.tar.xz hypervideo-pre-8817a80d3ac69f2dfd12bdc41657c4a04139807c.zip |
[cookies] Parse cookies leniently (#4780)
Closes #4776, #3778
Authored by: Grub4K
Diffstat (limited to 'yt_dlp/cookies.py')
-rw-r--r-- | yt_dlp/cookies.py | 96 |
1 files changed, 96 insertions, 0 deletions
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie

    `load` is overridden for `str` input: a key/value pair whose value can
    only be matched by the 'bad' fallback group of `_COOKIE_PATTERN` is
    skipped (together with the attributes that follow it) and parsing
    continues with the next pair.
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    _LEGAL_KEY_CHARS = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + r"\[\]"

    # Attribute names that belong to the preceding cookie instead of
    # starting a new one (compared against the lower-cased key)
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from `data` into this container.

        @param data     A cookie header string, or any other object accepted
                        by `http.cookies.SimpleCookie.load` (which is then
                        used unchanged).
        @returns        None for `str` input, otherwise whatever the parent
                        `load` returns.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that subsequent attributes get attached to;
        # None while there is no valid cookie to attach them to
        morsel = None
        index = 0
        length = len(data)

        while 0 <= index < length:
            match = self._COOKIE_PATTERN.search(data, index)
            if not match:
                break

            index = match.end(0)
            if match.group("bad"):
                # Invalid value: drop the pair and make sure the attributes
                # following it are not attached to the previous cookie
                morsel = None
                continue

            key, value = match.group("key", "val")

            # A leading "$" marks an attribute (RFC 2965 style); strip it and
            # treat the rest like any other attribute so that its value is
            # kept and unknown names cannot raise `CookieError`
            is_attribute = False
            if key.startswith("$"):
                key = key[1:]
                is_attribute = True

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    # Attribute without a cookie to attach it to
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        # Only flags are allowed to omit the value
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                # "$"-attribute with an unknown name: ignore it and detach
                morsel = None

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                # Bare word that is neither a flag nor a key/value pair
                morsel = None