aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/cookies.py
diff options
context:
space:
mode:
author Simon Sawicki <37424085+Grub4K@users.noreply.github.com> 2022-09-16 19:02:00 +0200
committer GitHub <noreply@github.com> 2022-09-16 22:32:00 +0530
commit 8817a80d3ac69f2dfd12bdc41657c4a04139807c (patch)
tree a4303b0b928b09433651dd52c0907fc774836fe7 /yt_dlp/cookies.py
parent 5736d79172c47ff84740d5720467370a560febad (diff)
downloadhypervideo-pre-8817a80d3ac69f2dfd12bdc41657c4a04139807c.tar.lz
hypervideo-pre-8817a80d3ac69f2dfd12bdc41657c4a04139807c.tar.xz
hypervideo-pre-8817a80d3ac69f2dfd12bdc41657c4a04139807c.zip
[cookies] Parse cookies leniently (#4780)
Closes #4776, #3778 Authored by: Grub4K
Diffstat (limited to 'yt_dlp/cookies.py')
-rw-r--r-- yt_dlp/cookies.py 96
1 files changed, 96 insertions, 0 deletions
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index c3b14f03b..d502e91da 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -1,6 +1,7 @@
import base64
import contextlib
import http.cookiejar
+import http.cookies
import json
import os
import re
@@ -990,3 +991,98 @@ def _parse_browser_specification(browser_name, profile=None, keyring=None, conta
if profile is not None and _is_path(profile):
profile = os.path.expanduser(profile)
return browser_name, profile, keyring, container
+
+
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie

    When given a string, `load` never raises on malformed input: an invalid
    cookie (and any attributes that follow it) is skipped and parsing resumes
    with the next cookie. Non-string input is handed to the base class
    unchanged.
    """
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # Keys are restricted to the characters that `Morsel.set` accepts, so a
    # matched key can never make `Morsel.set` raise `CookieError`.
    _LEGAL_KEY_CHARS = r"\w\d" + re.escape("!#$%&'*+-.:^_`|~")
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape("(),/<=>?@[]{}")

    # Attribute names that belong to the preceding cookie instead of
    # starting a new one (compared case-insensitively)
    _RESERVED = {
        "expires",
        "path",
        "comment",
        "domain",
        "max-age",
        "secure",
        "httponly",
        "version",
        "samesite",
    }

    # Reserved attributes that are allowed to appear without a value
    _FLAGS = {"secure", "httponly"}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r"""
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [""" + _LEGAL_VALUE_CHARS + r"""]*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        """, re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from `data`, skipping anything that cannot be parsed

        @param data     A cookie string to parse leniently. Any other type
                        (e.g. a dict) is passed to `SimpleCookie.load` as-is.
        @returns        None for string input; otherwise whatever
                        `SimpleCookie.load` returns.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that subsequent attributes get attached to;
        # None means "no valid cookie seen yet / last cookie was invalid"
        morsel = None
        # The pattern always consumes at least one key character, so
        # `finditer` walks the string exactly like a manual search loop
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group("bad"):
                # Unparsable value: drop the cookie and its attributes
                morsel = None
                continue

            key, value = match.group("key", "val")

            # RFC 2965 style attributes are spelled with a leading "$"
            is_attribute = key.startswith("$")
            if is_attribute:
                key = key[1:]

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    # Attribute without a cookie to attach it to
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        # Only flags may omit the value
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                # "$name" that is not a known attribute: assigning it would
                # make Morsel raise CookieError, so invalidate instead
                morsel = None

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                # Bare word that is neither a flag nor a cookie
                morsel = None