about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- test/test_YoutubeDLCookieJar.py 8
-rw-r--r-- yt_dlp/YoutubeDL.py 7
-rw-r--r-- yt_dlp/cookies.py 144
-rw-r--r-- yt_dlp/extractor/common.py 2
-rw-r--r-- yt_dlp/utils/_legacy.py 3
-rw-r--r-- yt_dlp/utils/_utils.py 130
6 files changed, 157 insertions, 137 deletions
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index 0d4e7dc97..2c73d7d85 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import re
import tempfile
-from yt_dlp.utils import YoutubeDLCookieJar
+from yt_dlp.cookies import YoutubeDLCookieJar
class TestYoutubeDLCookieJar(unittest.TestCase):
@@ -47,6 +47,12 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
# will be ignored
self.assertFalse(cookiejar._cookies)
+ def test_get_cookie_header(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ header = cookiejar.get_cookie_header('https://www.foobar.foobar')
+ self.assertIn('HTTPONLY_COOKIE', header)
+
if __name__ == '__main__':
unittest.main()
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index e1e558836..f69bc98c5 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2404,7 +2404,7 @@ class YoutubeDL:
if 'Youtubedl-No-Compression' in res: # deprecated
res.pop('Youtubedl-No-Compression', None)
res['Accept-Encoding'] = 'identity'
- cookies = self._calc_cookies(info_dict['url'])
+ cookies = self.cookiejar.get_cookie_header(info_dict['url'])
if cookies:
res['Cookie'] = cookies
@@ -2416,9 +2416,8 @@ class YoutubeDL:
return res
def _calc_cookies(self, url):
- pr = sanitized_Request(url)
- self.cookiejar.add_cookie_header(pr)
- return pr.get_header('Cookie')
+ self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
+ return self.cookiejar.get_cookie_header(url)
def _sort_thumbnails(self, thumbnails):
thumbnails.sort(key=lambda t: (
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 4cafb522e..eb6a2656b 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -1,7 +1,9 @@
import base64
+import collections
import contextlib
import http.cookiejar
import http.cookies
+import io
import json
import os
import re
@@ -11,6 +13,7 @@ import subprocess
import sys
import tempfile
import time
+import urllib.request
from datetime import datetime, timedelta, timezone
from enum import Enum, auto
from hashlib import pbkdf2_hmac
@@ -29,11 +32,14 @@ from .dependencies import (
from .minicurses import MultilinePrinter, QuietMultilinePrinter
from .utils import (
Popen,
- YoutubeDLCookieJar,
error_to_str,
+ escape_url,
expand_path,
is_path_like,
+ sanitize_url,
+ str_or_none,
try_call,
+ write_string,
)
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
@@ -1091,3 +1097,139 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
else:
morsel = None
+
+
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
+ """
+ See [1] for cookie file format.
+
+ 1. https://curl.haxx.se/docs/http-cookies.html
+ """
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+ _ENTRY_LEN = 7
+ _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by yt-dlp. Do not edit.
+
+'''
+ _CookieFileEntry = collections.namedtuple(
+ 'CookieFileEntry',
+ ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+ def __init__(self, filename=None, *args, **kwargs):
+ super().__init__(None, *args, **kwargs)
+ if is_path_like(filename):
+ filename = os.fspath(filename)
+ self.filename = filename
+
+ @staticmethod
+ def _true_or_false(cndn):
+ return 'TRUE' if cndn else 'FALSE'
+
+ @contextlib.contextmanager
+ def open(self, file, *, write=False):
+ if is_path_like(file):
+ with open(file, 'w' if write else 'r', encoding='utf-8') as f:
+ yield f
+ else:
+ if write:
+ file.truncate(0)
+ yield file
+
+ def _really_save(self, f, ignore_discard=False, ignore_expires=False):
+ now = time.time()
+ for cookie in self:
+ if (not ignore_discard and cookie.discard
+ or not ignore_expires and cookie.is_expired(now)):
+ continue
+ name, value = cookie.name, cookie.value
+ if value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name, value = '', name
+ f.write('%s\n' % '\t'.join((
+ cookie.domain,
+ self._true_or_false(cookie.domain.startswith('.')),
+ cookie.path,
+ self._true_or_false(cookie.secure),
+ str_or_none(cookie.expires, default=''),
+ name, value
+ )))
+
+ def save(self, filename=None, *args, **kwargs):
+ """
+ Save cookies to a file.
+ Code is taken from CPython 3.6
+ https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
+
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+ # Store session cookies with `expires` set to 0 instead of an empty string
+ for cookie in self:
+ if cookie.expires is None:
+ cookie.expires = 0
+
+ with self.open(filename, write=True) as f:
+ f.write(self._HEADER)
+ self._really_save(f, *args, **kwargs)
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+ def prepare_line(line):
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ # comments and empty lines are fine
+ if line.startswith('#') or not line.strip():
+ return line
+ cookie_list = line.split('\t')
+ if len(cookie_list) != self._ENTRY_LEN:
+ raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ cookie = self._CookieFileEntry(*cookie_list)
+ if cookie.expires_at and not cookie.expires_at.isdigit():
+ raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ return line
+
+ cf = io.StringIO()
+ with self.open(filename) as f:
+ for line in f:
+ try:
+ cf.write(prepare_line(line))
+ except http.cookiejar.LoadError as e:
+ if f'{line.strip()} '[0] in '[{"':
+ raise http.cookiejar.LoadError(
+ 'Cookies file must be Netscape formatted, not JSON. See '
+ 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
+ write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
+ continue
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
+ # Session cookies are denoted by either `expires` field set to
+ # an empty string or 0. MozillaCookieJar only recognizes the former
+ # (see [1]). So we need force the latter to be recognized as session
+ # cookies on our own.
+ # Session cookies may be important for cookies-based authentication,
+ # e.g. usually, when user does not check 'Remember me' check box while
+ # logging in on a site, some important cookies are stored as session
+ # cookies so that not recognizing them will result in failed login.
+ # 1. https://bugs.python.org/issue17164
+ for cookie in self:
+ # Treat `expires=0` cookies as session cookies
+ if cookie.expires == 0:
+ cookie.expires = None
+ cookie.discard = True
+
+ def get_cookie_header(self, url):
+ """Generate a Cookie HTTP header for a given url"""
+ cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
+ self.add_cookie_header(cookie_req)
+ return cookie_req.get_header('Cookie')
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1b1dd560f..306911a6c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3444,7 +3444,7 @@ class InfoExtractor:
def _get_cookies(self, url):
""" Return a http.cookies.SimpleCookie with the cookies for the url """
- return LenientSimpleCookie(self._downloader._calc_cookies(url))
+ return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""
diff --git a/yt_dlp/utils/_legacy.py b/yt_dlp/utils/_legacy.py
index b0578a1d6..1097778f0 100644
--- a/yt_dlp/utils/_legacy.py
+++ b/yt_dlp/utils/_legacy.py
@@ -10,6 +10,9 @@ from ._utils import decode_base_n, preferredencoding
from .traversal import traverse_obj
from ..dependencies import certifi, websockets
+# isort: split
+from ..cookies import YoutubeDLCookieJar # noqa: F401
+
has_certifi = bool(certifi)
has_websockets = bool(websockets)
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index d78022295..6f4f22bb3 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -1518,136 +1518,6 @@ def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))
-class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
- """
- See [1] for cookie file format.
-
- 1. https://curl.haxx.se/docs/http-cookies.html
- """
- _HTTPONLY_PREFIX = '#HttpOnly_'
- _ENTRY_LEN = 7
- _HEADER = '''# Netscape HTTP Cookie File
-# This file is generated by yt-dlp. Do not edit.
-
-'''
- _CookieFileEntry = collections.namedtuple(
- 'CookieFileEntry',
- ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
-
- def __init__(self, filename=None, *args, **kwargs):
- super().__init__(None, *args, **kwargs)
- if is_path_like(filename):
- filename = os.fspath(filename)
- self.filename = filename
-
- @staticmethod
- def _true_or_false(cndn):
- return 'TRUE' if cndn else 'FALSE'
-
- @contextlib.contextmanager
- def open(self, file, *, write=False):
- if is_path_like(file):
- with open(file, 'w' if write else 'r', encoding='utf-8') as f:
- yield f
- else:
- if write:
- file.truncate(0)
- yield file
-
- def _really_save(self, f, ignore_discard=False, ignore_expires=False):
- now = time.time()
- for cookie in self:
- if (not ignore_discard and cookie.discard
- or not ignore_expires and cookie.is_expired(now)):
- continue
- name, value = cookie.name, cookie.value
- if value is None:
- # cookies.txt regards 'Set-Cookie: foo' as a cookie
- # with no name, whereas http.cookiejar regards it as a
- # cookie with no value.
- name, value = '', name
- f.write('%s\n' % '\t'.join((
- cookie.domain,
- self._true_or_false(cookie.domain.startswith('.')),
- cookie.path,
- self._true_or_false(cookie.secure),
- str_or_none(cookie.expires, default=''),
- name, value
- )))
-
- def save(self, filename=None, *args, **kwargs):
- """
- Save cookies to a file.
- Code is taken from CPython 3.6
- https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
-
- if filename is None:
- if self.filename is not None:
- filename = self.filename
- else:
- raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
-
- # Store session cookies with `expires` set to 0 instead of an empty string
- for cookie in self:
- if cookie.expires is None:
- cookie.expires = 0
-
- with self.open(filename, write=True) as f:
- f.write(self._HEADER)
- self._really_save(f, *args, **kwargs)
-
- def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Load cookies from a file."""
- if filename is None:
- if self.filename is not None:
- filename = self.filename
- else:
- raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
-
- def prepare_line(line):
- if line.startswith(self._HTTPONLY_PREFIX):
- line = line[len(self._HTTPONLY_PREFIX):]
- # comments and empty lines are fine
- if line.startswith('#') or not line.strip():
- return line
- cookie_list = line.split('\t')
- if len(cookie_list) != self._ENTRY_LEN:
- raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
- cookie = self._CookieFileEntry(*cookie_list)
- if cookie.expires_at and not cookie.expires_at.isdigit():
- raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
- return line
-
- cf = io.StringIO()
- with self.open(filename) as f:
- for line in f:
- try:
- cf.write(prepare_line(line))
- except http.cookiejar.LoadError as e:
- if f'{line.strip()} '[0] in '[{"':
- raise http.cookiejar.LoadError(
- 'Cookies file must be Netscape formatted, not JSON. See '
- 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
- write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
- continue
- cf.seek(0)
- self._really_load(cf, filename, ignore_discard, ignore_expires)
- # Session cookies are denoted by either `expires` field set to
- # an empty string or 0. MozillaCookieJar only recognizes the former
- # (see [1]). So we need force the latter to be recognized as session
- # cookies on our own.
- # Session cookies may be important for cookies-based authentication,
- # e.g. usually, when user does not check 'Remember me' check box while
- # logging in on a site, some important cookies are stored as session
- # cookies so that not recognizing them will result in failed login.
- # 1. https://bugs.python.org/issue17164
- for cookie in self:
- # Treat `expires=0` cookies as session cookies
- if cookie.expires == 0:
- cookie.expires = None
- cookie.discard = True
-
-
class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
def __init__(self, cookiejar=None):
urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)