author     coletdjnz <coletdjnz@protonmail.com>       2023-07-15 15:55:23 +0530
committer  pukkandan <pukkandan.ytdlp@gmail.com>      2023-07-15 16:18:35 +0530
commit     227bf1a33be7b89cd7d44ad046844c4ccba104f4 (patch)
tree       72ffa5e9135e7e4869c351bd9dec6c0944cfba17 /yt_dlp
parent     c365dba8430ee33abda85d31f95128605bf240eb (diff)
[networking] Rewrite architecture (#2861)
The new networking interface consists of a `RequestDirector` that directs each `Request` to an appropriate `RequestHandler` and returns the `Response` or raises `RequestError`. The handlers define adapters to transform their internal Request/Response/Errors to our interfaces.

User-facing changes:
- Fix issues with per-request proxies on redirects for urllib
- Support for `ALL_PROXY` environment variable for proxy setting
- Support for `socks5h` proxy
  - Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093
- Raise an error when using an `https` proxy instead of silently converting it to `http`

Authored by: coletdjnz
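For orientation, a minimal sketch of how the new interface looks from the caller's side (a rough usage illustration against this revision, not part of the commit; the URL and header are placeholders):

    from yt_dlp import YoutubeDL
    from yt_dlp.networking import Request
    from yt_dlp.networking.exceptions import HTTPError, RequestError, TransportError

    with YoutubeDL({'socket_timeout': 10}) as ydl:
        try:
            # urlopen() hands the Request to the RequestDirector, which picks a RequestHandler
            response = ydl.urlopen(Request('https://example.com', headers={'X-Demo': '1'}))
            print(response.status, response.get_header('Content-Type'))
            response.read()
        except HTTPError as e:          # the server answered with an error status
            print(e.status, e.reason)
        except TransportError as e:     # connection, timeout, SSL or read failures
            print('transport error:', e)
        except RequestError as e:       # base class of all networking errors
            print('request failed:', e)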
Diffstat (limited to 'yt_dlp')
-rw-r--r--  yt_dlp/YoutubeDL.py              177
-rw-r--r--  yt_dlp/compat/__init__.py         10
-rw-r--r--  yt_dlp/downloader/http.py         24
-rw-r--r--  yt_dlp/extractor/common.py        32
-rw-r--r--  yt_dlp/networking/__init__.py     13
-rw-r--r--  yt_dlp/networking/_helper.py      91
-rw-r--r--  yt_dlp/networking/_urllib.py     233
-rw-r--r--  yt_dlp/networking/common.py      522
-rw-r--r--  yt_dlp/networking/exceptions.py  202
-rw-r--r--  yt_dlp/utils/_deprecated.py       13
-rw-r--r--  yt_dlp/utils/_utils.py            35
-rw-r--r--  yt_dlp/utils/networking.py        67
12 files changed, 1214 insertions, 205 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 138646ebf..29a18aef0 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -4,7 +4,6 @@ import copy
import datetime
import errno
import fileinput
-import functools
import http.cookiejar
import io
import itertools
@@ -25,8 +24,8 @@ import traceback
import unicodedata
from .cache import Cache
-from .compat import urllib # isort: split
-from .compat import compat_os_name, compat_shlex_quote
+from .compat import functools, urllib # isort: split
+from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
@@ -34,6 +33,15 @@ from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
+from .networking import Request, RequestDirector
+from .networking.common import _REQUEST_HANDLERS
+from .networking.exceptions import (
+ HTTPError,
+ NoSupportingHandlers,
+ RequestError,
+ SSLError,
+ _CompatHTTPError,
+)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
@@ -78,7 +86,6 @@ from .utils import (
MaxDownloadsReached,
Namespace,
PagedList,
- PerRequestProxyHandler,
PlaylistEntries,
Popen,
PostProcessingError,
@@ -87,9 +94,6 @@ from .utils import (
SameFileError,
UnavailableVideoError,
UserNotLive,
- YoutubeDLCookieProcessor,
- YoutubeDLHandler,
- YoutubeDLRedirectHandler,
age_restricted,
args_to_str,
bug_reports_message,
@@ -102,6 +106,7 @@ from .utils import (
error_to_compat_str,
escapeHTML,
expand_path,
+ extract_basic_auth,
filter_dict,
float_or_none,
format_bytes,
@@ -117,8 +122,6 @@ from .utils import (
locked_file,
make_archive_id,
make_dir,
- make_HTTPS_handler,
- merge_headers,
network_exceptions,
number_of_digits,
orderedSet,
@@ -132,7 +135,6 @@ from .utils import (
sanitize_filename,
sanitize_path,
sanitize_url,
- sanitized_Request,
std_headers,
str_or_none,
strftime_or_none,
@@ -151,7 +153,12 @@ from .utils import (
write_json_file,
write_string,
)
-from .utils.networking import clean_headers
+from .utils._utils import _YDLLogger
+from .utils.networking import (
+ HTTPHeaderDict,
+ clean_headers,
+ clean_proxies,
+)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
if compat_os_name == 'nt':
@@ -673,7 +680,9 @@ class YoutubeDL:
raise
self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
- self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
+ self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
+ self._request_director = self.build_request_director(
+ sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header()
@@ -763,8 +772,6 @@ class YoutubeDL:
get_postprocessor(pp_def.pop('key'))(self, **pp_def),
when=when)
- self._setup_opener()
-
def preload_download_archive(fn):
"""Preload the archive, if any is specified"""
archive = set()
@@ -946,7 +953,11 @@ class YoutubeDL:
def __exit__(self, *args):
self.restore_console_title()
+ self.close()
+
+ def close(self):
self.save_cookies()
+ self._request_director.close()
def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears.
@@ -2468,7 +2479,7 @@ class YoutubeDL:
return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict):
- res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
+ res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
clean_headers(res)
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
if cookies:
@@ -3943,13 +3954,8 @@ class YoutubeDL:
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
})) or 'none'))
- self._setup_opener()
- proxy_map = {}
- for handler in self._opener.handlers:
- if hasattr(handler, 'proxies'):
- proxy_map.update(handler.proxies)
- write_debug(f'Proxy map: {proxy_map}')
-
+ write_debug(f'Proxy map: {self.proxies}')
+ # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['%s%s' % (
klass.__name__, '' if klass.__name__ == name else f' as {name}')
@@ -3977,53 +3983,21 @@ class YoutubeDL:
'See https://yt-dl.org/update if you need help updating.' %
latest_version)
- def _setup_opener(self):
- if hasattr(self, '_opener'):
- return
- timeout_val = self.params.get('socket_timeout')
- self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
+ @functools.cached_property
+ def proxies(self):
+ """Global proxy configuration"""
opts_proxy = self.params.get('proxy')
-
- cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None:
if opts_proxy == '':
- proxies = {}
- else:
- proxies = {'http': opts_proxy, 'https': opts_proxy}
+ opts_proxy = '__noproxy__'
+ proxies = {'all': opts_proxy}
else:
proxies = urllib.request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
+ # compat. Set HTTPS_PROXY to __noproxy__ to revert
if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http']
- proxy_handler = PerRequestProxyHandler(proxies)
-
- debuglevel = 1 if self.params.get('debug_printtraffic') else 0
- https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
- ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
- redirect_handler = YoutubeDLRedirectHandler()
- data_handler = urllib.request.DataHandler()
-
- # When passing our own FileHandler instance, build_opener won't add the
- # default FileHandler and allows us to disable the file protocol, which
- # can be used for malicious purposes (see
- # https://github.com/ytdl-org/youtube-dl/issues/8227)
- file_handler = urllib.request.FileHandler()
-
- if not self.params.get('enable_file_urls'):
- def file_open(*args, **kwargs):
- raise urllib.error.URLError(
- 'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
- 'Use --enable-file-urls to enable at your own risk.')
- file_handler.file_open = file_open
-
- opener = urllib.request.build_opener(
- proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
-
- # Delete the default user-agent header, which would otherwise apply in
- # cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
- opener.addheaders = []
- self._opener = opener
+
+ return proxies
@functools.cached_property
def cookiejar(self):
@@ -4031,11 +4005,84 @@ class YoutubeDL:
return load_cookies(
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
+ @property
+ def _opener(self):
+ """
+ Get a urllib OpenerDirector from the Urllib handler (deprecated).
+ """
+ self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
+ handler = self._request_director.handlers['Urllib']
+ return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
+
def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, str):
- req = sanitized_Request(req)
- return self._opener.open(req, timeout=self._socket_timeout)
+ req = Request(req)
+ elif isinstance(req, urllib.request.Request):
+ req = urllib_req_to_req(req)
+ assert isinstance(req, Request)
+
+ # compat: Assume user:pass url params are basic auth
+ url, basic_auth_header = extract_basic_auth(req.url)
+ if basic_auth_header:
+ req.headers['Authorization'] = basic_auth_header
+ req.url = sanitize_url(url)
+
+ clean_proxies(proxies=req.proxies, headers=req.headers)
+ clean_headers(req.headers)
+
+ try:
+ return self._request_director.send(req)
+ except NoSupportingHandlers as e:
+ for ue in e.unsupported_errors:
+ if not (ue.handler and ue.msg):
+ continue
+ if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
+ raise RequestError(
+ 'file:// URLs are disabled by default in yt-dlp for security reasons. '
+ 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
+ raise
+ except SSLError as e:
+ if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
+ raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
+ elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
+ raise RequestError(
+ 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
+ 'Try using --legacy-server-connect', cause=e) from e
+ raise
+ except HTTPError as e: # TODO: Remove in a future release
+ raise _CompatHTTPError(e) from e
+
+ def build_request_director(self, handlers):
+ logger = _YDLLogger(self)
+ headers = self.params.get('http_headers').copy()
+ proxies = self.proxies.copy()
+ clean_headers(headers)
+ clean_proxies(proxies, headers)
+
+ director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
+ for handler in handlers:
+ director.add_handler(handler(
+ logger=logger,
+ headers=headers,
+ cookiejar=self.cookiejar,
+ proxies=proxies,
+ prefer_system_certs='no-certifi' in self.params['compat_opts'],
+ verify=not self.params.get('nocheckcertificate'),
+ **traverse_obj(self.params, {
+ 'verbose': 'debug_printtraffic',
+ 'source_address': 'source_address',
+ 'timeout': 'socket_timeout',
+ 'legacy_ssl_support': 'legacy_server_connect',
+ 'enable_file_urls': 'enable_file_urls',
+ 'client_cert': {
+ 'client_certificate': 'client_certificate',
+ 'client_certificate_key': 'client_certificate_key',
+ 'client_certificate_password': 'client_certificate_password',
+ },
+ }),
+ ))
+ return director
def encode(self, s):
if isinstance(s, bytes):
@@ -4188,7 +4235,7 @@ class YoutubeDL:
else:
self.to_screen(f'[info] Downloading {thumb_display_id} ...')
try:
- uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
+ uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
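The per-instance proxy configuration now lives in the cached `proxies` property and feeds `build_request_director()`. A small illustration of the sentinel values used by this revision (`__noproxy__` is an internal detail, shown only to make the mapping concrete):

    from yt_dlp import YoutubeDL

    # --proxy populates the 'all' key, the fallback handlers use for every scheme
    print(YoutubeDL({'proxy': 'socks5h://127.0.0.1:1080'}).proxies)   # {'all': 'socks5h://127.0.0.1:1080'}

    # An empty --proxy becomes the '__noproxy__' sentinel, which clean_proxies()
    # later normalises to None, disabling proxying even if *_PROXY env vars are set
    print(YoutubeDL({'proxy': ''}).proxies)                           # {'all': '__noproxy__'}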
diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py
index c6c02541c..a41a80ebb 100644
--- a/yt_dlp/compat/__init__.py
+++ b/yt_dlp/compat/__init__.py
@@ -70,3 +70,13 @@ if compat_os_name in ('nt', 'ce'):
return userhome + path[i:]
else:
compat_expanduser = os.path.expanduser
+
+
+def urllib_req_to_req(urllib_request):
+ """Convert urllib Request to a networking Request"""
+ from ..networking import Request
+ from ..utils.networking import HTTPHeaderDict
+ return Request(
+ urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
+ headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
+ extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
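The new compat helper bridges legacy urllib requests into the networking `Request` type, carrying over URL, method, payload and headers. A quick check (example values only):

    import urllib.request
    from yt_dlp.compat import urllib_req_to_req

    legacy = urllib.request.Request(
        'https://example.com/api', data=b'{"a": 1}',
        headers={'Content-Type': 'application/json'}, method='PUT')
    req = urllib_req_to_req(legacy)
    print(req.method, req.url)            # PUT https://example.com/api
    print(req.headers['content-type'])    # application/json (header lookup is case-insensitive)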
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index 7c5daea85..45d094721 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -1,12 +1,10 @@
-import http.client
import os
import random
-import socket
-import ssl
import time
import urllib.error
from .common import FileDownloader
+from ..networking.exceptions import CertificateVerifyError, TransportError
from ..utils import (
ContentTooShortError,
RetryManager,
@@ -21,14 +19,6 @@ from ..utils import (
write_xattr,
)
-RESPONSE_READ_EXCEPTIONS = (
- TimeoutError,
- socket.timeout, # compat: py < 3.10
- ConnectionError,
- ssl.SSLError,
- http.client.HTTPException
-)
-
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
@@ -196,13 +186,9 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error
raise
raise RetryDownload(err)
- except urllib.error.URLError as err:
- if isinstance(err.reason, ssl.CertificateError):
- raise
- raise RetryDownload(err)
- # In urllib.request.AbstractHTTPHandler, the response is partially read on request.
- # Any errors that occur during this will not be wrapped by URLError
- except RESPONSE_READ_EXCEPTIONS as err:
+ except CertificateVerifyError:
+ raise
+ except TransportError as err:
raise RetryDownload(err)
def close_stream():
@@ -258,7 +244,7 @@ class HttpFD(FileDownloader):
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
- except RESPONSE_READ_EXCEPTIONS as err:
+ except TransportError as err:
retry(err)
byte_counter += len(data_block)
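Because the handlers now normalise low-level failures, the downloader's retry logic reduces to two cases. A rough sketch of that pattern; the `retry` callable is a hypothetical stand-in for the RetryManager error hook used in the real code:

    from yt_dlp.networking.exceptions import CertificateVerifyError, TransportError

    def read_block(stream, size, retry):
        try:
            return stream.read(size)
        except CertificateVerifyError:
            raise              # certificate failures stay fatal and are never retried
        except TransportError as err:
            retry(err)         # timeouts, resets, SSL and decode errors all surface as TransportError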
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index fe08839aa..63156d3ac 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -17,16 +17,22 @@ import subprocess
import sys
import time
import types
-import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
from ..compat import functools # isort: split
-from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name
+from ..compat import (
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_os_name,
+ urllib_req_to_req,
+)
from ..cookies import LenientSimpleCookie
from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..downloader.hls import HlsFD
+from ..networking.common import HEADRequest, Request
+from ..networking.exceptions import network_exceptions
from ..utils import (
IDENTITY,
JSON_LD_RE,
@@ -35,7 +41,6 @@ from ..utils import (
FormatSorter,
GeoRestrictedError,
GeoUtils,
- HEADRequest,
LenientJSONDecoder,
Popen,
RegexNotFoundError,
@@ -61,7 +66,6 @@ from ..utils import (
js_to_json,
mimetype2ext,
netrc_from_content,
- network_exceptions,
orderedSet,
parse_bitrate,
parse_codecs,
@@ -71,7 +75,6 @@ from ..utils import (
parse_resolution,
sanitize_filename,
sanitize_url,
- sanitized_Request,
smuggle_url,
str_or_none,
str_to_int,
@@ -83,8 +86,6 @@ from ..utils import (
unescapeHTML,
unified_strdate,
unified_timestamp,
- update_Request,
- update_url_query,
url_basename,
url_or_none,
urlhandle_detect_ext,
@@ -797,10 +798,12 @@ class InfoExtractor:
def _create_request(self, url_or_request, data=None, headers=None, query=None):
if isinstance(url_or_request, urllib.request.Request):
- return update_Request(url_or_request, data=data, headers=headers, query=query)
- if query:
- url_or_request = update_url_query(url_or_request, query)
- return sanitized_Request(url_or_request, data, headers or {})
+ url_or_request = urllib_req_to_req(url_or_request)
+ elif not isinstance(url_or_request, Request):
+ url_or_request = Request(url_or_request)
+
+ url_or_request.update(data=data, headers=headers, query=query)
+ return url_or_request
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
"""
@@ -838,12 +841,7 @@ class InfoExtractor:
except network_exceptions as err:
if isinstance(err, urllib.error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
- # Retain reference to error to prevent file object from
- # being closed before it can be read. Works around the
- # effects of <https://bugs.python.org/issue15002>
- # introduced in Python 3.4.1.
- err.fp._error = err
- return err.fp
+ return err.response
if errnote is False:
return False
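`_create_request()` now funnels strings, urllib requests and networking Requests into a single `Request.update()` call. A short illustration of how `update()` applies `query`, `headers` and `data` (placeholder URL):

    from yt_dlp.networking import Request

    req = Request('https://example.com/watch', headers={'Referer': 'https://example.com/'})
    req.update(query={'v': 'abc123'}, headers={'X-Extra': '1'})
    print(req.url)      # https://example.com/watch?v=abc123
    print(req.method)   # GET - inferred, since no payload is attached
    req.update(data=b'payload')
    print(req.method)   # POST - the method now follows the presence of data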
diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py
index e69de29bb..5e8876484 100644
--- a/yt_dlp/networking/__init__.py
+++ b/yt_dlp/networking/__init__.py
@@ -0,0 +1,13 @@
+# flake8: noqa: 401
+from .common import (
+ HEADRequest,
+ PUTRequest,
+ Request,
+ RequestDirector,
+ RequestHandler,
+ Response,
+)
+
+# isort: split
+# TODO: all request handlers should be safely imported
+from . import _urllib
diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py
index 367f3f444..a43c57bb4 100644
--- a/yt_dlp/networking/_helper.py
+++ b/yt_dlp/networking/_helper.py
@@ -1,13 +1,22 @@
from __future__ import annotations
import contextlib
+import functools
import ssl
import sys
+import typing
import urllib.parse
+import urllib.request
+from .exceptions import RequestError, UnsupportedRequest
from ..dependencies import certifi
from ..socks import ProxyType
-from ..utils import YoutubeDLError
+from ..utils import format_field, traverse_obj
+
+if typing.TYPE_CHECKING:
+ from collections.abc import Iterable
+
+ from ..utils.networking import HTTPHeaderDict
def ssl_load_certs(context: ssl.SSLContext, use_certifi=True):
@@ -23,11 +32,11 @@ def ssl_load_certs(context: ssl.SSLContext, use_certifi=True):
# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
for storename in ('CA', 'ROOT'):
- _ssl_load_windows_store_certs(context, storename)
+ ssl_load_windows_store_certs(context, storename)
context.set_default_verify_paths()
-def _ssl_load_windows_store_certs(ssl_context, storename):
+def ssl_load_windows_store_certs(ssl_context, storename):
# Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
try:
certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
@@ -44,10 +53,18 @@ def make_socks_proxy_opts(socks_proxy):
url_components = urllib.parse.urlparse(socks_proxy)
if url_components.scheme.lower() == 'socks5':
socks_type = ProxyType.SOCKS5
- elif url_components.scheme.lower() in ('socks', 'socks4'):
+ rdns = False
+ elif url_components.scheme.lower() == 'socks5h':
+ socks_type = ProxyType.SOCKS5
+ rdns = True
+ elif url_components.scheme.lower() == 'socks4':
socks_type = ProxyType.SOCKS4
+ rdns = False
elif url_components.scheme.lower() == 'socks4a':
socks_type = ProxyType.SOCKS4A
+ rdns = True
+ else:
+ raise ValueError(f'Unknown SOCKS proxy version: {url_components.scheme.lower()}')
def unquote_if_non_empty(s):
if not s:
@@ -57,12 +74,25 @@ def make_socks_proxy_opts(socks_proxy):
'proxytype': socks_type,
'addr': url_components.hostname,
'port': url_components.port or 1080,
- 'rdns': True,
+ 'rdns': rdns,
'username': unquote_if_non_empty(url_components.username),
'password': unquote_if_non_empty(url_components.password),
}
+def select_proxy(url, proxies):
+ """Unified proxy selector for all backends"""
+ url_components = urllib.parse.urlparse(url)
+ if 'no' in proxies:
+ hostport = url_components.hostname + format_field(url_components.port, None, ':%s')
+ if urllib.request.proxy_bypass_environment(hostport, {'no': proxies['no']}):
+ return
+ elif urllib.request.proxy_bypass(hostport): # check system settings
+ return
+
+ return traverse_obj(proxies, url_components.scheme or 'http', 'all')
+
+
def get_redirect_method(method, status):
"""Unified redirect method handling"""
@@ -126,14 +156,53 @@ def make_ssl_context(
client_certificate, keyfile=client_certificate_key,
password=client_certificate_password)
except ssl.SSLError:
- raise YoutubeDLError('Unable to load client certificate')
+ raise RequestError('Unable to load client certificate')
+ if getattr(context, 'post_handshake_auth', None) is not None:
+ context.post_handshake_auth = True
return context
-def add_accept_encoding_header(headers, supported_encodings):
- if supported_encodings and 'Accept-Encoding' not in headers:
- headers['Accept-Encoding'] = ', '.join(supported_encodings)
+class InstanceStoreMixin:
+ def __init__(self, **kwargs):
+ self.__instances = []
+ super().__init__(**kwargs) # So that both MRO works
+
+ @staticmethod
+ def _create_instance(**kwargs):
+ raise NotImplementedError
- elif 'Accept-Encoding' not in headers:
- headers['Accept-Encoding'] = 'identity'
+ def _get_instance(self, **kwargs):
+ for key, instance in self.__instances:
+ if key == kwargs:
+ return instance
+
+ instance = self._create_instance(**kwargs)
+ self.__instances.append((kwargs, instance))
+ return instance
+
+ def _close_instance(self, instance):
+ if callable(getattr(instance, 'close', None)):
+ instance.close()
+
+ def _clear_instances(self):
+ for _, instance in self.__instances:
+ self._close_instance(instance)
+ self.__instances.clear()
+
+
+def add_accept_encoding_header(headers: HTTPHeaderDict, supported_encodings: Iterable[str]):
+ if 'Accept-Encoding' not in headers:
+ headers['Accept-Encoding'] = ', '.join(supported_encodings) or 'identity'
+
+
+def wrap_request_errors(func):
+ @functools.wraps(func)
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
+ except UnsupportedRequest as e:
+ if e.handler is None:
+ e.handler = self
+ raise
+ return wrapper
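Two of the helpers above carry the user-visible proxy changes: `make_socks_proxy_opts()` now distinguishes local versus remote DNS for SOCKS, and `select_proxy()` is the proxy selector shared by all backends. Spot checks against these private helpers (internal API, shown only for illustration):

    from yt_dlp.networking._helper import make_socks_proxy_opts, select_proxy

    # socks5 resolves hostnames locally; socks5h defers DNS resolution to the proxy
    print(make_socks_proxy_opts('socks5://127.0.0.1:1080')['rdns'])    # False
    print(make_socks_proxy_opts('socks5h://127.0.0.1:1080')['rdns'])   # True

    proxies = {'https': 'http://corp-proxy:3128', 'all': 'socks5h://127.0.0.1:1080'}
    print(select_proxy('https://example.com/x', proxies))   # the scheme-specific entry wins
    print(select_proxy('ftp://example.com/x', proxies))     # otherwise 'all' is the fallback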
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py
index 1f5871ae6..2c5f09872 100644
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
import functools
import gzip
import http.client
@@ -9,26 +11,48 @@ import urllib.parse
import urllib.request
import urllib.response
import zlib
+from urllib.request import (
+ DataHandler,
+ FileHandler,
+ FTPHandler,
+ HTTPCookieProcessor,
+ HTTPDefaultErrorHandler,
+ HTTPErrorProcessor,
+ UnknownHandler,
+)
from ._helper import (
+ InstanceStoreMixin,
add_accept_encoding_header,
get_redirect_method,
make_socks_proxy_opts,
+ select_proxy,
+)
+from .common import Features, RequestHandler, Response, register
+from .exceptions import (
+ CertificateVerifyError,
+ HTTPError,
+ IncompleteRead,
+ ProxyError,
+ RequestError,
+ SSLError,
+ TransportError,
)
from ..dependencies import brotli
+from ..socks import ProxyError as SocksProxyError
from ..socks import sockssocket
from ..utils import escape_url, update_url_query
-from ..utils.networking import clean_headers, std_headers
SUPPORTED_ENCODINGS = ['gzip', 'deflate']
+CONTENT_DECODE_ERRORS = [zlib.error, OSError]
if brotli:
SUPPORTED_ENCODINGS.append('br')
+ CONTENT_DECODE_ERRORS.append(brotli.error)
-def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+def _create_http_connection(http_class, source_address, *args, **kwargs):
hc = http_class(*args, **kwargs)
- source_address = ydl_handler._params.get('source_address')
if source_address is not None:
# This is to workaround _create_connection() from socket where it will try all
@@ -73,7 +97,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
return hc
-class HTTPHandler(urllib.request.HTTPHandler):
+class HTTPHandler(urllib.request.AbstractHTTPHandler):
"""Handler for HTTP requests and responses.
This class, when installed with an OpenerDirector, automatically adds
@@ -88,21 +112,30 @@ class HTTPHandler(urllib.request.HTTPHandler):
public domain.
"""
- def __init__(self, params, *args, **kwargs):
- urllib.request.HTTPHandler.__init__(self, *args, **kwargs)
- self._params = params
-
- def http_open(self, req):
- conn_class = http.client.HTTPConnection
+ def __init__(self, context=None, source_address=None, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self._source_address = source_address
+ self._context = context
- socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ @staticmethod
+ def _make_conn_class(base, req):
+ conn_class = base
+ socks_proxy = req.headers.pop('Ytdl-socks-proxy', None)
if socks_proxy:
conn_class = make_socks_conn_class(conn_class, socks_proxy)
- del req.headers['Ytdl-socks-proxy']
+ return conn_class
+ def http_open(self, req):
+ conn_class = self._make_conn_class(http.client.HTTPConnection, req)
return self.do_open(functools.partial(
- _create_http_connection, self, conn_class, False),
- req)
+ _create_http_connection, conn_class, self._source_address), req)
+
+ def https_open(self, req):
+ conn_class = self._make_conn_class(http.client.HTTPSConnection, req)
+ return self.do_open(
+ functools.partial(
+ _create_http_connection, conn_class, self._source_address),
+ req, context=self._context)
@staticmethod
def deflate(data):
@@ -152,14 +185,6 @@ class HTTPHandler(urllib.request.HTTPHandler):
if url != url_escaped:
req = update_Request(req, url=url_escaped)
- for h, v in self._params.get('http_headers', std_headers).items():
- # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
- # The dict keys are capitalized because of this bug by urllib
- if h.capitalize() not in req.headers:
- req.add_header(h, v)
-
- clean_headers(req.headers)
- add_accept_encoding_header(req.headers, SUPPORTED_ENCODINGS)
return super().do_request_(req)
def http_response(self, req, resp):
@@ -207,16 +232,12 @@ def make_socks_conn_class(base_class, socks_proxy):
def connect(self):
self.sock = sockssocket()
self.sock.setproxy(**proxy_args)
- if isinstance(self.timeout, (int, float)):
+ if type(self.timeout) in (int, float): # noqa: E721
self.sock.settimeout(self.timeout)
self.sock.connect((self.host, self.port))
if isinstance(self, http.client.HTTPSConnection):
- if hasattr(self, '_context'): # Python > 2.6
- self.sock = self._context.wrap_socket(
- self.sock, server_hostname=self.host)
- else:
- self.sock = ssl.wrap_socket(self.sock)
+ self.sock = self._context.wrap_socket(self.sock, server_hostname=self.host)
return SocksConnection
@@ -260,29 +281,25 @@ class RedirectHandler(urllib.request.HTTPRedirectHandler):
unverifiable=True, method=new_method, data=new_data)
-class ProxyHandler(urllib.request.ProxyHandler):
+class ProxyHandler(urllib.request.BaseHandler):
+ handler_order = 100
+
def __init__(self, proxies=None):
+ self.proxies = proxies
# Set default handlers
- for type in ('http', 'https'):
- setattr(self, '%s_open' % type,
- lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
- meth(r, proxy, type))
- urllib.request.ProxyHandler.__init__(self, proxies)
-
- def proxy_open(self, req, proxy, type):
- req_proxy = req.headers.get('Ytdl-request-proxy')
- if req_proxy is not None:
- proxy = req_proxy
- del req.headers['Ytdl-request-proxy']
-
- if proxy == '__noproxy__':
- return None # No Proxy
- if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ for type in ('http', 'https', 'ftp'):
+ setattr(self, '%s_open' % type, lambda r, meth=self.proxy_open: meth(r))
+
+ def proxy_open(self, req):
+ proxy = select_proxy(req.get_full_url(), self.proxies)
+ if proxy is None:
+ return
+ if urllib.parse.urlparse(proxy).scheme.lower() in ('socks4', 'socks4a', 'socks5', 'socks5h'):
req.add_header('Ytdl-socks-proxy', proxy)
# yt-dlp's http/https handlers do wrapping the socket with socks
return None
return urllib.request.ProxyHandler.proxy_open(
- self, req, proxy, type)
+ self, req, proxy, None)
class PUTRequest(urllib.request.Request):
@@ -313,3 +330,129 @@ def update_Request(req, url=None, data=None, headers=None, query=None):
if hasattr(req, 'timeout'):
new_req.timeout = req.timeout
return new_req
+
+
+class UrllibResponseAdapter(Response):
+ """
+ HTTP Response adapter class for urllib addinfourl and http.client.HTTPResponse
+ """
+
+ def __init__(self, res: http.client.HTTPResponse | urllib.response.addinfourl):
+ # addinfourl: In Python 3.9+, .status was introduced and .getcode() was deprecated [1]
+ # HTTPResponse: .getcode() was deprecated, .status always existed [2]
+ # 1. https://docs.python.org/3/library/urllib.request.html#urllib.response.addinfourl.getcode
+ # 2. https://docs.python.org/3.10/library/http.client.html#http.client.HTTPResponse.status
+ super().__init__(
+ fp=res, headers=res.headers, url=res.url,
+ status=getattr(res, 'status', None) or res.getcode(), reason=getattr(res, 'reason', None))
+
+ def read(self, amt=None):
+ try:
+ return self.fp.read(amt)
+ except Exception as e:
+ handle_response_read_exceptions(e)
+ raise e
+
+
+def handle_sslerror(e: ssl.SSLError):
+ if not isinstance(e, ssl.SSLError):
+ return
+ if isinstance(e, ssl.SSLCertVerificationError):
+ raise CertificateVerifyError(cause=e) from e
+ raise SSLError(cause=e) from e
+
+
+def handle_response_read_exceptions(e):
+ if isinstance(e, http.client.IncompleteRead):
+ raise IncompleteRead(partial=e.partial, cause=e, expected=e.expected) from e
+ elif isinstance(e, ssl.SSLError):
+ handle_sslerror(e)
+ elif isinstance(e, (OSError, EOFError, http.client.HTTPException, *CONTENT_DECODE_ERRORS)):
+ # OSErrors raised here should mostly be network related
+ raise TransportError(cause=e) from e
+
+
+@register
+class UrllibRH(RequestHandler, InstanceStoreMixin):
+ _SUPPORTED_URL_SCHEMES = ('http', 'https', 'data', 'ftp')
+ _SUPPORTED_PROXY_SCHEMES = ('http', 'socks4', 'socks4a', 'socks5', 'socks5h')
+ _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
+ RH_NAME = 'urllib'
+
+ def __init__(self, *, enable_file_urls: bool = False, **kwargs):
+ super().__init__(**kwargs)
+ self.enable_file_urls = enable_file_urls
+ if self.enable_file_urls:
+ self._SUPPORTED_URL_SCHEMES = (*self._SUPPORTED_URL_SCHEMES, 'file')
+
+ def _create_instance(self, proxies, cookiejar):
+ opener = urllib.request.OpenerDirector()
+ handlers = [
+ ProxyHandler(proxies),
+ HTTPHandler(
+ debuglevel=int(bool(self.verbose)),
+ context=self._make_sslcontext(),
+ source_address=self.source_address),
+ HTTPCookieProcessor(cookiejar),
+ DataHandler(),
+ UnknownHandler(),
+ HTTPDefaultErrorHandler(),
+ FTPHandler(),
+ HTTPErrorProcessor(),
+ RedirectHandler(),
+ ]
+
+ if self.enable_file_urls:
+ handlers.append(FileHandler())
+
+ for handler in handlers:
+ opener.add_handler(handler)
+
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
+ opener.addheaders = []
+ return opener
+
+ def _send(self, request):
+ headers = self._merge_headers(request.headers)
+ add_accept_encoding_header(headers, SUPPORTED_ENCODINGS)
+ urllib_req = urllib.request.Request(
+ url=request.url,
+ data=request.data,
+ headers=dict(headers),
+ method=request.method
+ )
+
+ opener = self._get_instance(
+ proxies=request.proxies or self.proxies,
+ cookiejar=request.extensions.get('cookiejar') or self.cookiejar
+ )
+ try:
+ res = opener.open(urllib_req, timeout=float(request.extensions.get('timeout') or self.timeout))
+ except urllib.error.HTTPError as e:
+ if isinstance(e.fp, (http.client.HTTPResponse, urllib.response.addinfourl)):
+ # Prevent file object from being closed when urllib.error.HTTPError is destroyed.
+ e._closer.file = None
+ raise HTTPError(UrllibResponseAdapter(e.fp), redirect_loop='redirect error' in str(e)) from e
+ raise # unexpected
+ except urllib.error.URLError as e:
+ cause = e.reason # NOTE: cause may be a string
+
+ # proxy errors
+ if 'tunnel connection failed' in str(cause).lower() or isinstance(cause, SocksProxyError):
+ raise ProxyError(cause=e) from e
+
+ handle_response_read_exceptions(cause)
+ raise TransportError(cause=e) from e
+ except (http.client.InvalidURL, ValueError) as e:
+ # Validation errors
+ # http.client.HTTPConnection raises ValueError in some validation cases
+ # such as if request method contains illegal control characters [1]
+ # 1. https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
+ raise RequestError(cause=e) from e
+ except Exception as e:
+ handle_response_read_exceptions(e)
+ raise # unexpected
+
+ return UrllibResponseAdapter(res)
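The urllib backend is now just one registered `RequestHandler`, keyed as 'Urllib', and `file://` support is opt-in via `enable_file_urls`. A small sketch (passing `logger=None` is a shortcut for this illustration only; yt-dlp supplies its own logger through `build_request_director()`):

    from yt_dlp.networking import Request
    from yt_dlp.networking.common import _REQUEST_HANDLERS
    from yt_dlp.networking.exceptions import UnsupportedRequest

    UrllibRH = _REQUEST_HANDLERS['Urllib']     # RH_KEY is the class name minus the 'RH' suffix

    with UrllibRH(logger=None) as rh:
        try:
            rh.validate(Request('file:///tmp/example'))
        except UnsupportedRequest as e:
            print(e)                           # file:// is rejected unless enable_file_urls=True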
diff --git a/yt_dlp/networking/common.py b/yt_dlp/networking/common.py
new file mode 100644
index 000000000..e4b362827
--- /dev/null
+++ b/yt_dlp/networking/common.py
@@ -0,0 +1,522 @@
+from __future__ import annotations
+
+import abc
+import copy
+import enum
+import functools
+import io
+import typing
+import urllib.parse
+import urllib.request
+import urllib.response
+from collections.abc import Iterable, Mapping
+from email.message import Message
+from http import HTTPStatus
+from http.cookiejar import CookieJar
+
+from ._helper import make_ssl_context, wrap_request_errors
+from .exceptions import (
+ NoSupportingHandlers,
+ RequestError,
+ TransportError,
+ UnsupportedRequest,
+)
+from ..utils import (
+ bug_reports_message,
+ classproperty,
+ error_to_str,
+ escape_url,
+ update_url_query,
+)
+from ..utils.networking import HTTPHeaderDict
+
+if typing.TYPE_CHECKING:
+ RequestData = bytes | Iterable[bytes] | typing.IO | None
+
+
+class RequestDirector:
+ """RequestDirector class
+
+ Helper class that, when given a request, forwards it to a RequestHandler that supports it.
+
+ @param logger: Logger instance.
+ @param verbose: Print debug request information to stdout.
+ """
+
+ def __init__(self, logger, verbose=False):
+ self.handlers: dict[str, RequestHandler] = {}
+ self.logger = logger # TODO(Grub4k): default logger
+ self.verbose = verbose
+
+ def close(self):
+ for handler in self.handlers.values():
+ handler.close()
+
+ def add_handler(self, handler: RequestHandler):
+ """Add a handler. If a handler of the same RH_KEY exists, it will overwrite it"""
+ assert isinstance(handler, RequestHandler), 'handler must be a RequestHandler'
+ self.handlers[handler.RH_KEY] = handler
+
+ def _print_verbose(self, msg):
+ if self.verbose:
+ self.logger.stdout(f'director: {msg}')
+
+ def send(self, request: Request) -> Response:
+ """
+ Passes a request onto a suitable RequestHandler
+ """
+ if not self.handlers:
+ raise RequestError('No request handlers configured')
+
+ assert isinstance(request, Request)
+
+ unexpected_errors = []
+ unsupported_errors = []
+ # TODO (future): add a per-request preference system
+ for handler in reversed(list(self.handlers.values())):
+ self._print_verbose(f'Checking if "{handler.RH_NAME}" supports this request.')
+ try:
+ handler.validate(request)
+ except UnsupportedRequest as e:
+ self._print_verbose(
+ f'"{handler.RH_NAME}" cannot handle this request (reason: {error_to_str(e)})')
+ unsupported_errors.append(e)
+ continue
+
+ self._print_verbose(f'Sending request via "{handler.RH_NAME}"')
+ try:
+ response = handler.send(request)
+ except RequestError:
+ raise
+ except Exception as e:
+ self.logger.error(
+ f'[{handler.RH_NAME}] Unexpected error: {error_to_str(e)}{bug_reports_message()}',
+ is_error=False)
+ unexpected_errors.append(e)
+ continue
+
+ assert isinstance(response, Response)
+ return response
+
+ raise NoSupportingHandlers(unsupported_errors, unexpected_errors)
+
+
+_REQUEST_HANDLERS = {}
+
+
+def register(handler):
+ """Register a RequestHandler class"""
+ assert issubclass(handler, RequestHandler), f'{handler} must be a subclass of RequestHandler'
+ assert handler.RH_KEY not in _REQUEST_HANDLERS, f'RequestHandler {handler.RH_KEY} already registered'
+ _REQUEST_HANDLERS[handler.RH_KEY] = handler
+ return handler
+
+
+class Features(enum.Enum):
+ ALL_PROXY = enum.auto()
+ NO_PROXY = enum.auto()
+
+
+class RequestHandler(abc.ABC):
+
+ """Request Handler class
+
+ Request handlers are classes that, given a Request,
+ process the request from start to finish and return a Response.
+
+ Concrete subclasses need to redefine the _send(request) method,
+ which handles the underlying request logic and returns a Response.
+
+ RH_NAME class variable may contain a display name for the RequestHandler.
+ By default, this is generated from the class name.
+
+ The concrete request handler MUST have "RH" as the suffix in the class name.
+
+ All exceptions raised by a RequestHandler should be an instance of RequestError.
+ Any other exception raised will be treated as a handler issue.
+
+ If a Request is not supported by the handler, an UnsupportedRequest
+ should be raised with a reason.
+
+ By default, some checks are done on the request in _validate() based on the following class variables:
+ - `_SUPPORTED_URL_SCHEMES`: a tuple of supported url schemes.
+ Any Request with an url scheme not in this list will raise an UnsupportedRequest.
+
+ - `_SUPPORTED_PROXY_SCHEMES`: a tuple of supported proxy url schemes. Any Request that contains
+ a proxy url with an url scheme not in this list will raise an UnsupportedRequest.
+
+ - `_SUPPORTED_FEATURES`: a tuple of supported features, as defined in Features enum.
+ The above may be set to None to disable the checks.
+
+ Parameters:
+ @param logger: logger instance
+ @param headers: HTTP Headers to include when sending requests.
+ @param cookiejar: Cookiejar to use for requests.
+ @param timeout: Socket timeout to use when sending requests.
+ @param proxies: Proxies to use for sending requests.
+ @param source_address: Client-side IP address to bind to for requests.
+ @param verbose: Print debug request and traffic information to stdout.
+ @param prefer_system_certs: Whether to prefer system certificates over other means (e.g. certifi).
+ @param client_cert: SSL client certificate configuration.
+ dict with {client_certificate, client_certificate_key, client_certificate_password}
+ @param verify: Verify SSL certificates
+ @param legacy_ssl_support: Enable legacy SSL options such as legacy server connect and older cipher support.
+
+ Some configuration options may be available for individual Requests too. In this case,
+ either the Request configuration option takes precedence or they are merged.
+
+ Requests may have additional optional parameters defined as extensions.
+ RequestHandler subclasses may choose to support custom extensions.
+
+ The following extensions are defined for RequestHandler:
+ - `cookiejar`: Cookiejar to use for this request
+ - `timeout`: socket timeout to use for this request
+
+ Apart from the url protocol, proxies dict may contain the following keys:
+ - `all`: proxy to use for all protocols. Used as a fallback if no proxy is set for a specific protocol.
+ - `no`: comma-separated list of hostnames (optionally with port) to not use a proxy for.
+ Note: a RequestHandler may not support these, as defined in `_SUPPORTED_FEATURES`.
+
+ """
+
+ _SUPPORTED_URL_SCHEMES = ()
+ _SUPPORTED_PROXY_SCHEMES = ()
+ _SUPPORTED_FEATURES = ()
+
+ def __init__(
+ self, *,
+ logger, # TODO(Grub4k): default logger
+ headers: HTTPHeaderDict = None,
+ cookiejar: CookieJar = None,
+ timeout: float | int | None = None,
+ proxies: dict = None,
+ source_address: str = None,
+ verbose: bool = False,
+ prefer_system_certs: bool = False,
+ client_cert: dict[str, str | None] = None,
+ verify: bool = True,
+ legacy_ssl_support: bool = False,
+ **_,
+ ):
+
+ self._logger = logger
+ self.headers = headers or {}
+ self.cookiejar = cookiejar if cookiejar is not None else CookieJar()
+ self.timeout = float(timeout or 20)
+ self.proxies = proxies or {}
+ self.source_address = source_address
+ self.verbose = verbose
+ self.prefer_system_certs = prefer_system_certs
+ self._client_cert = client_cert or {}
+ self.verify = verify
+ self.legacy_ssl_support = legacy_ssl_support
+ super().__init__()
+
+ def _make_sslcontext(self):
+ return make_ssl_context(
+ verify=self.verify,
+ legacy_support=self.legacy_ssl_support,
+ use_certifi=not self.prefer_system_certs,
+ **self._client_cert,
+ )
+
+ def _merge_headers(self, request_headers):
+ return HTTPHeaderDict(self.headers, request_headers)
+
+ def _check_url_scheme(self, request: Request):
+ scheme = urllib.parse.urlparse(request.url).scheme.lower()
+ if self._SUPPORTED_URL_SCHEMES is not None and scheme not in self._SUPPORTED_URL_SCHEMES:
+ raise UnsupportedRequest(f'Unsupported url scheme: "{scheme}"')
+ return scheme # for further processing
+
+ def _check_proxies(self, proxies):
+ for proxy_key, proxy_url in proxies.items():
+ if proxy_url is None:
+ continue
+ if proxy_key == 'no':
+ if self._SUPPORTED_FEATURES is not None and Features.NO_PROXY not in self._SUPPORTED_FEATURES:
+ raise UnsupportedRequest('"no" proxy is not supported')
+ continue
+ if (
+ proxy_key == 'all'
+ and self._SUPPORTED_FEATURES is not None
+ and Features.ALL_PROXY not in self._SUPPORTED_FEATURES
+ ):
+ raise UnsupportedRequest('"all" proxy is not supported')
+
+ # Unlikely this handler will use this proxy, so ignore.
+ # This is to allow a case where a proxy may be set for a protocol
+ # for one handler in which such protocol (and proxy) is not supported by another handler.
+ if self._SUPPORTED_URL_SCHEMES is not None and proxy_key not in (*self._SUPPORTED_URL_SCHEMES, 'all'):
+ continue
+
+ if self._SUPPORTED_PROXY_SCHEMES is None:
+ # Skip proxy scheme checks
+ continue
+
+ # Scheme-less proxies are not supported
+ if urllib.request._parse_proxy(proxy_url)[0] is None:
+ raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
+
+ scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
+ if scheme not in self._SUPPORTED_PROXY_SCHEMES:
+ raise UnsupportedRequest(f'Unsupported proxy type: "{scheme}"')
+
+ def _check_cookiejar_extension(self, extensions):
+ if not extensions.get('cookiejar'):
+ return
+ if not isinstance(extensions['cookiejar'], CookieJar):
+ raise UnsupportedRequest('cookiejar is not a CookieJar')
+
+ def _check_timeout_extension(self, extensions):
+ if extensions.get('timeout') is None:
+ return
+ if not isinstance(extensions['timeout'], (float, int)):
+ raise UnsupportedRequest('timeout is not a float or int')
+
+ def _check_extensions(self, extensions):
+ self._check_cookiejar_extension(extensions)
+ self._check_timeout_extension(extensions)
+
+ def _validate(self, request):
+ self._check_url_scheme(request)
+ self._check_proxies(request.proxies or self.proxies)
+ self._check_extensions(request.extensions)
+
+ @wrap_request_errors
+ def validate(self, request: Request):
+ if not isinstance(request, Request):
+ raise TypeError('Expected an instance of Request')
+ self._validate(request)
+
+ @wrap_request_errors
+ def send(self, request: Request) -> Response:
+ if not isinstance(request, Request):
+ raise TypeError('Expected an instance of Request')
+ return self._send(request)
+
+ @abc.abstractmethod
+ def _send(self, request: Request):
+ """Handle a request from start to finish. Redefine in subclasses."""
+
+ def close(self):
+ pass
+
+ @classproperty
+ def RH_NAME(cls):
+ return cls.__name__[:-2]
+
+ @classproperty
+ def RH_KEY(cls):
+ assert cls.__name__.endswith('RH'), 'RequestHandler class names must end with "RH"'
+ return cls.__name__[:-2]
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ self.close()
+
+
+class Request:
+ """
+ Represents a request to be made.
+ Partially backwards-compatible with urllib.request.Request.
+
+ @param url: url to send. Will be sanitized.
+ @param data: payload data to send. Must be bytes, iterable of bytes, a file-like object or None
+ @param headers: headers to send.
+ @param proxies: proxy dict mapping of proto:proxy to use for the request and any redirects.
+ @param query: URL query parameters to update the url with.
+ @param method: HTTP method to use. If no method specified, will use POST if payload data is present else GET
+ @param extensions: Dictionary of Request extensions to add, as supported by handlers.
+ """
+
+ def __init__(
+ self,
+ url: str,
+ data: RequestData = None,
+ headers: typing.Mapping = None,
+ proxies: dict = None,
+ query: dict = None,
+ method: str = None,
+ extensions: dict = None
+ ):
+
+ self._headers = HTTPHeaderDict()
+ self._data = None
+
+ if query:
+ url = update_url_query(url, query)
+
+ self.url = url
+ self.method = method
+ if headers:
+ self.headers = headers
+ self.data = data # note: must be done after setting headers
+ self.proxies = proxies or {}
+ self.extensions = extensions or {}
+
+ @property
+ def url(self):
+ return self._url
+
+ @url.setter
+ def url(self, url):
+ if not isinstance(url, str):
+ raise TypeError('url must be a string')
+ elif url.startswith('//'):
+ url = 'http:' + url
+ self._url = escape_url(url)
+
+ @property
+ def method(self):
+ return self._method or ('POST' if self.data is not None else 'GET')
+
+ @method.setter
+ def method(self, method):
+ if method is None:
+ self._method = None
+ elif isinstance(method, str):
+ self._method = method.upper()
+ else:
+ raise TypeError('method must be a string')
+
+ @property
+ def data(self):
+ return self._data
+
+ @data.setter
+ def data(self, data: RequestData):
+ # Try catch some common mistakes
+ if data is not None and (
+ not isinstance(data, (bytes, io.IOBase, Iterable)) or isinstance(data, (str, Mapping))
+ ):
+ raise TypeError('data must be bytes, iterable of bytes, or a file-like object')
+
+ if data == self._data and self._data is None:
+ self.headers.pop('Content-Length', None)
+
+ # https://docs.python.org/3/library/urllib.request.html#urllib.request.Request.data
+ if data != self._data:
+ if self._data is not None:
+ self.headers.pop('Content-Length', None)
+ self._data = data
+
+ if self._data is None:
+ self.headers.pop('Content-Type', None)
+
+ if 'Content-Type' not in self.headers and self._data is not None:
+ self.headers['Content-Type'] = 'application/x-www-form-urlencoded'
+
+ @property
+ def headers(self) -> HTTPHeaderDict:
+ return self._headers
+
+ @headers.setter
+ def headers(self, new_headers: Mapping):
+ """Replaces headers of the request. If not a CaseInsensitiveDict, it will be converted to one."""
+ if isinstance(new_headers, HTTPHeaderDict):
+ self._headers = new_headers
+ elif isinstance(new_headers, Mapping):
+ self._headers = HTTPHeaderDict(new_headers)
+ else:
+ raise TypeError('headers must be a mapping')
+
+ def update(self, url=None, data=None, headers=None, query=None):
+ self.data = data or self.data
+ self.headers.update(headers or {})
+ self.url = update_url_query(url or self.url, query or {})
+
+ def copy(self):
+ return self.__class__(
+ url=self.url,
+ headers=copy.deepcopy(self.headers),
+ proxies=copy.deepcopy(self.proxies),
+ data=self._data,
+ extensions=copy.copy(self.extensions),
+ method=self._method,
+ )
+
+
+HEADRequest = functools.partial(Request, method='HEAD')
+PUTRequest = functools.partial(Request, method='PUT')
+
+
+class Response(io.IOBase):
+ """
+ Base class for HTTP response adapters.
+
+ By default, it provides a basic wrapper for a file-like response object.
+
+ Interface partially backwards-compatible with addinfourl and http.client.HTTPResponse.
+
+ @param fp: Original, file-like, response.
+ @param url: URL that this is a response of.
+ @param headers: response headers.
+ @param status: Response HTTP status code. Default is 200 OK.
+ @param reason: HTTP status reason. Will use built-in reasons based on status code if not provided.
+ """
+
+ def __init__(
+ self,
+ fp: typing.IO,
+ url: str,
+ headers: Mapping[str, str],
+ status: int = 200,
+ reason: str = None):
+
+ self.fp = fp
+ self.headers = Message()
+ for name, value in headers.items():
+ self.headers.add_header(name, value)
+ self.status = status
+ self.url = url
+ try:
+ self.reason = reason or HTTPStatus(status).phrase
+ except ValueError:
+ self.reason = None
+
+ def readable(self):
+ return self.fp.readable()
+
+ def read(self, amt: int = None) -> bytes:
+ # Expected errors raised here should be of type RequestError or subclasses.
+ # Subclasses should redefine this method with more precise error handling.
+ try:
+ return self.fp.read(amt)
+ except Exception as e:
+ raise TransportError(cause=e) from e
+
+ def close(self):
+ self.fp.close()
+ return super().close()
+
+ def get_header(self, name, default=None):
+ """Get header for name.
+ If there are multiple matching headers, return all separated by comma."""
+ headers = self.headers.get_all(name)
+ if not headers:
+ return default
+ if name.title() == 'Set-Cookie':
+ # Special case, only get the first one
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3-4.1
+ return headers[0]
+ return ', '.join(headers)
+
+ # The following methods are for compatibility reasons and are deprecated
+ @property
+ def code(self):
+ return self.status
+
+ def getcode(self):
+ return self.status
+
+ def geturl(self):
+ return self.url
+
+ def info(self):
+ return self.headers
+
+ def getheader(self, name, default=None):
+ return self.get_header(name, default)
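The `RequestHandler` contract is meant to make alternative backends pluggable. A bare-bones, hypothetical handler (not part of this commit) to show the moving parts; note that registering it globally would also expose it to any later-constructed `YoutubeDL`:

    import io

    from yt_dlp.networking import RequestHandler, Response
    from yt_dlp.networking.common import register

    @register                                    # stores the class in _REQUEST_HANDLERS under RH_KEY 'Echo'
    class EchoRH(RequestHandler):                # concrete handler names must end in 'RH'
        _SUPPORTED_URL_SCHEMES = ('http', 'https')
        _SUPPORTED_PROXY_SCHEMES = None          # None disables the proxy-scheme check
        _SUPPORTED_FEATURES = None               # None disables the NO_PROXY/ALL_PROXY feature check

        def _send(self, request):
            # A real handler performs the transfer and maps failures to RequestError subclasses;
            # this toy simply echoes the request URL back as the response body
            return Response(fp=io.BytesIO(request.url.encode()), url=request.url, headers={})

The director walks its handlers, skips any whose `validate()` raises `UnsupportedRequest`, and returns the first successful `Response`; per-request handler preference is still marked as a TODO in this commit.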
diff --git a/yt_dlp/networking/exceptions.py b/yt_dlp/networking/exceptions.py
index 89b484a22..6fe8afb92 100644
--- a/yt_dlp/networking/exceptions.py
+++ b/yt_dlp/networking/exceptions.py
@@ -1,9 +1,197 @@
-import http.client
-import socket
-import ssl
+from __future__ import annotations
+
+import typing
import urllib.error
-network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error]
-if hasattr(ssl, 'CertificateError'):
- network_exceptions.append(ssl.CertificateError)
-network_exceptions = tuple(network_exceptions)
+from ..utils import YoutubeDLError
+
+if typing.TYPE_CHECKING:
+ from .common import RequestHandler, Response
+
+
+class RequestError(YoutubeDLError):
+ def __init__(
+ self,
+ msg: str | None = None,
+ cause: Exception | str | None = None,
+ handler: RequestHandler = None
+ ):
+ self.handler = handler
+ self.cause = cause
+ if not msg and cause:
+ msg = str(cause)
+ super().__init__(msg)
+
+
+class UnsupportedRequest(RequestError):
+ """raised when a handler cannot handle a request"""
+ pass
+
+
+class NoSupportingHandlers(RequestError):
+ """raised when no handlers can support a request for various reasons"""
+
+ def __init__(self, unsupported_errors: list[UnsupportedRequest], unexpected_errors: list[Exception]):
+ self.unsupported_errors = unsupported_errors or []
+ self.unexpected_errors = unexpected_errors or []
+
+ # Print a quick summary of the errors
+ err_handler_map = {}
+ for err in unsupported_errors:
+ err_handler_map.setdefault(err.msg, []).append(err.handler.RH_NAME)
+
+ reason_str = ', '.join([f'{msg} ({", ".join(handlers)})' for msg, handlers in err_handler_map.items()])
+ if unexpected_errors:
+ reason_str = ' + '.join(filter(None, [reason_str, f'{len(unexpected_errors)} unexpected error(s)']))
+
+ err_str = 'Unable to handle request'
+ if reason_str:
+ err_str += f': {reason_str}'
+
+ super().__init__(msg=err_str)
+
+
+class TransportError(RequestError):
+ """Network related errors"""
+
+
+class HTTPError(RequestError):
+ def __init__(self, response: Response, redirect_loop=False):
+ self.response = response
+ self.status = response.status
+ self.reason = response.reason
+ self.redirect_loop = redirect_loop
+ msg = f'HTTP Error {response.status}: {response.reason}'
+ if redirect_loop:
+ msg += ' (redirect loop detected)'
+
+ super().__init__(msg=msg)
+
+ def close(self):
+ self.response.close()
+
+ def __repr__(self):
+ return f'<HTTPError {self.status}: {self.reason}>'
+
+
+class IncompleteRead(TransportError):
+ def __init__(self, partial, expected=None, **kwargs):
+ self.partial = partial
+ self.expected = expected
+ msg = f'{len(partial)} bytes read'
+ if expected is not None:
+ msg += f', {expected} more expected'
+
+ super().__init__(msg=msg, **kwargs)
+
+ def __repr__(self):
+ return f'<IncompleteRead: {self.msg}>'
+
+
+class SSLError(TransportError):
+ pass
+
+
+class CertificateVerifyError(SSLError):
+ """Raised when certificate validated has failed"""
+ pass
+
+
+class ProxyError(TransportError):
+ pass
+
+
+class _CompatHTTPError(urllib.error.HTTPError, HTTPError):
+ """
+ Provides backwards compatibility with urllib.error.HTTPError.
+ Do not use this class directly, use HTTPError instead.
+ """
+
+ def __init__(self, http_error: HTTPError):
+ super().__init__(
+ url=http_error.response.url,
+ code=http_error.status,
+ msg=http_error.msg,
+ hdrs=http_error.response.headers,
+ fp=http_error.response
+ )
+ self._closer.file = None # Disable auto close
+ self._http_error = http_error
+ HTTPError.__init__(self, http_error.response, redirect_loop=http_error.redirect_loop)
+
+ @property
+ def status(self):
+ return self._http_error.status
+
+ @status.setter
+ def status(self, value):
+ return
+
+ @property
+ def reason(self):
+ return self._http_error.reason
+
+ @reason.setter
+ def reason(self, value):
+ return
+
+ @property
+ def headers(self):
+ return self._http_error.response.headers
+
+ @headers.setter
+ def headers(self, value):
+ return
+
+ def info(self):
+ return self.response.headers
+
+ def getcode(self):
+ return self.status
+
+ def geturl(self):
+ return self.response.url
+
+ @property
+ def code(self):
+ return self.status
+
+ @code.setter
+ def code(self, value):
+ return
+
+ @property
+ def url(self):
+ return self.response.url
+
+ @url.setter
+ def url(self, value):
+ return
+
+ @property
+ def hdrs(self):
+ return self.response.headers
+
+ @hdrs.setter
+ def hdrs(self, value):
+ return
+
+ @property
+ def filename(self):
+ return self.response.url
+
+ @filename.setter
+ def filename(self, value):
+ return
+
+ def __getattr__(self, name):
+ return super().__getattr__(name)
+
+ def __str__(self):
+ return str(self._http_error)
+
+ def __repr__(self):
+ return repr(self._http_error)
+
+
+network_exceptions = (HTTPError, TransportError)
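The new exception tree replaces the old tuple of stdlib exceptions. A compact sketch of how the classes relate and what `network_exceptions` now contains (assuming this revision is importable):

    from yt_dlp.networking.exceptions import (
        CertificateVerifyError, HTTPError, IncompleteRead,
        RequestError, SSLError, TransportError, network_exceptions)

    # Everything derives from RequestError; transport-level failures share TransportError
    assert issubclass(HTTPError, RequestError)
    assert issubclass(SSLError, TransportError)
    assert issubclass(CertificateVerifyError, SSLError)
    assert issubclass(IncompleteRead, TransportError)
    print(network_exceptions)    # (HTTPError, TransportError) - what extractors now catch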
diff --git a/yt_dlp/utils/_deprecated.py b/yt_dlp/utils/_deprecated.py
index ca0fb1614..e55d42354 100644
--- a/yt_dlp/utils/_deprecated.py
+++ b/yt_dlp/utils/_deprecated.py
@@ -10,16 +10,16 @@ del passthrough_module
from ._utils import preferredencoding
+from ..networking._urllib import HTTPHandler
# isort: split
+from .networking import random_user_agent, std_headers # noqa: F401
from ..networking._urllib import PUTRequest # noqa: F401
from ..networking._urllib import SUPPORTED_ENCODINGS, HEADRequest # noqa: F401
-from ..networking._urllib import HTTPHandler as YoutubeDLHandler # noqa: F401
from ..networking._urllib import ProxyHandler as PerRequestProxyHandler # noqa: F401
from ..networking._urllib import RedirectHandler as YoutubeDLRedirectHandler # noqa: F401
from ..networking._urllib import make_socks_conn_class, update_Request # noqa: F401
from ..networking.exceptions import network_exceptions # noqa: F401
-from .networking import random_user_agent, std_headers # noqa: F401
def encodeFilename(s, for_subprocess=False):
@@ -47,3 +47,12 @@ def decodeOption(optval):
def error_to_compat_str(err):
return str(err)
+
+
+class YoutubeDLHandler(HTTPHandler):
+ def __init__(self, params, *args, **kwargs):
+ self._params = params
+ super().__init__(*args, **kwargs)
+
+
+YoutubeDLHTTPSHandler = YoutubeDLHandler
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index d5704cadc..d0e328716 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -15,8 +15,6 @@ import hashlib
import hmac
import html.entities
import html.parser
-import http.client
-import http.cookiejar
import inspect
import io
import itertools
@@ -897,6 +895,7 @@ def formatSeconds(secs, delim=':', msec=False):
def make_HTTPS_handler(params, **kwargs):
+ from ._deprecated import YoutubeDLHTTPSHandler
from ..networking._helper import make_ssl_context
return YoutubeDLHTTPSHandler(params, context=make_ssl_context(
verify=not params.get('nocheckcertificate'),
@@ -1140,38 +1139,6 @@ class XAttrUnavailableError(YoutubeDLError):
pass
-class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler):
- def __init__(self, params, https_conn_class=None, *args, **kwargs):
- urllib.request.HTTPSHandler.__init__(self, *args, **kwargs)
- self._https_conn_class = https_conn_class or http.client.HTTPSConnection
- self._params = params
-
- def https_open(self, req):
- kwargs = {}
- conn_class = self._https_conn_class
-
- if hasattr(self, '_context'): # python > 2.6
- kwargs['context'] = self._context
- if hasattr(self, '_check_hostname'): # python 3.x
- kwargs['check_hostname'] = self._check_hostname
-
- socks_proxy = req.headers.get('Ytdl-socks-proxy')
- if socks_proxy:
- from ..networking._urllib import make_socks_conn_class
- conn_class = make_socks_conn_class(conn_class, socks_proxy)
- del req.headers['Ytdl-socks-proxy']
-
- from ..networking._urllib import _create_http_connection
- try:
- return self.do_open(
- functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs)
- except urllib.error.URLError as e:
- if (isinstance(e.reason, ssl.SSLError)
- and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'):
- raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect')
- raise
-
-
def is_path_like(f):
return isinstance(f, (str, bytes, os.PathLike))
diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py
index 95b54fabe..ac355ddc8 100644
--- a/yt_dlp/utils/networking.py
+++ b/yt_dlp/utils/networking.py
@@ -1,4 +1,9 @@
+import collections
import random
+import urllib.parse
+import urllib.request
+
+from ._utils import remove_start
def random_user_agent():
@@ -46,15 +51,67 @@ def random_user_agent():
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
-std_headers = {
+class HTTPHeaderDict(collections.UserDict, dict):
+ """
+ Store and access keys case-insensitively.
+ The constructor can take multiple dicts, in which keys in the latter are prioritised.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super().__init__()
+ for dct in args:
+ if dct is not None:
+ self.update(dct)
+ self.update(kwargs)
+
+ def __setitem__(self, key, value):
+ super().__setitem__(key.title(), str(value))
+
+ def __getitem__(self, key):
+ return super().__getitem__(key.title())
+
+ def __delitem__(self, key):
+ super().__delitem__(key.title())
+
+ def __contains__(self, key):
+ return super().__contains__(key.title() if isinstance(key, str) else key)
+
+
+std_headers = HTTPHeaderDict({
'User-Agent': random_user_agent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-us,en;q=0.5',
'Sec-Fetch-Mode': 'navigate',
-}
+})
+
+
+def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
+ req_proxy = headers.pop('Ytdl-Request-Proxy', None)
+ if req_proxy:
+ proxies.clear() # XXX: compat: Ytdl-Request-Proxy takes preference over everything, including NO_PROXY
+ proxies['all'] = req_proxy
+ for proxy_key, proxy_url in proxies.items():
+ if proxy_url == '__noproxy__':
+ proxies[proxy_key] = None
+ continue
+ if proxy_key == 'no': # special case
+ continue
+ if proxy_url is not None:
+ # Ensure proxies without a scheme are http.
+ proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
+ if proxy_scheme is None:
+ proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')
+
+ replace_scheme = {
+ 'socks5': 'socks5h', # compat: socks5 was treated as socks5h
+ 'socks': 'socks4' # compat: non-standard
+ }
+ if proxy_scheme in replace_scheme:
+ proxies[proxy_key] = urllib.parse.urlunparse(
+ urllib.parse.urlparse(proxy_url)._replace(scheme=replace_scheme[proxy_scheme]))
-def clean_headers(headers):
- if 'Youtubedl-no-compression' in headers: # compat
- del headers['Youtubedl-no-compression']
+def clean_headers(headers: HTTPHeaderDict):
+ if 'Youtubedl-No-Compression' in headers: # compat
+ del headers['Youtubedl-No-Compression']
headers['Accept-Encoding'] = 'identity'
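To round off: `HTTPHeaderDict` title-cases header names and lets later arguments win, while `clean_proxies()` applies the compat rules above (`__noproxy__` to `None`, scheme-less proxies to `http`, `socks5` to `socks5h`). A short check with example values:

    from yt_dlp.utils.networking import HTTPHeaderDict, clean_proxies

    headers = HTTPHeaderDict({'user-agent': 'demo/1.0'}, {'USER-AGENT': 'demo/2.0'})
    print(headers['User-Agent'])     # demo/2.0 - later dicts take precedence
    print('user-agent' in headers)   # True - lookups are case-insensitive

    proxies = {'https': 'socks5://127.0.0.1:1080', 'http': '__noproxy__'}
    clean_proxies(proxies, headers)
    print(proxies)                   # {'https': 'socks5h://127.0.0.1:1080', 'http': None}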