aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/utils/networking.py
diff options
context:
space:
mode:
authorcoletdjnz <coletdjnz@protonmail.com>2023-07-15 15:55:23 +0530
committerpukkandan <pukkandan.ytdlp@gmail.com>2023-07-15 16:18:35 +0530
commit227bf1a33be7b89cd7d44ad046844c4ccba104f4 (patch)
tree72ffa5e9135e7e4869c351bd9dec6c0944cfba17 /yt_dlp/utils/networking.py
parentc365dba8430ee33abda85d31f95128605bf240eb (diff)
downloadhypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.tar.lz
hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.tar.xz
hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.zip
[networking] Rewrite architecture (#2861)
The new networking interface consists of a `RequestDirector` that directs each `Request` to the appropriate `RequestHandler` and returns the `Response` or raises a `RequestError`. The handlers define adapters to transform their internal Request/Response/Errors to our interfaces.

User-facing changes:
- Fix issues with per-request proxies on redirects for urllib
- Support for the `ALL_PROXY` environment variable for proxy setting
- Support for `socks5h` proxy
- Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093
- Raise an error when using an `https` proxy instead of silently converting it to `http`

Authored by: coletdjnz
Diffstat (limited to 'yt_dlp/utils/networking.py')
-rw-r--r--yt_dlp/utils/networking.py67
1 files changed, 62 insertions, 5 deletions
diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py
index 95b54fabe..ac355ddc8 100644
--- a/yt_dlp/utils/networking.py
+++ b/yt_dlp/utils/networking.py
@@ -1,4 +1,9 @@
+import collections
import random
+import urllib.parse
+import urllib.request
+
+from ._utils import remove_start
def random_user_agent():
@@ -46,15 +51,67 @@ def random_user_agent():
return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
-std_headers = {
class HTTPHeaderDict(collections.UserDict, dict):
    """
    Store and access keys case-insensitively.
    The constructor can take multiple dicts, in which keys in the latter are prioritised.

    Keys are normalised with `str.title()` on every access and values are
    stored as `str`. `bytes` values are decoded as latin-1 instead of being
    stringified to their repr.
    """
    # NOTE(review): `dict` is listed as a base presumably so that
    # `isinstance(headers, dict)` checks elsewhere keep passing - confirm.

    def __init__(self, *args, **kwargs):
        super().__init__()
        # Later mappings win; `None` entries are skipped so that optional
        # header sources can be passed through without checking them first.
        for dct in args:
            if dct is not None:
                self.update(dct)
        self.update(kwargs)

    def __setitem__(self, key, value):
        if isinstance(value, bytes):
            # str(b'abc') would store the literal text "b'abc'"
            value = value.decode('latin-1')
        super().__setitem__(key.title(), str(value))

    def __getitem__(self, key):
        return super().__getitem__(key.title())

    def __delitem__(self, key):
        super().__delitem__(key.title())

    def __contains__(self, key):
        # Membership tests with non-string keys must not raise
        return super().__contains__(key.title() if isinstance(key, str) else key)
+
+
# Default request headers. Stored in an HTTPHeaderDict so lookups are
# case-insensitive; the User-Agent is chosen once, when the module is imported.
std_headers = HTTPHeaderDict({
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
})
+
+
def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
    """
    Normalise the proxy mapping `proxies` in place.

    - A `Ytdl-Request-Proxy` header is removed from `headers`; when it has a
      truthy value it replaces every other entry and becomes the `all` proxy.
    - The `'__noproxy__'` sentinel is converted to `None`.
    - The `no` entry is left untouched.
    - Proxy URLs without a scheme are given the `http` scheme.
    - The legacy `socks5`/`socks` schemes are rewritten to `socks5h`/`socks4`.
    - Entries that cannot be parsed as a proxy URL are left unchanged.

    @param proxies  Mapping of proxy key to proxy URL (or None); mutated in place
    @param headers  Request headers; `Ytdl-Request-Proxy` is popped from it
    """
    replace_scheme = {
        'socks5': 'socks5h',  # compat: socks5 was treated as socks5h
        'socks': 'socks4',  # compat: non-standard
    }

    req_proxy = headers.pop('Ytdl-Request-Proxy', None)
    if req_proxy:
        proxies.clear()  # XXX: compat: Ytdl-Request-Proxy takes preference over everything, including NO_PROXY
        proxies['all'] = req_proxy

    # Only the values of existing keys are reassigned below, so the mapping
    # keeps its size while it is being iterated.
    for proxy_key, proxy_url in proxies.items():
        if proxy_url == '__noproxy__':
            proxies[proxy_key] = None
            continue
        if proxy_key == 'no' or proxy_url is None:  # 'no' is a special case
            continue

        try:
            proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
        except ValueError:
            # Not a valid proxy URL (e.g. a filesystem path picked up from an
            # unrelated *_PROXY environment variable). Leave it as it is
            # instead of aborting the clean-up of the remaining entries.
            continue

        if proxy_scheme is None:
            # Ensure proxies without a scheme are http.
            proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')
        elif proxy_scheme in replace_scheme:
            proxies[proxy_key] = urllib.parse.urlunparse(
                urllib.parse.urlparse(proxy_url)._replace(scheme=replace_scheme[proxy_scheme]))
-def clean_headers(headers):
- if 'Youtubedl-no-compression' in headers: # compat
- del headers['Youtubedl-no-compression']
def clean_headers(headers: HTTPHeaderDict):
    """
    Replace the compat `Youtubedl-No-Compression` header, when present, with
    `Accept-Encoding: identity`. `headers` is modified in place.
    """
    compat_header = 'Youtubedl-No-Compression'  # compat
    if compat_header not in headers:
        return
    del headers[compat_header]
    headers['Accept-Encoding'] = 'identity'