diff options
author | coletdjnz <coletdjnz@protonmail.com> | 2023-07-15 15:55:23 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2023-07-15 16:18:35 +0530 |
commit | 227bf1a33be7b89cd7d44ad046844c4ccba104f4 (patch) | |
tree | 72ffa5e9135e7e4869c351bd9dec6c0944cfba17 /yt_dlp/utils/networking.py | |
parent | c365dba8430ee33abda85d31f95128605bf240eb (diff) | |
download | hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.tar.lz hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.tar.xz hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.zip |
[networking] Rewrite architecture (#2861)
New networking interface consists of a `RequestDirector` that directs
each `Request` to appropriate `RequestHandler` and returns the
`Response` or raises `RequestError`. The handlers define adapters to
transform their internal Request/Response/Errors to our interfaces.
User-facing changes:
- Fix issues with per request proxies on redirects for urllib
- Support for `ALL_PROXY` environment variable for proxy setting
- Support for `socks5h` proxy
- Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093
- Raise error when using `https` proxy instead of silently converting it to `http`
Authored by: coletdjnz
Diffstat (limited to 'yt_dlp/utils/networking.py')
-rw-r--r-- | yt_dlp/utils/networking.py | 67 |
1 files changed, 62 insertions, 5 deletions
diff --git a/yt_dlp/utils/networking.py b/yt_dlp/utils/networking.py
index 95b54fabe..ac355ddc8 100644
--- a/yt_dlp/utils/networking.py
+++ b/yt_dlp/utils/networking.py
@@ -1,4 +1,9 @@
+import collections
 import random
+import urllib.parse
+import urllib.request
+
+from ._utils import remove_start
 
 
 def random_user_agent():
@@ -46,15 +51,67 @@ def random_user_agent():
     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 
 
-std_headers = {
+class HTTPHeaderDict(collections.UserDict, dict):
+    """
+    Store and access keys case-insensitively.
+    The constructor can take multiple dicts, in which keys in the latter are prioritised.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        for dct in args:
+            if dct is not None:
+                self.update(dct)
+        self.update(kwargs)
+
+    def __setitem__(self, key, value):
+        super().__setitem__(key.title(), str(value))
+
+    def __getitem__(self, key):
+        return super().__getitem__(key.title())
+
+    def __delitem__(self, key):
+        super().__delitem__(key.title())
+
+    def __contains__(self, key):
+        return super().__contains__(key.title() if isinstance(key, str) else key)
+
+
+std_headers = HTTPHeaderDict({
     'User-Agent': random_user_agent(),
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Language': 'en-us,en;q=0.5',
     'Sec-Fetch-Mode': 'navigate',
-}
+})
+
+
+def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
+    req_proxy = headers.pop('Ytdl-Request-Proxy', None)
+    if req_proxy:
+        proxies.clear()  # XXX: compat: Ytdl-Request-Proxy takes preference over everything, including NO_PROXY
+        proxies['all'] = req_proxy
+    for proxy_key, proxy_url in proxies.items():
+        if proxy_url == '__noproxy__':
+            proxies[proxy_key] = None
+            continue
+        if proxy_key == 'no':  # special case
+            continue
+        if proxy_url is not None:
+            # Ensure proxies without a scheme are http.
+            proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
+            if proxy_scheme is None:
+                proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')
+
+            replace_scheme = {
+                'socks5': 'socks5h',  # compat: socks5 was treated as socks5h
+                'socks': 'socks4'  # compat: non-standard
+            }
+            if proxy_scheme in replace_scheme:
+                proxies[proxy_key] = urllib.parse.urlunparse(
+                    urllib.parse.urlparse(proxy_url)._replace(scheme=replace_scheme[proxy_scheme]))
 
 
-def clean_headers(headers):
-    if 'Youtubedl-no-compression' in headers:  # compat
-        del headers['Youtubedl-no-compression']
+def clean_headers(headers: HTTPHeaderDict):
+    if 'Youtubedl-No-Compression' in headers:  # compat
+        del headers['Youtubedl-No-Compression']
         headers['Accept-Encoding'] = 'identity'