diff options
author | coletdjnz <coletdjnz@protonmail.com> | 2023-07-15 15:55:23 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2023-07-15 16:18:35 +0530 |
commit | 227bf1a33be7b89cd7d44ad046844c4ccba104f4 (patch) | |
tree | 72ffa5e9135e7e4869c351bd9dec6c0944cfba17 /test/test_networking.py | |
parent | c365dba8430ee33abda85d31f95128605bf240eb (diff) | |
download | hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.tar.lz hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.tar.xz hypervideo-pre-227bf1a33be7b89cd7d44ad046844c4ccba104f4.zip |
[networking] Rewrite architecture (#2861)
New networking interface consists of a `RequestDirector` that directs
each `Request` to appropriate `RequestHandler` and returns the
`Response` or raises `RequestError`. The handlers define adapters to
transform its internal Request/Response/Errors to our interfaces.
User-facing changes:
- Fix issues with per request proxies on redirects for urllib
- Support for `ALL_PROXY` environment variable for proxy setting
- Support for `socks5h` proxy
- Closes https://github.com/yt-dlp/yt-dlp/issues/6325, https://github.com/ytdl-org/youtube-dl/issues/22618, https://github.com/ytdl-org/youtube-dl/pull/28093
- Raise error when using `https` proxy instead of silently converting it to `http`
Authored by: coletdjnz
Diffstat (limited to 'test/test_networking.py')
-rw-r--r-- | test/test_networking.py | 1361 |
1 files changed, 1099 insertions, 262 deletions
diff --git a/test/test_networking.py b/test/test_networking.py index e4e66dce1..147a4ff49 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -3,32 +3,74 @@ # Allow direct execution import os import sys -import unittest + +import pytest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import functools import gzip +import http.client import http.cookiejar import http.server +import inspect import io import pathlib +import random import ssl import tempfile import threading +import time import urllib.error import urllib.request +import warnings import zlib +from email.message import Message +from http.cookiejar import CookieJar -from test.helper import http_server_port -from yt_dlp import YoutubeDL +from test.helper import FakeYDL, http_server_port from yt_dlp.dependencies import brotli -from yt_dlp.utils import sanitized_Request, urlencode_postdata - -from .helper import FakeYDL +from yt_dlp.networking import ( + HEADRequest, + PUTRequest, + Request, + RequestDirector, + RequestHandler, + Response, +) +from yt_dlp.networking._urllib import UrllibRH +from yt_dlp.networking.common import _REQUEST_HANDLERS +from yt_dlp.networking.exceptions import ( + CertificateVerifyError, + HTTPError, + IncompleteRead, + NoSupportingHandlers, + RequestError, + SSLError, + TransportError, + UnsupportedRequest, +) +from yt_dlp.utils._utils import _YDLLogger as FakeLogger +from yt_dlp.utils.networking import HTTPHeaderDict TEST_DIR = os.path.dirname(os.path.abspath(__file__)) +def _build_proxy_handler(name): + class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): + proxy_name = name + + def log_message(self, format, *args): + pass + + def do_GET(self): + self.send_response(200) + self.send_header('Content-Type', 'text/plain; charset=utf-8') + self.end_headers() + self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode()) + return HTTPTestRequestHandler + + class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): protocol_version = 'HTTP/1.1' @@ -36,7 +78,7 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): pass def _headers(self): - payload = str(self.headers).encode('utf-8') + payload = str(self.headers).encode() self.send_response(200) self.send_header('Content-Type', 'application/json') self.send_header('Content-Length', str(len(payload))) @@ -70,7 +112,7 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): return self.rfile.read(int(self.headers['Content-Length'])) def do_POST(self): - data = self._read_data() + data = self._read_data() + str(self.headers).encode() if self.path.startswith('/redirect_'): self._redirect() elif self.path.startswith('/method'): @@ -89,7 +131,7 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): self._status(404) def do_PUT(self): - data = self._read_data() + data = self._read_data() + str(self.headers).encode() if self.path.startswith('/redirect_'): self._redirect() elif self.path.startswith('/method'): @@ -102,7 +144,7 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): payload = b'<html><video src="/vid.mp4" /></html>' self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') - self.send_header('Content-Length', str(len(payload))) # required for persistent connections + self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) elif self.path == '/vid.mp4': @@ -126,10 +168,15 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) + elif self.path.startswith('/redirect_loop'): + self.send_response(301) + self.send_header('Location', self.path) + self.send_header('Content-Length', '0') + self.end_headers() elif self.path.startswith('/redirect_'): self._redirect() elif self.path.startswith('/method'): - self._method('GET') + self._method('GET', str(self.headers).encode()) elif self.path.startswith('/headers'): self._headers() elif self.path.startswith('/308-to-headers'): @@ -179,7 +226,32 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) - + elif self.path.startswith('/gen_'): + payload = b'<html></html>' + self.send_response(int(self.path[len('/gen_'):])) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + elif self.path.startswith('/incompleteread'): + payload = b'<html></html>' + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', '234234') + self.end_headers() + self.wfile.write(payload) + self.finish() + elif self.path.startswith('/timeout_'): + time.sleep(int(self.path[len('/timeout_'):])) + self._headers() + elif self.path == '/source_address': + payload = str(self.client_address[0]).encode() + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload) + self.finish() else: self._status(404) @@ -198,334 +270,1099 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode()) -class FakeLogger: - def debug(self, msg): - pass +def validate_and_send(rh, req): + rh.validate(req) + return rh.send(req) - def warning(self, msg): - pass - def error(self, msg): - pass - - -class TestHTTP(unittest.TestCase): - def setUp(self): - # HTTP server - self.http_httpd = http.server.ThreadingHTTPServer( +class TestRequestHandlerBase: + @classmethod + def setup_class(cls): + cls.http_httpd = http.server.ThreadingHTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) - self.http_port = http_server_port(self.http_httpd) - self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever) + cls.http_port = http_server_port(cls.http_httpd) + cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever) # FIXME: we should probably stop the http server thread after each test # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041 - self.http_server_thread.daemon = True - self.http_server_thread.start() + cls.http_server_thread.daemon = True + cls.http_server_thread.start() # HTTPS server certfn = os.path.join(TEST_DIR, 'testcert.pem') - self.https_httpd = http.server.ThreadingHTTPServer( + cls.https_httpd = http.server.ThreadingHTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.load_cert_chain(certfn, None) - self.https_httpd.socket = sslctx.wrap_socket(self.https_httpd.socket, server_side=True) - self.https_port = http_server_port(self.https_httpd) - self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever) - self.https_server_thread.daemon = True - self.https_server_thread.start() - - def test_nocheckcertificate(self): - with FakeYDL({'logger': FakeLogger()}) as ydl: - with self.assertRaises(urllib.error.URLError): - ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers')) - - with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl: - r = ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers')) - self.assertEqual(r.status, 200) + cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True) + cls.https_port = http_server_port(cls.https_httpd) + cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever) + cls.https_server_thread.daemon = True + cls.https_server_thread.start() + + +@pytest.fixture +def handler(request): + RH_KEY = request.param + if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler): + handler = RH_KEY + elif RH_KEY in _REQUEST_HANDLERS: + handler = _REQUEST_HANDLERS[RH_KEY] + else: + pytest.skip(f'{RH_KEY} request handler is not available') + + return functools.partial(handler, logger=FakeLogger) + + +class TestHTTPRequestHandler(TestRequestHandlerBase): + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_verify_cert(self, handler): + with handler() as rh: + with pytest.raises(CertificateVerifyError): + validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) + + with handler(verify=False) as rh: + r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) + assert r.status == 200 r.close() - def test_percent_encode(self): - with FakeYDL() as ydl: + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_ssl_error(self, handler): + # HTTPS server with too old TLS version + # XXX: is there a better way to test this than to create a new server? + https_httpd = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True) + https_port = http_server_port(https_httpd) + https_server_thread = threading.Thread(target=https_httpd.serve_forever) + https_server_thread.daemon = True + https_server_thread.start() + + with handler(verify=False) as rh: + with pytest.raises(SSLError, match='sslv3 alert handshake failure') as exc_info: + validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) + assert not issubclass(exc_info.type, CertificateVerifyError) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_percent_encode(self, handler): + with handler() as rh: # Unicode characters should be encoded with uppercase percent-encoding - res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/中文.html')) - self.assertEqual(res.status, 200) + res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html')) + assert res.status == 200 res.close() # don't normalize existing percent encodings - res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/%c7%9f')) - self.assertEqual(res.status, 200) + res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f')) + assert res.status == 200 res.close() - def test_unicode_path_redirection(self): - with FakeYDL() as ydl: - r = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect')) - self.assertEqual(r.url, f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html') + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_unicode_path_redirection(self, handler): + with handler() as rh: + r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect')) + assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' r.close() - def test_redirect(self): - with FakeYDL() as ydl: - def do_req(redirect_status, method): + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_raise_http_error(self, handler): + with handler() as rh: + for bad_status in (400, 500, 599, 302): + with pytest.raises(HTTPError): + validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status))) + + # Should not raise an error + validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_response_url(self, handler): + with handler() as rh: + # Response url should be that of the last url in redirect chain + res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301')) + assert res.url == f'http://127.0.0.1:{self.http_port}/method' + res.close() + res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')) + assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200' + res2.close() + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_redirect(self, handler): + with handler() as rh: + def do_req(redirect_status, method, assert_no_content=False): data = b'testdata' if method in ('POST', 'PUT') else None - res = ydl.urlopen(sanitized_Request( - f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data)) - return res.read().decode('utf-8'), res.headers.get('method', '') + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data)) + + headers = b'' + data_sent = b'' + if data is not None: + data_sent += res.read(len(data)) + if data_sent != data: + headers += data_sent + data_sent = b'' + + headers += res.read() + + if assert_no_content or data is None: + assert b'Content-Type' not in headers + assert b'Content-Length' not in headers + else: + assert b'Content-Type' in headers + assert b'Content-Length' in headers + + return data_sent.decode(), res.headers.get('method', '') # A 303 must either use GET or HEAD for subsequent request - self.assertEqual(do_req(303, 'POST'), ('', 'GET')) - self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD')) + assert do_req(303, 'POST', True) == ('', 'GET') + assert do_req(303, 'HEAD') == ('', 'HEAD') - self.assertEqual(do_req(303, 'PUT'), ('', 'GET')) + assert do_req(303, 'PUT', True) == ('', 'GET') # 301 and 302 turn POST only into a GET - # XXX: we should also test if the Content-Type and Content-Length headers are removed - self.assertEqual(do_req(301, 'POST'), ('', 'GET')) - self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD')) - self.assertEqual(do_req(302, 'POST'), ('', 'GET')) - self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD')) + assert do_req(301, 'POST', True) == ('', 'GET') + assert do_req(301, 'HEAD') == ('', 'HEAD') + assert do_req(302, 'POST', True) == ('', 'GET') + assert do_req(302, 'HEAD') == ('', 'HEAD') - self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT')) - self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT')) + assert do_req(301, 'PUT') == ('testdata', 'PUT') + assert do_req(302, 'PUT') == ('testdata', 'PUT') # 307 and 308 should not change method for m in ('POST', 'PUT'): - self.assertEqual(do_req(307, m), ('testdata', m)) - self.assertEqual(do_req(308, m), ('testdata', m)) + assert do_req(307, m) == ('testdata', m) + assert do_req(308, m) == ('testdata', m) - self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD')) - self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD')) + assert do_req(307, 'HEAD') == ('', 'HEAD') + assert do_req(308, 'HEAD') == ('', 'HEAD') # These should not redirect and instead raise an HTTPError for code in (300, 304, 305, 306): - with self.assertRaises(urllib.error.HTTPError): + with pytest.raises(HTTPError): do_req(code, 'GET') - def test_content_type(self): - # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28 - with FakeYDL({'nocheckcertificate': True}) as ydl: - # method should be auto-detected as POST - r = sanitized_Request(f'https://localhost:{self.https_port}/headers', data=urlencode_postdata({'test': 'test'})) - - headers = ydl.urlopen(r).read().decode('utf-8') - self.assertIn('Content-Type: application/x-www-form-urlencoded', headers) - - # test http - r = sanitized_Request(f'http://localhost:{self.http_port}/headers', data=urlencode_postdata({'test': 'test'})) - headers = ydl.urlopen(r).read().decode('utf-8') - self.assertIn('Content-Type: application/x-www-form-urlencoded', headers) - - def test_cookiejar(self): - with FakeYDL() as ydl: - ydl.cookiejar.set_cookie(http.cookiejar.Cookie( - 0, 'test', 'ytdlp', None, False, '127.0.0.1', True, - False, '/headers', True, False, None, False, None, None, {})) - data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read() - self.assertIn(b'Cookie: test=ytdlp', data) - - def test_passed_cookie_header(self): + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_request_cookie_header(self, handler): # We should accept a Cookie header being passed as in normal headers and handle it appropriately. - with FakeYDL() as ydl: + with handler() as rh: # Specified Cookie header should be used - res = ydl.urlopen( - sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers', - headers={'Cookie': 'test=test'})).read().decode('utf-8') - self.assertIn('Cookie: test=test', res) - - # Specified Cookie header should be removed on any redirect - res = ydl.urlopen( - sanitized_Request(f'http://127.0.0.1:{self.http_port}/308-to-headers', headers={'Cookie': 'test=test'})).read().decode('utf-8') - self.assertNotIn('Cookie: test=test', res) - - # Specified Cookie header should override global cookiejar for that request - ydl.cookiejar.set_cookie(http.cookiejar.Cookie( - version=0, name='test', value='ytdlp', port=None, port_specified=False, - domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/', - path_specified=True, secure=False, expires=None, discard=False, comment=None, - comment_url=None, rest={})) - - data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'Cookie': 'test=test'})).read() - self.assertNotIn(b'Cookie: test=ytdlp', data) - self.assertIn(b'Cookie: test=test', data) - - def test_no_compression_compat_header(self): - with FakeYDL() as ydl: - data = ydl.urlopen( - sanitized_Request( + res = validate_and_send( + rh, Request( f'http://127.0.0.1:{self.http_port}/headers', - headers={'Youtubedl-no-compression': True})).read() - self.assertIn(b'Accept-Encoding: identity', data) - self.assertNotIn(b'youtubedl-no-compression', data.lower()) + headers={'Cookie': 'test=test'})).read().decode() + assert 'Cookie: test=test' in res - def test_gzip_trailing_garbage(self): - # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5 - # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f - with FakeYDL() as ydl: - data = ydl.urlopen(sanitized_Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode('utf-8') - self.assertEqual(data, '<html><video src="/vid.mp4" /></html>') - - @unittest.skipUnless(brotli, 'brotli support is not installed') - def test_brotli(self): - with FakeYDL() as ydl: - res = ydl.urlopen( - sanitized_Request( + # Specified Cookie header should be removed on any redirect + res = validate_and_send( + rh, Request( + f'http://127.0.0.1:{self.http_port}/308-to-headers', + headers={'Cookie': 'test=test'})).read().decode() + assert 'Cookie: test=test' not in res + + # Specified Cookie header should override global cookiejar for that request + cookiejar = http.cookiejar.CookieJar() + cookiejar.set_cookie(http.cookiejar.Cookie( + version=0, name='test', value='ytdlp', port=None, port_specified=False, + domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/', + path_specified=True, secure=False, expires=None, discard=False, comment=None, + comment_url=None, rest={})) + + with handler(cookiejar=cookiejar) as rh: + data = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read() + assert b'Cookie: test=ytdlp' not in data + assert b'Cookie: test=test' in data + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_redirect_loop(self, handler): + with handler() as rh: + with pytest.raises(HTTPError, match='redirect loop'): + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_incompleteread(self, handler): + with handler(timeout=2) as rh: + with pytest.raises(IncompleteRead): + validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_cookies(self, handler): + cookiejar = http.cookiejar.CookieJar() + cookiejar.set_cookie(http.cookiejar.Cookie( + 0, 'test', 'ytdlp', None, False, '127.0.0.1', True, + False, '/headers', True, False, None, False, None, None, {})) + + with handler(cookiejar=cookiejar) as rh: + data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() + assert b'Cookie: test=ytdlp' in data + + # Per request + with handler() as rh: + data = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() + assert b'Cookie: test=ytdlp' in data + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_headers(self, handler): + + with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: + # Global Headers + data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() + assert b'Test1: test' in data + + # Per request headers, merged with global + data = validate_and_send(rh, Request( + f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read() + assert b'Test1: test' in data + assert b'Test2: changed' in data + assert b'Test2: test2' not in data + assert b'Test3: test3' in data + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_timeout(self, handler): + with handler() as rh: + # Default timeout is 20 seconds, so this should go through + validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3')) + + with handler(timeout=0.5) as rh: + with pytest.raises(TransportError): + validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) + + # Per request timeout, should override handler timeout + validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_source_address(self, handler): + source_address = f'127.0.0.{random.randint(5, 255)}' + with handler(source_address=source_address) as rh: + data = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode() + assert source_address == data + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_gzip_trailing_garbage(self, handler): + with handler() as rh: + data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode() + assert data == '<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + @pytest.mark.skipif(not brotli, reason='brotli support is not installed') + def test_brotli(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'br'})) - self.assertEqual(res.headers.get('Content-Encoding'), 'br') - self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') - - def test_deflate(self): - with FakeYDL() as ydl: - res = ydl.urlopen( - sanitized_Request( + assert res.headers.get('Content-Encoding') == 'br' + assert res.read() == b'<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_deflate(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'deflate'})) - self.assertEqual(res.headers.get('Content-Encoding'), 'deflate') - self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') - - def test_gzip(self): - with FakeYDL() as ydl: - res = ydl.urlopen( - sanitized_Request( + assert res.headers.get('Content-Encoding') == 'deflate' + assert res.read() == b'<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_gzip(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'gzip'})) - self.assertEqual(res.headers.get('Content-Encoding'), 'gzip') - self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') + assert res.headers.get('Content-Encoding') == 'gzip' + assert res.read() == b'<html><video src="/vid.mp4" /></html>' - def test_multiple_encodings(self): - # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4 - with FakeYDL() as ydl: + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_multiple_encodings(self, handler): + with handler() as rh: for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'): - res = ydl.urlopen( - sanitized_Request( + res = validate_and_send( + rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': pair})) - self.assertEqual(res.headers.get('Content-Encoding'), pair) - self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') - - def test_unsupported_encoding(self): - # it should return the raw content - with FakeYDL() as ydl: - res = ydl.urlopen( - sanitized_Request( + assert res.headers.get('Content-Encoding') == pair + assert res.read() == b'<html><video src="/vid.mp4" /></html>' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_unsupported_encoding(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'unsupported'})) - self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported') - self.assertEqual(res.read(), b'raw') + assert res.headers.get('Content-Encoding') == 'unsupported' + assert res.read() == b'raw' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_read(self, handler): + with handler() as rh: + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers')) + assert res.readable() + assert res.read(1) == b'H' + assert res.read(3) == b'ost' + + +class TestHTTPProxy(TestRequestHandlerBase): + @classmethod + def setup_class(cls): + super().setup_class() + # HTTP Proxy server + cls.proxy = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), _build_proxy_handler('normal')) + cls.proxy_port = http_server_port(cls.proxy) + cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever) + cls.proxy_thread.daemon = True + cls.proxy_thread.start() + # Geo proxy server + cls.geo_proxy = http.server.ThreadingHTTPServer( + ('127.0.0.1', 0), _build_proxy_handler('geo')) + cls.geo_port = http_server_port(cls.geo_proxy) + cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever) + cls.geo_proxy_thread.daemon = True + cls.geo_proxy_thread.start() + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_http_proxy(self, handler): + http_proxy = f'http://127.0.0.1:{self.proxy_port}' + geo_proxy = f'http://127.0.0.1:{self.geo_port}' + + # Test global http proxy + # Test per request http proxy + # Test per request http proxy disables proxy + url = 'http://foo.com/bar' -class TestClientCert(unittest.TestCase): - def setUp(self): + # Global HTTP proxy + with handler(proxies={'http': http_proxy}) as rh: + res = validate_and_send(rh, Request(url)).read().decode() + assert res == f'normal: {url}' + + # Per request proxy overrides global + res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode() + assert res == f'geo: {url}' + + # and setting to None disables all proxies for that request + real_url = f'http://127.0.0.1:{self.http_port}/headers' + res = validate_and_send( + rh, Request(real_url, proxies={'http': None})).read().decode() + assert res != f'normal: {real_url}' + assert 'Accept' in res + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_noproxy(self, handler): + with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh: + # NO_PROXY + for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'): + nop_response = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode( + 'utf-8') + assert 'Accept' in nop_response + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_allproxy(self, handler): + url = 'http://foo.com/bar' + with handler() as rh: + response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode( + 'utf-8') + assert response == f'normal: {url}' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_http_proxy_with_idn(self, handler): + with handler(proxies={ + 'http': f'http://127.0.0.1:{self.proxy_port}', + }) as rh: + url = 'http://中文.tw/' + response = rh.send(Request(url)).read().decode() + # b'xn--fiq228c' is '中文'.encode('idna') + assert response == 'normal: http://xn--fiq228c.tw/' + + +class TestClientCertificate: + + @classmethod + def setup_class(cls): certfn = os.path.join(TEST_DIR, 'testcert.pem') - self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate') - cacertfn = os.path.join(self.certdir, 'ca.crt') - self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler) + cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate') + cacertfn = os.path.join(cls.certdir, 'ca.crt') + cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.verify_mode = ssl.CERT_REQUIRED sslctx.load_verify_locations(cafile=cacertfn) sslctx.load_cert_chain(certfn, None) - self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True) - self.port = http_server_port(self.httpd) - self.server_thread = threading.Thread(target=self.httpd.serve_forever) - self.server_thread.daemon = True - self.server_thread.start() - - def _run_test(self, **params): - ydl = YoutubeDL({ - 'logger': FakeLogger(), + cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True) + cls.port = http_server_port(cls.httpd) + cls.server_thread = threading.Thread(target=cls.httpd.serve_forever) + cls.server_thread.daemon = True + cls.server_thread.start() + + def _run_test(self, handler, **handler_kwargs): + with handler( # Disable client-side validation of unacceptable self-signed testcert.pem # The test is of a check on the server side, so unaffected - 'nocheckcertificate': True, - **params, + verify=False, + **handler_kwargs, + ) as rh: + validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode() + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_certificate_combined_nopass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'), }) - r = ydl.extract_info(f'https://127.0.0.1:{self.port}/video.html') - self.assertEqual(r['url'], f'https://127.0.0.1:{self.port}/vid.mp4') - def test_certificate_combined_nopass(self): - self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt')) + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_certificate_nocombined_nopass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'client.crt'), + 'client_certificate_key': os.path.join(self.certdir, 'client.key'), + }) - def test_certificate_nocombined_nopass(self): - self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), - client_certificate_key=os.path.join(self.certdir, 'client.key')) + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_certificate_combined_pass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'), + 'client_certificate_password': 'foobar', + }) - def test_certificate_combined_pass(self): - self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithencryptedkey.crt'), - client_certificate_password='foobar') + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_certificate_nocombined_pass(self, handler): + self._run_test(handler, client_cert={ + 'client_certificate': os.path.join(self.certdir, 'client.crt'), + 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'), + 'client_certificate_password': 'foobar', + }) - def test_certificate_nocombined_pass(self): - self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), - client_certificate_key=os.path.join(self.certdir, 'clientencrypted.key'), - client_certificate_password='foobar') +class TestUrllibRequestHandler(TestRequestHandlerBase): + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_file_urls(self, handler): + # See https://github.com/ytdl-org/youtube-dl/issues/8227 + tf = tempfile.NamedTemporaryFile(delete=False) + tf.write(b'foobar') + tf.close() + req = Request(pathlib.Path(tf.name).as_uri()) + with handler() as rh: + with pytest.raises(UnsupportedRequest): + rh.validate(req) + + # Test that urllib never loaded FileHandler + with pytest.raises(TransportError): + rh.send(req) + + with handler(enable_file_urls=True) as rh: + res = validate_and_send(rh, req) + assert res.read() == b'foobar' + res.close() -def _build_proxy_handler(name): - class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): - proxy_name = name + os.unlink(tf.name) - def log_message(self, format, *args): + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_http_error_returns_content(self, handler): + # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost + def get_response(): + with handler() as rh: + # headers url + try: + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404')) + except HTTPError as e: + return e.response + + assert get_response().read() == b'<html></html>' + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_verify_cert_error_text(self, handler): + # Check the output of the error message + with handler() as rh: + with pytest.raises( + CertificateVerifyError, + match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate' + ): + validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_httplib_validation_errors(self, handler): + with handler() as rh: + + # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256 + with pytest.raises(RequestError, match='method can\'t contain control characters') as exc_info: + validate_and_send(rh, Request('http://127.0.0.1', method='GET\n')) + assert not isinstance(exc_info.value, TransportError) + + # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265 + with pytest.raises(RequestError, match='URL can\'t contain control characters') as exc_info: + validate_and_send(rh, Request('http://127.0.0. 1', method='GET\n')) + assert not isinstance(exc_info.value, TransportError) + + # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50 + with pytest.raises(RequestError, match='Invalid header name') as exc_info: + validate_and_send(rh, Request('http://127.0.0.1', headers={'foo\n': 'bar'})) + assert not isinstance(exc_info.value, TransportError) + + +def run_validation(handler, fail, req, **handler_kwargs): + with handler(**handler_kwargs) as rh: + if fail: + with pytest.raises(UnsupportedRequest): + rh.validate(req) + else: + rh.validate(req) + + +class TestRequestHandlerValidation: + + class ValidationRH(RequestHandler): + def _send(self, request): + raise RequestError('test') + + class NoCheckRH(ValidationRH): + _SUPPORTED_FEATURES = None + _SUPPORTED_PROXY_SCHEMES = None + _SUPPORTED_URL_SCHEMES = None + + class HTTPSupportedRH(ValidationRH): + _SUPPORTED_URL_SCHEMES = ('http',) + + URL_SCHEME_TESTS = [ + # scheme, expected to fail, handler kwargs + ('Urllib', [ + ('http', False, {}), + ('https', False, {}), + ('data', False, {}), + ('ftp', False, {}), + ('file', True, {}), + ('file', False, {'enable_file_urls': True}), + ]), + (NoCheckRH, [('http', False, {})]), + (ValidationRH, [('http', True, {})]) + ] + + PROXY_SCHEME_TESTS = [ + # scheme, expected to fail + ('Urllib', [ + ('http', False), + ('https', True), + ('socks4', False), + ('socks4a', False), + ('socks5', False), + ('socks5h', False), + ('socks', True), + ]), + (NoCheckRH, [('http', False)]), + (HTTPSupportedRH, [('http', True)]), + ] + + PROXY_KEY_TESTS = [ + # key, expected to fail + ('Urllib', [ + ('all', False), + ('unrelated', False), + ]), + (NoCheckRH, [('all', False)]), + (HTTPSupportedRH, [('all', True)]), + (HTTPSupportedRH, [('no', True)]), + ] + + @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [ + (handler_tests[0], scheme, fail, handler_kwargs) + for handler_tests in URL_SCHEME_TESTS + for scheme, fail, handler_kwargs in handler_tests[1] + + ], indirect=['handler']) + def test_url_scheme(self, handler, scheme, fail, handler_kwargs): + run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {})) + + @pytest.mark.parametrize('handler,fail', [('Urllib', False)], indirect=['handler']) + def test_no_proxy(self, handler, fail): + run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'})) + run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'}) + + @pytest.mark.parametrize('handler,proxy_key,fail', [ + (handler_tests[0], proxy_key, fail) + for handler_tests in PROXY_KEY_TESTS + for proxy_key, fail in handler_tests[1] + ], indirect=['handler']) + def test_proxy_key(self, handler, proxy_key, fail): + run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'})) + run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'}) + + @pytest.mark.parametrize('handler,scheme,fail', [ + (handler_tests[0], scheme, fail) + for handler_tests in PROXY_SCHEME_TESTS + for scheme, fail in handler_tests[1] + ], indirect=['handler']) + def test_proxy_scheme(self, handler, scheme, fail): + run_validation(handler, fail, Request('http://', proxies={'http': f'{scheme}://example.com'})) + run_validation(handler, fail, Request('http://'), proxies={'http': f'{scheme}://example.com'}) + + @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH], indirect=True) + def test_empty_proxy(self, handler): + run_validation(handler, False, Request('http://', proxies={'http': None})) + run_validation(handler, False, Request('http://'), proxies={'http': None}) + + @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1']) + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_missing_proxy_scheme(self, handler, proxy_url): + run_validation(handler, True, Request('http://', proxies={'http': 'example.com'})) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_cookiejar_extension(self, handler): + run_validation(handler, True, Request('http://', extensions={'cookiejar': 'notacookiejar'})) + + @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) + def test_timeout_extension(self, handler): + run_validation(handler, True, Request('http://', extensions={'timeout': 'notavalidtimeout'})) + + def test_invalid_request_type(self): + rh = self.ValidationRH(logger=FakeLogger()) + for method in (rh.validate, rh.send): + with pytest.raises(TypeError, match='Expected an instance of Request'): + method('not a request') + + +class FakeResponse(Response): + def __init__(self, request): + # XXX: we could make request part of standard response interface + self.request = request + super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url) + + +class FakeRH(RequestHandler): + + def _validate(self, request): + return + + def _send(self, request: Request): + if request.url.startswith('ssl://'): + raise SSLError(request.url[len('ssl://'):]) + return FakeResponse(request) + + +class FakeRHYDL(FakeYDL): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._request_director = self.build_request_director([FakeRH]) + + +class TestRequestDirector: + + def test_handler_operations(self): + director = RequestDirector(logger=FakeLogger()) + handler = FakeRH(logger=FakeLogger()) + director.add_handler(handler) + assert director.handlers.get(FakeRH.RH_KEY) is handler + + # Handler should overwrite + handler2 = FakeRH(logger=FakeLogger()) + director.add_handler(handler2) + assert director.handlers.get(FakeRH.RH_KEY) is not handler + assert director.handlers.get(FakeRH.RH_KEY) is handler2 + assert len(director.handlers) == 1 + + class AnotherFakeRH(FakeRH): pass + director.add_handler(AnotherFakeRH(logger=FakeLogger())) + assert len(director.handlers) == 2 + assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY - def do_GET(self): - self.send_response(200) - self.send_header('Content-Type', 'text/plain; charset=utf-8') - self.end_headers() - self.wfile.write(f'{self.proxy_name}: {self.path}'.encode()) - return HTTPTestRequestHandler + director.handlers.pop(FakeRH.RH_KEY, None) + assert director.handlers.get(FakeRH.RH_KEY) is None + assert len(director.handlers) == 1 + # RequestErrors should passthrough + with pytest.raises(SSLError): + director.send(Request('ssl://something')) -class TestProxy(unittest.TestCase): - def setUp(self): - self.proxy = http.server.HTTPServer( - ('127.0.0.1', 0), _build_proxy_handler('normal')) - self.port = http_server_port(self.proxy) - self.proxy_thread = threading.Thread(target=self.proxy.serve_forever) - self.proxy_thread.daemon = True - self.proxy_thread.start() + def test_send(self): + director = RequestDirector(logger=FakeLogger()) + with pytest.raises(RequestError): + director.send(Request('any://')) + director.add_handler(FakeRH(logger=FakeLogger())) + assert isinstance(director.send(Request('http://')), FakeResponse) - self.geo_proxy = http.server.HTTPServer( - ('127.0.0.1', 0), _build_proxy_handler('geo')) - self.geo_port = http_server_port(self.geo_proxy) - self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever) - self.geo_proxy_thread.daemon = True - self.geo_proxy_thread.start() - - def test_proxy(self): - geo_proxy = f'127.0.0.1:{self.geo_port}' - ydl = YoutubeDL({ - 'proxy': f'127.0.0.1:{self.port}', - 'geo_verification_proxy': geo_proxy, - }) - url = 'http://foo.com/bar' - response = ydl.urlopen(url).read().decode() - self.assertEqual(response, f'normal: {url}') + def test_unsupported_handlers(self): + director = RequestDirector(logger=FakeLogger()) + director.add_handler(FakeRH(logger=FakeLogger())) - req = urllib.request.Request(url) - req.add_header('Ytdl-request-proxy', geo_proxy) - response = ydl.urlopen(req).read().decode() - self.assertEqual(response, f'geo: {url}') + class SupportedRH(RequestHandler): + _SUPPORTED_URL_SCHEMES = ['http'] - def test_proxy_with_idn(self): - ydl = YoutubeDL({ - 'proxy': f'127.0.0.1:{self.port}', - }) - url = 'http://中文.tw/' - response = ydl.urlopen(url).read().decode() - # b'xn--fiq228c' is '中文'.encode('idna') - self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') + def _send(self, request: Request): + return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url) + # This handler should by default take preference over FakeRH + director.add_handler(SupportedRH(logger=FakeLogger())) + assert director.send(Request('http://')).read() == b'supported' + assert director.send(Request('any://')).read() == b'' -class TestFileURL(unittest.TestCase): - # See https://github.com/ytdl-org/youtube-dl/issues/8227 - def test_file_urls(self): - tf = tempfile.NamedTemporaryFile(delete=False) - tf.write(b'foobar') - tf.close() - url = pathlib.Path(tf.name).as_uri() + director.handlers.pop(FakeRH.RH_KEY) + with pytest.raises(NoSupportingHandlers): + director.send(Request('any://')) + + def test_unexpected_error(self): + director = RequestDirector(logger=FakeLogger()) + + class UnexpectedRH(FakeRH): + def _send(self, request: Request): + raise TypeError('something') + + director.add_handler(UnexpectedRH(logger=FakeLogger)) + with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'): + director.send(Request('any://')) + + director.handlers.clear() + assert len(director.handlers) == 0 + + # Should not be fatal + director.add_handler(FakeRH(logger=FakeLogger())) + director.add_handler(UnexpectedRH(logger=FakeLogger)) + assert director.send(Request('any://')) + + +# XXX: do we want to move this to test_YoutubeDL.py? +class TestYoutubeDLNetworking: + + @staticmethod + def build_handler(ydl, handler: RequestHandler = FakeRH): + return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY) + + def test_compat_opener(self): with FakeYDL() as ydl: - self.assertRaisesRegex( - urllib.error.URLError, 'file:// URLs are explicitly disabled in yt-dlp for security reasons', ydl.urlopen, url) - with FakeYDL({'enable_file_urls': True}) as ydl: - res = ydl.urlopen(url) - self.assertEqual(res.read(), b'foobar') - res.close() - os.unlink(tf.name) + with warnings.catch_warnings(): + warnings.simplefilter('ignore', category=DeprecationWarning) + assert isinstance(ydl._opener, urllib.request.OpenerDirector) + + @pytest.mark.parametrize('proxy,expected', [ + ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}), + ('', {'all': '__noproxy__'}), + (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # env, set https + ]) + def test_proxy(self, proxy, expected): + old_http_proxy = os.environ.get('HTTP_PROXY') + try: + os.environ['HTTP_PROXY'] = 'http://127.0.0.1:8081' # ensure that provided proxies override env + with FakeYDL({'proxy': proxy}) as ydl: + assert ydl.proxies == expected + finally: + if old_http_proxy: + os.environ['HTTP_PROXY'] = old_http_proxy + + def test_compat_request(self): + with FakeRHYDL() as ydl: + assert ydl.urlopen('test://') + urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'}) + urllib_req.add_unredirected_header('Cookie', 'bob=bob') + urllib_req.timeout = 2 + + req = ydl.urlopen(urllib_req).request + assert req.url == urllib_req.get_full_url() + assert req.data == urllib_req.data + assert req.method == urllib_req.get_method() + assert 'X-Test' in req.headers + assert 'Cookie' in req.headers + assert req.extensions.get('timeout') == 2 + + with pytest.raises(AssertionError): + ydl.urlopen(None) + + def test_extract_basic_auth(self): + with FakeRHYDL() as ydl: + res = ydl.urlopen(Request('http://user:pass@foo.bar')) + assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz' + + def test_sanitize_url(self): + with FakeRHYDL() as ydl: + res = ydl.urlopen(Request('httpss://foo.bar')) + assert res.request.url == 'https://foo.bar' + + def test_file_urls_error(self): + # use urllib handler + with FakeYDL() as ydl: + with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'): + ydl.urlopen('file://') + + def test_legacy_server_connect_error(self): + with FakeRHYDL() as ydl: + for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'): + with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'): + ydl.urlopen(f'ssl://{error}') + + with pytest.raises(SSLError, match='testerror'): + ydl.urlopen('ssl://testerror') + + @pytest.mark.parametrize('proxy_key,proxy_url,expected', [ + ('http', '__noproxy__', None), + ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'), + ('https', 'example.com', 'http://example.com'), + ('https', 'socks5://example.com', 'socks5h://example.com'), + ('http', 'socks://example.com', 'socks4://example.com'), + ('http', 'socks4://example.com', 'socks4://example.com'), + ]) + def test_clean_proxy(self, proxy_key, proxy_url, expected): + # proxies should be cleaned in urlopen() + with FakeRHYDL() as ydl: + req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request + assert req.proxies[proxy_key] == expected + + # and should also be cleaned when building the handler + env_key = f'{proxy_key.upper()}_PROXY' + old_env_proxy = os.environ.get(env_key) + try: + os.environ[env_key] = proxy_url # ensure that provided proxies override env + with FakeYDL() as ydl: + rh = self.build_handler(ydl) + assert rh.proxies[proxy_key] == expected + finally: + if old_env_proxy: + os.environ[env_key] = old_env_proxy + + def test_clean_proxy_header(self): + with FakeRHYDL() as ydl: + req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request + assert 'ytdl-request-proxy' not in req.headers + assert req.proxies == {'all': 'http://foo.bar'} + + with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl: + rh = self.build_handler(ydl) + assert 'ytdl-request-proxy' not in rh.headers + assert rh.proxies == {'all': 'http://foo.bar'} + + def test_clean_header(self): + with FakeRHYDL() as ydl: + res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True})) + assert 'Youtubedl-no-compression' not in res.request.headers + assert res.request.headers.get('Accept-Encoding') == 'identity' + + with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl: + rh = self.build_handler(ydl) + assert 'Youtubedl-no-compression' not in rh.headers + assert rh.headers.get('Accept-Encoding') == 'identity' + + def test_build_handler_params(self): + with FakeYDL({ + 'http_headers': {'test': 'testtest'}, + 'socket_timeout': 2, + 'proxy': 'http://127.0.0.1:8080', + 'source_address': '127.0.0.45', + 'debug_printtraffic': True, + 'compat_opts': ['no-certifi'], + 'nocheckcertificate': True, + 'legacy_server_connect': True, + }) as ydl: + rh = self.build_handler(ydl) + assert rh.headers.get('test') == 'testtest' + assert 'Accept' in rh.headers # ensure std_headers are still there + assert rh.timeout == 2 + assert rh.proxies.get('all') == 'http://127.0.0.1:8080' + assert rh.source_address == '127.0.0.45' + assert rh.verbose is True + assert rh.prefer_system_certs is True + assert rh.verify is False + assert rh.legacy_ssl_support is True + + @pytest.mark.parametrize('ydl_params', [ + {'client_certificate': 'fakecert.crt'}, + {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'}, + {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'}, + {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'}, + ]) + def test_client_certificate(self, ydl_params): + with FakeYDL(ydl_params) as ydl: + rh = self.build_handler(ydl) + assert rh._client_cert == ydl_params # XXX: Too bound to implementation + + def test_urllib_file_urls(self): + with FakeYDL({'enable_file_urls': False}) as ydl: + rh = self.build_handler(ydl, UrllibRH) + assert rh.enable_file_urls is False + with FakeYDL({'enable_file_urls': True}) as ydl: + rh = self.build_handler(ydl, UrllibRH) + assert rh.enable_file_urls is True + + +class TestRequest: + + def test_query(self): + req = Request('http://example.com?q=something', query={'v': 'xyz'}) + assert req.url == 'http://example.com?q=something&v=xyz' + + req.update(query={'v': '123'}) + assert req.url == 'http://example.com?q=something&v=123' + req.update(url='http://example.com', query={'v': 'xyz'}) + assert req.url == 'http://example.com?v=xyz' + + def test_method(self): + req = Request('http://example.com') + assert req.method == 'GET' + req.data = b'test' + assert req.method == 'POST' + req.data = None + assert req.method == 'GET' + req.data = b'test2' + req.method = 'PUT' + assert req.method == 'PUT' + req.data = None + assert req.method == 'PUT' + with pytest.raises(TypeError): + req.method = 1 + + def test_request_helpers(self): + assert HEADRequest('http://example.com').method == 'HEAD' + assert PUTRequest('http://example.com').method == 'PUT' + + def test_headers(self): + req = Request('http://example.com', headers={'tesT': 'test'}) + assert req.headers == HTTPHeaderDict({'test': 'test'}) + req.update(headers={'teSt2': 'test2'}) + assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'}) + + req.headers = new_headers = HTTPHeaderDict({'test': 'test'}) + assert req.headers == HTTPHeaderDict({'test': 'test'}) + assert req.headers is new_headers + + # test converts dict to case insensitive dict + req.headers = new_headers = {'test2': 'test2'} + assert isinstance(req.headers, HTTPHeaderDict) + assert req.headers is not new_headers + + with pytest.raises(TypeError): + req.headers = None + + def test_data_type(self): + req = Request('http://example.com') + assert req.data is None + # test bytes is allowed + req.data = b'test' + assert req.data == b'test' + # test iterable of bytes is allowed + i = [b'test', b'test2'] + req.data = i + assert req.data == i + + # test file-like object is allowed + f = io.BytesIO(b'test') + req.data = f + assert req.data == f + + # common mistake: test str not allowed + with pytest.raises(TypeError): + req.data = 'test' + assert req.data != 'test' + + # common mistake: test dict is not allowed + with pytest.raises(TypeError): + req.data = {'test': 'test'} + assert req.data != {'test': 'test'} + + def test_content_length_header(self): + req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'') + assert req.headers.get('Content-Length') == '0' + + req.data = b'test' + assert 'Content-Length' not in req.headers + + req = Request('http://example.com', headers={'Content-Length': '10'}) + assert 'Content-Length' not in req.headers + + def test_content_type_header(self): + req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test') + assert req.headers.get('Content-Type') == 'test' + req.data = b'test2' + assert req.headers.get('Content-Type') == 'test' + req.data = None + assert 'Content-Type' not in req.headers + req.data = b'test3' + assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded' + + def test_proxies(self): + req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'}) + assert req.proxies == {'http': 'http://127.0.0.1:8080'} + + def test_extensions(self): + req = Request(url='http://example.com', extensions={'timeout': 2}) + assert req.extensions == {'timeout': 2} + + def test_copy(self): + req = Request( + url='http://example.com', + extensions={'cookiejar': CookieJar()}, + headers={'Accept-Encoding': 'br'}, + proxies={'http': 'http://127.0.0.1'}, + data=[b'123'] + ) + req_copy = req.copy() + assert req_copy is not req + assert req_copy.url == req.url + assert req_copy.headers == req.headers + assert req_copy.headers is not req.headers + assert req_copy.proxies == req.proxies + assert req_copy.proxies is not req.proxies + + # Data is not able to be copied + assert req_copy.data == req.data + assert req_copy.data is req.data + + # Shallow copy extensions + assert req_copy.extensions is not req.extensions + assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar'] + + # Subclasses are copied by default + class AnotherRequest(Request): + pass -if __name__ == '__main__': - unittest.main() + req = AnotherRequest(url='http://127.0.0.1') + assert isinstance(req.copy(), AnotherRequest) + + def test_url(self): + req = Request(url='https://фtest.example.com/ some spaceв?ä=c',) + assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c' + + assert Request(url='//example.com').url == 'http://example.com' + + with pytest.raises(TypeError): + Request(url='https://').url = None + + +class TestResponse: + + @pytest.mark.parametrize('reason,status,expected', [ + ('custom', 200, 'custom'), + (None, 404, 'Not Found'), # fallback status + ('', 403, 'Forbidden'), + (None, 999, None) + ]) + def test_reason(self, reason, status, expected): + res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason) + assert res.reason == expected + + def test_headers(self): + headers = Message() + headers.add_header('Test', 'test') + headers.add_header('Test', 'test2') + headers.add_header('content-encoding', 'br') + res = Response(io.BytesIO(b''), headers=headers, url='test://') + assert res.headers.get_all('test') == ['test', 'test2'] + assert 'Content-Encoding' in res.headers + + def test_get_header(self): + headers = Message() + headers.add_header('Set-Cookie', 'cookie1') + headers.add_header('Set-cookie', 'cookie2') + headers.add_header('Test', 'test') + headers.add_header('Test', 'test2') + res = Response(io.BytesIO(b''), headers=headers, url='test://') + assert res.get_header('test') == 'test, test2' + assert res.get_header('set-Cookie') == 'cookie1' + assert res.get_header('notexist', 'default') == 'default' + + def test_compat(self): + res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'}) + assert res.code == res.getcode() == res.status + assert res.geturl() == res.url + assert res.info() is res.headers + assert res.getheader('test') == res.get_header('test') |