aboutsummaryrefslogtreecommitdiffstats
path: root/test/test_networking.py
diff options
context:
space:
mode:
authorpukkandan <pukkandan.ytdlp@gmail.com>2023-07-15 14:30:08 +0530
committerpukkandan <pukkandan.ytdlp@gmail.com>2023-07-15 16:18:34 +0530
commitc365dba8430ee33abda85d31f95128605bf240eb (patch)
tree174d35c23267863ca89a1a030935b445edf56799 /test/test_networking.py
parent1b392f905d20ef1f1b300b180f867d43c9ce49b8 (diff)
downloadhypervideo-pre-c365dba8430ee33abda85d31f95128605bf240eb.tar.lz
hypervideo-pre-c365dba8430ee33abda85d31f95128605bf240eb.tar.xz
hypervideo-pre-c365dba8430ee33abda85d31f95128605bf240eb.zip
[networking] Add module (#2861)
No actual changes - code is only moved around
Diffstat (limited to 'test/test_networking.py')
-rw-r--r--test/test_networking.py531
1 files changed, 531 insertions, 0 deletions
diff --git a/test/test_networking.py b/test/test_networking.py
new file mode 100644
index 000000000..e4e66dce1
--- /dev/null
+++ b/test/test_networking.py
@@ -0,0 +1,531 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+import unittest
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import gzip
+import http.cookiejar
+import http.server
+import io
+import pathlib
+import ssl
+import tempfile
+import threading
+import urllib.error
+import urllib.request
+import zlib
+
+from test.helper import http_server_port
+from yt_dlp import YoutubeDL
+from yt_dlp.dependencies import brotli
+from yt_dlp.utils import sanitized_Request, urlencode_postdata
+
+from .helper import FakeYDL
+
+TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
+ protocol_version = 'HTTP/1.1'
+
+ def log_message(self, format, *args):
+ pass
+
+ def _headers(self):
+ payload = str(self.headers).encode('utf-8')
+ self.send_response(200)
+ self.send_header('Content-Type', 'application/json')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+
+ def _redirect(self):
+ self.send_response(int(self.path[len('/redirect_'):]))
+ self.send_header('Location', '/method')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+
+ def _method(self, method, payload=None):
+ self.send_response(200)
+ self.send_header('Content-Length', str(len(payload or '')))
+ self.send_header('Method', method)
+ self.end_headers()
+ if payload:
+ self.wfile.write(payload)
+
+ def _status(self, status):
+ payload = f'<html>{status} NOT FOUND</html>'.encode()
+ self.send_response(int(status))
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+
+ def _read_data(self):
+ if 'Content-Length' in self.headers:
+ return self.rfile.read(int(self.headers['Content-Length']))
+
+ def do_POST(self):
+ data = self._read_data()
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('POST', data)
+ elif self.path.startswith('/headers'):
+ self._headers()
+ else:
+ self._status(404)
+
+ def do_HEAD(self):
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('HEAD')
+ else:
+ self._status(404)
+
+ def do_PUT(self):
+ data = self._read_data()
+ if self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('PUT', data)
+ else:
+ self._status(404)
+
+ def do_GET(self):
+ if self.path == '/video.html':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload))) # required for persistent connections
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path == '/vid.mp4':
+ payload = b'\x00\x00\x00\x00\x20\x66\x74[video]'
+ self.send_response(200)
+ self.send_header('Content-Type', 'video/mp4')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path == '/%E4%B8%AD%E6%96%87.html':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path == '/%c7%9f':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+ elif self.path.startswith('/redirect_'):
+ self._redirect()
+ elif self.path.startswith('/method'):
+ self._method('GET')
+ elif self.path.startswith('/headers'):
+ self._headers()
+ elif self.path.startswith('/308-to-headers'):
+ self.send_response(308)
+ self.send_header('Location', '/headers')
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/trailing_garbage':
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.send_header('Content-Encoding', 'gzip')
+ buf = io.BytesIO()
+ with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+ f.write(payload)
+ compressed = buf.getvalue() + b'trailing garbage'
+ self.send_header('Content-Length', str(len(compressed)))
+ self.end_headers()
+ self.wfile.write(compressed)
+ elif self.path == '/302-non-ascii-redirect':
+ new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
+ self.send_response(301)
+ self.send_header('Location', new_url)
+ self.send_header('Content-Length', '0')
+ self.end_headers()
+ elif self.path == '/content-encoding':
+ encodings = self.headers.get('ytdl-encoding', '')
+ payload = b'<html><video src="/vid.mp4" /></html>'
+ for encoding in filter(None, (e.strip() for e in encodings.split(','))):
+ if encoding == 'br' and brotli:
+ payload = brotli.compress(payload)
+ elif encoding == 'gzip':
+ buf = io.BytesIO()
+ with gzip.GzipFile(fileobj=buf, mode='wb') as f:
+ f.write(payload)
+ payload = buf.getvalue()
+ elif encoding == 'deflate':
+ payload = zlib.compress(payload)
+ elif encoding == 'unsupported':
+ payload = b'raw'
+ break
+ else:
+ self._status(415)
+ return
+ self.send_response(200)
+ self.send_header('Content-Encoding', encodings)
+ self.send_header('Content-Length', str(len(payload)))
+ self.end_headers()
+ self.wfile.write(payload)
+
+ else:
+ self._status(404)
+
+ def send_header(self, keyword, value):
+ """
+ Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
+ This is against what is defined in RFC 3986, however we need to test we support this
+ since some sites incorrectly do this.
+ """
+ if keyword.lower() == 'connection':
+ return super().send_header(keyword, value)
+
+ if not hasattr(self, '_headers_buffer'):
+ self._headers_buffer = []
+
+ self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode())
+
+
+class FakeLogger:
+ def debug(self, msg):
+ pass
+
+ def warning(self, msg):
+ pass
+
+ def error(self, msg):
+ pass
+
+
+class TestHTTP(unittest.TestCase):
+ def setUp(self):
+ # HTTP server
+ self.http_httpd = http.server.ThreadingHTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ self.http_port = http_server_port(self.http_httpd)
+ self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
+ # FIXME: we should probably stop the http server thread after each test
+ # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
+ self.http_server_thread.daemon = True
+ self.http_server_thread.start()
+
+ # HTTPS server
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ self.https_httpd = http.server.ThreadingHTTPServer(
+ ('127.0.0.1', 0), HTTPTestRequestHandler)
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.load_cert_chain(certfn, None)
+ self.https_httpd.socket = sslctx.wrap_socket(self.https_httpd.socket, server_side=True)
+ self.https_port = http_server_port(self.https_httpd)
+ self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
+ self.https_server_thread.daemon = True
+ self.https_server_thread.start()
+
+ def test_nocheckcertificate(self):
+ with FakeYDL({'logger': FakeLogger()}) as ydl:
+ with self.assertRaises(urllib.error.URLError):
+ ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers'))
+
+ with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
+ r = ydl.urlopen(sanitized_Request(f'https://127.0.0.1:{self.https_port}/headers'))
+ self.assertEqual(r.status, 200)
+ r.close()
+
+ def test_percent_encode(self):
+ with FakeYDL() as ydl:
+ # Unicode characters should be encoded with uppercase percent-encoding
+ res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
+ self.assertEqual(res.status, 200)
+ res.close()
+ # don't normalize existing percent encodings
+ res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
+ self.assertEqual(res.status, 200)
+ res.close()
+
+ def test_unicode_path_redirection(self):
+ with FakeYDL() as ydl:
+ r = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
+ self.assertEqual(r.url, f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html')
+ r.close()
+
+ def test_redirect(self):
+ with FakeYDL() as ydl:
+ def do_req(redirect_status, method):
+ data = b'testdata' if method in ('POST', 'PUT') else None
+ res = ydl.urlopen(sanitized_Request(
+ f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
+ return res.read().decode('utf-8'), res.headers.get('method', '')
+
+ # A 303 must either use GET or HEAD for subsequent request
+ self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
+ self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
+
+ self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
+
+ # 301 and 302 turn POST only into a GET
+ # XXX: we should also test if the Content-Type and Content-Length headers are removed
+ self.assertEqual(do_req(301, 'POST'), ('', 'GET'))
+ self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
+ self.assertEqual(do_req(302, 'POST'), ('', 'GET'))
+ self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
+
+ self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
+ self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
+
+ # 307 and 308 should not change method
+ for m in ('POST', 'PUT'):
+ self.assertEqual(do_req(307, m), ('testdata', m))
+ self.assertEqual(do_req(308, m), ('testdata', m))
+
+ self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
+ self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
+
+ # These should not redirect and instead raise an HTTPError
+ for code in (300, 304, 305, 306):
+ with self.assertRaises(urllib.error.HTTPError):
+ do_req(code, 'GET')
+
+ def test_content_type(self):
+ # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
+ with FakeYDL({'nocheckcertificate': True}) as ydl:
+ # method should be auto-detected as POST
+ r = sanitized_Request(f'https://localhost:{self.https_port}/headers', data=urlencode_postdata({'test': 'test'}))
+
+ headers = ydl.urlopen(r).read().decode('utf-8')
+ self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+ # test http
+ r = sanitized_Request(f'http://localhost:{self.http_port}/headers', data=urlencode_postdata({'test': 'test'}))
+ headers = ydl.urlopen(r).read().decode('utf-8')
+ self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
+
+ def test_cookiejar(self):
+ with FakeYDL() as ydl:
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(
+ 0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
+ False, '/headers', True, False, None, False, None, None, {}))
+ data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
+ self.assertIn(b'Cookie: test=ytdlp', data)
+
+ def test_passed_cookie_header(self):
+ # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
+ with FakeYDL() as ydl:
+ # Specified Cookie header should be used
+ res = ydl.urlopen(
+ sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers',
+ headers={'Cookie': 'test=test'})).read().decode('utf-8')
+ self.assertIn('Cookie: test=test', res)
+
+ # Specified Cookie header should be removed on any redirect
+ res = ydl.urlopen(
+ sanitized_Request(f'http://127.0.0.1:{self.http_port}/308-to-headers', headers={'Cookie': 'test=test'})).read().decode('utf-8')
+ self.assertNotIn('Cookie: test=test', res)
+
+ # Specified Cookie header should override global cookiejar for that request
+ ydl.cookiejar.set_cookie(http.cookiejar.Cookie(
+ version=0, name='test', value='ytdlp', port=None, port_specified=False,
+ domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/',
+ path_specified=True, secure=False, expires=None, discard=False, comment=None,
+ comment_url=None, rest={}))
+
+ data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'Cookie': 'test=test'})).read()
+ self.assertNotIn(b'Cookie: test=ytdlp', data)
+ self.assertIn(b'Cookie: test=test', data)
+
+ def test_no_compression_compat_header(self):
+ with FakeYDL() as ydl:
+ data = ydl.urlopen(
+ sanitized_Request(
+ f'http://127.0.0.1:{self.http_port}/headers',
+ headers={'Youtubedl-no-compression': True})).read()
+ self.assertIn(b'Accept-Encoding: identity', data)
+ self.assertNotIn(b'youtubedl-no-compression', data.lower())
+
+ def test_gzip_trailing_garbage(self):
+ # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
+ # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
+ with FakeYDL() as ydl:
+ data = ydl.urlopen(sanitized_Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode('utf-8')
+ self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')
+
+ @unittest.skipUnless(brotli, 'brotli support is not installed')
+ def test_brotli(self):
+ with FakeYDL() as ydl:
+ res = ydl.urlopen(
+ sanitized_Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'br'}))
+ self.assertEqual(res.headers.get('Content-Encoding'), 'br')
+ self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+ def test_deflate(self):
+ with FakeYDL() as ydl:
+ res = ydl.urlopen(
+ sanitized_Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'deflate'}))
+ self.assertEqual(res.headers.get('Content-Encoding'), 'deflate')
+ self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+ def test_gzip(self):
+ with FakeYDL() as ydl:
+ res = ydl.urlopen(
+ sanitized_Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'gzip'}))
+ self.assertEqual(res.headers.get('Content-Encoding'), 'gzip')
+ self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+ def test_multiple_encodings(self):
+ # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
+ with FakeYDL() as ydl:
+ for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
+ res = ydl.urlopen(
+ sanitized_Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': pair}))
+ self.assertEqual(res.headers.get('Content-Encoding'), pair)
+ self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
+
+ def test_unsupported_encoding(self):
+ # it should return the raw content
+ with FakeYDL() as ydl:
+ res = ydl.urlopen(
+ sanitized_Request(
+ f'http://127.0.0.1:{self.http_port}/content-encoding',
+ headers={'ytdl-encoding': 'unsupported'}))
+ self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
+ self.assertEqual(res.read(), b'raw')
+
+
+class TestClientCert(unittest.TestCase):
+ def setUp(self):
+ certfn = os.path.join(TEST_DIR, 'testcert.pem')
+ self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
+ cacertfn = os.path.join(self.certdir, 'ca.crt')
+ self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
+ sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
+ sslctx.verify_mode = ssl.CERT_REQUIRED
+ sslctx.load_verify_locations(cafile=cacertfn)
+ sslctx.load_cert_chain(certfn, None)
+ self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True)
+ self.port = http_server_port(self.httpd)
+ self.server_thread = threading.Thread(target=self.httpd.serve_forever)
+ self.server_thread.daemon = True
+ self.server_thread.start()
+
+ def _run_test(self, **params):
+ ydl = YoutubeDL({
+ 'logger': FakeLogger(),
+ # Disable client-side validation of unacceptable self-signed testcert.pem
+ # The test is of a check on the server side, so unaffected
+ 'nocheckcertificate': True,
+ **params,
+ })
+ r = ydl.extract_info(f'https://127.0.0.1:{self.port}/video.html')
+ self.assertEqual(r['url'], f'https://127.0.0.1:{self.port}/vid.mp4')
+
+ def test_certificate_combined_nopass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt'))
+
+ def test_certificate_nocombined_nopass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'),
+ client_certificate_key=os.path.join(self.certdir, 'client.key'))
+
+ def test_certificate_combined_pass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
+ client_certificate_password='foobar')
+
+ def test_certificate_nocombined_pass(self):
+ self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'),
+ client_certificate_key=os.path.join(self.certdir, 'clientencrypted.key'),
+ client_certificate_password='foobar')
+
+
+def _build_proxy_handler(name):
+ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
+ proxy_name = name
+
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ self.send_response(200)
+ self.send_header('Content-Type', 'text/plain; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
+ return HTTPTestRequestHandler
+
+
+class TestProxy(unittest.TestCase):
+ def setUp(self):
+ self.proxy = http.server.HTTPServer(
+ ('127.0.0.1', 0), _build_proxy_handler('normal'))
+ self.port = http_server_port(self.proxy)
+ self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
+ self.proxy_thread.daemon = True
+ self.proxy_thread.start()
+
+ self.geo_proxy = http.server.HTTPServer(
+ ('127.0.0.1', 0), _build_proxy_handler('geo'))
+ self.geo_port = http_server_port(self.geo_proxy)
+ self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
+ self.geo_proxy_thread.daemon = True
+ self.geo_proxy_thread.start()
+
+ def test_proxy(self):
+ geo_proxy = f'127.0.0.1:{self.geo_port}'
+ ydl = YoutubeDL({
+ 'proxy': f'127.0.0.1:{self.port}',
+ 'geo_verification_proxy': geo_proxy,
+ })
+ url = 'http://foo.com/bar'
+ response = ydl.urlopen(url).read().decode()
+ self.assertEqual(response, f'normal: {url}')
+
+ req = urllib.request.Request(url)
+ req.add_header('Ytdl-request-proxy', geo_proxy)
+ response = ydl.urlopen(req).read().decode()
+ self.assertEqual(response, f'geo: {url}')
+
+ def test_proxy_with_idn(self):
+ ydl = YoutubeDL({
+ 'proxy': f'127.0.0.1:{self.port}',
+ })
+ url = 'http://中文.tw/'
+ response = ydl.urlopen(url).read().decode()
+ # b'xn--fiq228c' is '中文'.encode('idna')
+ self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
+
+class TestFileURL(unittest.TestCase):
+ # See https://github.com/ytdl-org/youtube-dl/issues/8227
+ def test_file_urls(self):
+ tf = tempfile.NamedTemporaryFile(delete=False)
+ tf.write(b'foobar')
+ tf.close()
+ url = pathlib.Path(tf.name).as_uri()
+ with FakeYDL() as ydl:
+ self.assertRaisesRegex(
+ urllib.error.URLError, 'file:// URLs are explicitly disabled in yt-dlp for security reasons', ydl.urlopen, url)
+ with FakeYDL({'enable_file_urls': True}) as ydl:
+ res = ydl.urlopen(url)
+ self.assertEqual(res.read(), b'foobar')
+ res.close()
+ os.unlink(tf.name)
+
+
+if __name__ == '__main__':
+ unittest.main()