aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/networking/_urllib.py
diff options
context:
space:
mode:
author coletdjnz <coletdjnz@protonmail.com> 2023-07-29 10:40:20 +1200
committer GitHub <noreply@github.com> 2023-07-28 22:40:20 +0000
commit 4bf912282a34b58b6b35d8f7e6be535770c89c76 (patch)
tree 829a0271e2e709a8a79f2a9de29f72dea8108d05 /yt_dlp/networking/_urllib.py
parent a15fcd299e767a510debd8dc1646fe863b96ce0e (diff)
download hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.lz
hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.xz
hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.zip
[networking] Remove dot segments during URL normalization (#7662)
This implements the RFC 3986 section 5.2.4 remove_dot_segments algorithm during the URL normalization process. Closes #3355, #6526. Authored by: coletdjnz
Diffstat (limited to 'yt_dlp/networking/_urllib.py')
-rw-r--r-- yt_dlp/networking/_urllib.py 7
1 files changed, 4 insertions, 3 deletions
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py
index 3fe5fa52e..0c4794954 100644
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -41,7 +41,8 @@ from .exceptions import (
from ..dependencies import brotli
from ..socks import ProxyError as SocksProxyError
from ..socks import sockssocket
-from ..utils import escape_url, update_url_query
+from ..utils import update_url_query
+from ..utils.networking import normalize_url
SUPPORTED_ENCODINGS = ['gzip', 'deflate']
CONTENT_DECODE_ERRORS = [zlib.error, OSError]
@@ -179,7 +180,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
# the code of this workaround has been moved here from YoutubeDL.urlopen()
url = req.get_full_url()
- url_escaped = escape_url(url)
+ url_escaped = normalize_url(url)
# Substitute URL if any change after escaping
if url != url_escaped:
@@ -212,7 +213,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
if location:
# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
location = location.encode('iso-8859-1').decode()
- location_escaped = escape_url(location)
+ location_escaped = normalize_url(location)
if location != location_escaped:
del resp.headers['Location']
resp.headers['Location'] = location_escaped