[networking] Remove dot segments during URL normalization (#7662)

This implements the RFC 3986 section 5.2.4 remove_dot_segments algorithm during the URL normalization process. Closes #3355, #6526. Authored by: coletdjnz
author: coletdjnz <coletdjnz@protonmail.com> 2023-07-29 10:40:20 +1200
committer: GitHub <noreply@github.com> 2023-07-28 22:40:20 +0000
commit: 4bf912282a34b58b6b35d8f7e6be535770c89c76 (patch)
tree: 829a0271e2e709a8a79f2a9de29f72dea8108d05 /yt_dlp/networking/_urllib.py
parent: a15fcd299e767a510debd8dc1646fe863b96ce0e (diff)
download: hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.lz
hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.xz
hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.zip
1 file changed, 4 insertions, 3 deletions
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py
index 3fe5fa52e..0c4794954 100644
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -41,7 +41,8 @@ from .exceptions import (
 from ..dependencies import brotli
 from ..socks import ProxyError as SocksProxyError
 from ..socks import sockssocket
-from ..utils import escape_url, update_url_query
+from ..utils import update_url_query
+from ..utils.networking import normalize_url
 
 SUPPORTED_ENCODINGS = ['gzip', 'deflate']
 CONTENT_DECODE_ERRORS = [zlib.error, OSError]
@@ -179,7 +180,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
         # the code of this workaround has been moved here from YoutubeDL.urlopen()
         url = req.get_full_url()
-        url_escaped = escape_url(url)
+        url_escaped = normalize_url(url)
 
         # Substitute URL if any change after escaping
         if url != url_escaped:
@@ -212,7 +213,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
             if location:
                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                 location = location.encode('iso-8859-1').decode()
-                location_escaped = escape_url(location)
+                location_escaped = normalize_url(location)
                 if location != location_escaped:
                     del resp.headers['Location']
                     resp.headers['Location'] = location_escaped
author	coletdjnz <coletdjnz@protonmail.com>	2023-07-29 10:40:20 +1200
committer	GitHub <noreply@github.com>	2023-07-28 22:40:20 +0000
commit	4bf912282a34b58b6b35d8f7e6be535770c89c76 (patch)
tree	829a0271e2e709a8a79f2a9de29f72dea8108d05 /yt_dlp/networking/_urllib.py
parent	a15fcd299e767a510debd8dc1646fe863b96ce0e (diff)
download	hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.lz hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.xz hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.zip