From 4bf912282a34b58b6b35d8f7e6be535770c89c76 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 29 Jul 2023 10:40:20 +1200 Subject: [networking] Remove dot segments during URL normalization (#7662) This implements RFC3986 5.2.4 remove_dot_segments during the URL normalization process. Closes #3355, #6526 Authored by: coletdjnz --- yt_dlp/networking/_urllib.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'yt_dlp/networking/_urllib.py') diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py index 3fe5fa52e..0c4794954 100644 --- a/yt_dlp/networking/_urllib.py +++ b/yt_dlp/networking/_urllib.py @@ -41,7 +41,8 @@ from .exceptions import ( from ..dependencies import brotli from ..socks import ProxyError as SocksProxyError from ..socks import sockssocket -from ..utils import escape_url, update_url_query +from ..utils import update_url_query +from ..utils.networking import normalize_url SUPPORTED_ENCODINGS = ['gzip', 'deflate'] CONTENT_DECODE_ERRORS = [zlib.error, OSError] @@ -179,7 +180,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler): # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) # the code of this workaround has been moved here from YoutubeDL.urlopen() url = req.get_full_url() - url_escaped = escape_url(url) + url_escaped = normalize_url(url) # Substitute URL if any change after escaping if url != url_escaped: @@ -212,7 +213,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler): if location: # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 location = location.encode('iso-8859-1').decode() - location_escaped = escape_url(location) + location_escaped = normalize_url(location) if location != location_escaped: del resp.headers['Location'] resp.headers['Location'] = location_escaped -- cgit v1.2.3