diff options
author | coletdjnz <coletdjnz@protonmail.com> | 2023-07-29 10:40:20 +1200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-28 22:40:20 +0000 |
commit | 4bf912282a34b58b6b35d8f7e6be535770c89c76 (patch) | |
tree | 829a0271e2e709a8a79f2a9de29f72dea8108d05 /yt_dlp/networking/_urllib.py | |
parent | a15fcd299e767a510debd8dc1646fe863b96ce0e (diff) | |
download | hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.lz hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.tar.xz hypervideo-pre-4bf912282a34b58b6b35d8f7e6be535770c89c76.zip |
[networking] Remove dot segments during URL normalization (#7662)
This implements RFC3986 5.2.4 remove_dot_segments during the URL normalization process.
Closes #3355, #6526
Authored by: coletdjnz
Diffstat (limited to 'yt_dlp/networking/_urllib.py')
-rw-r--r-- | yt_dlp/networking/_urllib.py | 7 |
1 file changed, 4 insertions, 3 deletions
```diff
diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py
index 3fe5fa52e..0c4794954 100644
--- a/yt_dlp/networking/_urllib.py
+++ b/yt_dlp/networking/_urllib.py
@@ -41,7 +41,8 @@ from .exceptions import (
 from ..dependencies import brotli
 from ..socks import ProxyError as SocksProxyError
 from ..socks import sockssocket
-from ..utils import escape_url, update_url_query
+from ..utils import update_url_query
+from ..utils.networking import normalize_url
 
 SUPPORTED_ENCODINGS = ['gzip', 'deflate']
 CONTENT_DECODE_ERRORS = [zlib.error, OSError]
@@ -179,7 +180,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
         # the code of this workaround has been moved here from YoutubeDL.urlopen()
         url = req.get_full_url()
-        url_escaped = escape_url(url)
+        url_escaped = normalize_url(url)
 
         # Substitute URL if any change after escaping
         if url != url_escaped:
@@ -212,7 +213,7 @@ class HTTPHandler(urllib.request.AbstractHTTPHandler):
             if location:
                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                 location = location.encode('iso-8859-1').decode()
-                location_escaped = escape_url(location)
+                location_escaped = normalize_url(location)
                 if location != location_escaped:
                     del resp.headers['Location']
                     resp.headers['Location'] = location_escaped
```