aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/utils.py
diff options
context:
space:
mode:
author pukkandan <pukkandan.ytdlp@gmail.com> 2022-08-04 20:19:32 +0530
committer pukkandan <pukkandan.ytdlp@gmail.com> 2022-08-04 20:21:32 +0530
commit 989a01c2610832193c268d072ada8814bfd4c00d (patch)
tree 5929a9294d1e78110369f21565ffcc278d414672 /yt_dlp/utils.py
parent 05e2243e8032061f300c00ca62999b6b29e1ed8f (diff)
download hypervideo-pre-989a01c2610832193c268d072ada8814bfd4c00d.tar.lz
hypervideo-pre-989a01c2610832193c268d072ada8814bfd4c00d.tar.xz
hypervideo-pre-989a01c2610832193c268d072ada8814bfd4c00d.zip
[outtmpl] Smarter replacing of unsupported characters
Closes #1330
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- yt_dlp/utils.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index c56f31013..3a33cad2e 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -40,6 +40,7 @@ import tempfile
import time
import traceback
import types
+import unicodedata
import urllib.error
import urllib.parse
import urllib.request
@@ -647,6 +648,9 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
return ACCENT_CHARS[char]
elif not restricted and char == '\n':
return '\0 '
+ elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
+ # Replace with their full-width unicode counterparts
+ return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
elif char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
@@ -659,6 +663,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
return '\0_'
return char
+ if restricted and is_id is NO_DEFAULT:
+ s = unicodedata.normalize('NFKC', s)
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s))
if is_id is NO_DEFAULT: