diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-03-27 10:04:04 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-03-27 11:18:35 +0530 |
commit | 5c3895fff150871fde273a10c55691403931b4dc (patch) | |
tree | 692946c934742281549d2c10042ef310d321ff50 /yt_dlp | |
parent | fd2ad7cb245423e49db1be9d9654c7dd3103619a (diff) | |
download | hypervideo-pre-5c3895fff150871fde273a10c55691403931b4dc.tar.lz hypervideo-pre-5c3895fff150871fde273a10c55691403931b4dc.tar.xz hypervideo-pre-5c3895fff150871fde273a10c55691403931b4dc.zip |
[outtmpl] Limit changes during sanitization
Closes #2761
Diffstat (limited to 'yt_dlp')
-rw-r--r-- | yt_dlp/YoutubeDL.py | 7 | ||||
-rw-r--r-- | yt_dlp/options.py | 2 | ||||
-rw-r--r-- | yt_dlp/utils.py | 34 |
3 files changed, 25 insertions, 18 deletions
```diff
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 478bdacca..c2f4f3a95 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -87,6 +87,7 @@ from .utils import (
     MaxDownloadsReached,
     merge_headers,
     network_exceptions,
+    NO_DEFAULT,
     number_of_digits,
     orderedSet,
     OUTTMPL_TYPES,
@@ -1150,8 +1151,10 @@ class YoutubeDL(object):
         na = self.params.get('outtmpl_na_placeholder', 'NA')
 
         def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
-            return sanitize_filename(str(value), restricted=restricted,
-                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))
+            return sanitize_filename(str(value), restricted=restricted, is_id=(
+                bool(re.search(r'(^|[_.])id(\.|$)', key))
+                if 'filename-sanitization' in self.params.get('compat_opts', [])
+                else NO_DEFAULT))
 
         sanitizer = sanitize if callable(sanitize) else filename_sanitizer
         sanitize = bool(sanitize)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 34a2e1103..eb306898a 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -338,7 +338,7 @@ def create_parser():
         action='callback', callback=_set_from_options_callback,
         callback_kwargs={
             'allowed_values': {
-                'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
+                'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
                 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
                 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata',
                 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index e9eaf7b4e..6854dbb63 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -705,36 +705,40 @@ def timeconvert(timestr):
     return timestamp
 
 
-def sanitize_filename(s, restricted=False, is_id=False):
+def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
     """Sanitizes a string so it could be used as part of a filename.
-    If restricted is set, use a stricter subset of allowed characters.
-    Set is_id if this is not an arbitrary string, but an ID that should be kept
-    if possible.
+    @param restricted   Use a stricter subset of allowed characters
+    @param is_id        Whether this is an ID that should be kept unchanged if possible.
+                        If unset, yt-dlp's new sanitization rules are in effect
     """
+    if s == '':
+        return ''
+
     def replace_insane(char):
         if restricted and char in ACCENT_CHARS:
             return ACCENT_CHARS[char]
         elif not restricted and char == '\n':
-            return ' '
+            return '\0 '
         elif char == '?' or ord(char) < 32 or ord(char) == 127:
             return ''
         elif char == '"':
             return '' if restricted else '\''
         elif char == ':':
-            return '_-' if restricted else ' -'
+            return '\0_\0-' if restricted else '\0 \0-'
         elif char in '\\/|*<>':
-            return '_'
-        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
-            return '_'
-        if restricted and ord(char) > 127:
-            return '_'
+            return '\0_'
+        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
+            return '\0_'
         return char
 
-    if s == '':
-        return ''
-    # Handle timestamps
-    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
+    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
     result = ''.join(map(replace_insane, s))
+    if is_id is NO_DEFAULT:
+        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
+        STRIP_RE = '(?:\0.|[ _-])*'
+        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
+    result = result.replace('\0', '') or '_'
+
     if not is_id:
         while '__' in result:
             result = result.replace('__', '_')
```