aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/util.py
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2020-10-22 14:30:33 -0700
committer: James Taylor <user234683@users.noreply.github.com> 2020-10-22 14:30:33 -0700
commit: 5f4884dce8e3eb3215ee8b97469a741310669083 (patch)
tree: 68b9406805ae2ea6e5e2e58480ba31823a6bbd0f /youtube/util.py
parent: f8b6db14800806c132c6ec5c587832c0200ada6e (diff)
downloadyt-local-5f4884dce8e3eb3215ee8b97469a741310669083.tar.lz
yt-local-5f4884dce8e3eb3215ee8b97469a741310669083.tar.xz
yt-local-5f4884dce8e3eb3215ee8b97469a741310669083.zip
Put vid title at end of download urls so downloads w/ that filename
Diffstat (limited to 'youtube/util.py')
-rw-r--r-- youtube/util.py 54
1 files changed, 54 insertions, 0 deletions
diff --git a/youtube/util.py b/youtube/util.py
index 579f512..e468224 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -15,6 +15,7 @@ import json
import gevent
import gevent.queue
import gevent.lock
+import collections
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
@@ -435,3 +436,56 @@ def check_gevent_exceptions(*tasks):
if task.exception:
raise task.exception
+
+# Characters (or character sequences) that are replaced in filenames, mapped
+# to their replacements. Based on https://stackoverflow.com/a/62888
+# Entries are applied in order, so ': ' has to come before the bare ':'.
+replacement_map = collections.OrderedDict((
+    ('<', '_'),
+    ('>', '_'),
+    (': ', ' - '),
+    (':', '-'),
+    ('"', "'"),
+    ('/', '_'),
+    ('\\', '_'),
+    ('|', '-'),
+    ('?', ''),
+    ('*', '_'),
+    ('\t', ' '),
+))
+# Reserved DOS device names, all lowercase: the four fixed names plus
+# com0-com9 and lpt0-lpt9.
+DOS_names = {'con', 'prn', 'aux', 'nul'} | {
+    prefix + str(number)
+    for prefix in ('com', 'lpt')
+    for number in range(10)
+}
+def to_valid_filename(name):
+    '''Changes the name so it's valid on Windows, Linux, and Mac
+
+    name is a str holding a single filename (not a path; slashes and
+    backslashes are replaced with underscores). Returns the sanitized
+    name, which is never empty and never begins with a hyphen, period,
+    or space.'''
+    # See https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
+    # for Windows specs
+
+    # Additional recommendations for Linux:
+    # https://dwheeler.com/essays/fixing-unix-linux-filenames.html#standards
+
+    # reserved characters
+    # This has to run before the control character sweep below. Tab is a
+    # control character, so otherwise it would become an underscore and
+    # the tab -> space entry in replacement_map would never apply.
+    for reserved_char, replacement in replacement_map.items():
+        name = name.replace(reserved_char, replacement)
+
+    # replace the remaining control characters with underscores
+    name = re.sub(r'[\x00-\x1f]', '_', name)
+
+    # check for all periods/spaces
+    if all(c == '.' or c == ' ' for c in name):
+        name = '_'*len(name)
+
+    # remove trailing periods and spaces
+    name = name.rstrip('. ')
+
+    # check for reserved DOS names, such as nul, nul.txt, or nul.tar.gz
+    # Windows treats a reserved name followed by any extension as the
+    # device itself, so only the part before the FIRST period matters.
+    base, dot, extension = name.partition('.')
+    if base.lower() in DOS_names:
+        name = base + '_' + dot + extension
+
+    # check for blank name
+    if name == '':
+        name = '_'
+
+    # check if name begins with a hyphen, period, or space
+    if name[0] in ('-', '.', ' '):
+        name = '_' + name
+
+    return name