aboutsummaryrefslogtreecommitdiffstats
path: root/youtube
diff options
context:
space:
mode:
Diffstat (limited to 'youtube')
-rw-r--r-- youtube/util.py 54
-rw-r--r-- youtube/watch.py 11
2 files changed, 65 insertions, 0 deletions
diff --git a/youtube/util.py b/youtube/util.py
index 579f512..e468224 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -15,6 +15,7 @@ import json
import gevent
import gevent.queue
import gevent.lock
+import collections
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
@@ -435,3 +436,56 @@ def check_gevent_exceptions(*tasks):
if task.exception:
raise task.exception
+
# https://stackoverflow.com/a/62888
# Substitutions for characters that are reserved in filenames on Windows
# (plus tab). Order matters: ': ' must be tried before ':' so that
# "a: b" becomes "a - b" rather than "a- b".
replacement_map = collections.OrderedDict([
    ('<', '_'),
    ('>', '_'),
    (': ', ' - '),
    (':', '-'),
    ('"', "'"),
    ('/', '_'),
    ('\\', '_'),
    ('|', '-'),
    ('?', ''),
    ('*', '_'),
    ('\t', ' '),
])

# Device names reserved by Windows; compared case-insensitively (lowercase
# here) against the part of the filename before the first period.
DOS_names = {
    'con', 'prn', 'aux', 'nul',
    'com0', 'com1', 'com2', 'com3', 'com4',
    'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt0', 'lpt1', 'lpt2', 'lpt3', 'lpt4',
    'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}


def to_valid_filename(name):
    '''Changes the name so it's valid on Windows, Linux, and Mac

    name: the desired filename as a str (a single path component, not a
        path; any path separators in it are replaced).

    Returns a non-empty str which:
      - contains no control characters (0x00-0x1f) and none of the
        characters < > : " / \\ | ? *
      - does not end with a period or space
      - does not begin with a hyphen, period, or space
      - is not a reserved DOS device name, with or without extensions
    '''
    # See https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
    # for Windows specs

    # Additional recommendations for Linux:
    # https://dwheeler.com/essays/fixing-unix-linux-filenames.html#standards

    # reserved characters
    # This must happen before control characters are removed: tab is itself
    # a control character, and would otherwise be turned into '_' before its
    # entry in replacement_map (tab -> space) had a chance to apply.
    for reserved_char, replacement in replacement_map.items():
        name = name.replace(reserved_char, replacement)

    # remove remaining control characters
    name = re.sub(r'[\x00-\x1f]', '_', name)

    # check for all periods/spaces
    # Such a name would be stripped down to nothing below, so keep its
    # length by substituting underscores instead.
    if all(c == '.' or c == ' ' for c in name):
        name = '_'*len(name)

    # remove trailing periods and spaces
    name = name.rstrip('. ')

    # check for reserved DOS names, such as nul, nul.txt, or nul.tar.gz
    # Windows treats a reserved name followed by any extension(s) as the
    # device itself, so compare everything before the FIRST period.
    base, dot, extensions = name.partition('.')
    if base.lower() in DOS_names:
        name = base + '_' + dot + extensions

    # check for blank name
    if name == '':
        name = '_'

    # check if name begins with a hyphen, period, or space
    # (a leading hyphen can be mistaken for a command line option, a leading
    # period hides the file on Unix-like systems)
    if name[0] in ('-', '.', ' '):
        name = '_' + name

    return name
diff --git a/youtube/watch.py b/youtube/watch.py
index 11ef9f2..bc2008c 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -398,6 +398,17 @@ def get_watch_page(video_id=None):
for fmt in info['formats']:
fmt['url'] = util.prefix_url(fmt['url'])
+ # Add video title to end of url path so it has a filename other than just
+ # "videoplayback" when downloaded
+ title = urllib.parse.quote(util.to_valid_filename(info['title']))
+ for fmt in info['formats']:
+ filename = title
+ ext = fmt.get('ext')
+ if ext:
+ filename += '.' + ext
+ fmt['url'] = fmt['url'].replace(
+ '/videoplayback',
+ '/videoplayback/name/' + filename)
if settings.gather_googlevideo_domains:
with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: