From 5f4884dce8e3eb3215ee8b97469a741310669083 Mon Sep 17 00:00:00 2001
From: James Taylor <user234683@users.noreply.github.com>
Date: Thu, 22 Oct 2020 14:30:33 -0700
Subject: Put video title at end of download URLs so files download with that filename

---
 server.py        |  3 +++
 youtube/util.py  | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 youtube/watch.py | 11 +++++++++++
 3 files changed, 68 insertions(+)

diff --git a/server.py b/server.py
index a7a3fc8..e456e3c 100644
--- a/server.py
+++ b/server.py
@@ -41,6 +41,9 @@ def proxy_site(env, start_response, video=False):
         headers['Range'] = env['HTTP_RANGE']
 
     url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
+    # remove /name portion
+    if video and '/videoplayback/name/' in url:
+        url = url[0:url.rfind('/name/')]
     if env['QUERY_STRING']:
         url += '?' + env['QUERY_STRING']
 
diff --git a/youtube/util.py b/youtube/util.py
index 579f512..e468224 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -15,6 +15,7 @@ import json
 import gevent
 import gevent.queue
 import gevent.lock
+import collections
 
 # The trouble with the requests library: It ships its own certificate bundle via certifi
 #  instead of using the system certificate store, meaning self-signed certificates
@@ -435,3 +436,56 @@ def check_gevent_exceptions(*tasks):
         if task.exception:
             raise task.exception
 
+
+# Replacements for characters reserved in Windows filenames; see
+# https://stackoverflow.com/a/62888. Order matters: ': ' must be handled
+# before ':' so that "a: b" becomes "a - b" rather than "a- b".
+replacement_map = collections.OrderedDict([
+    ('<', '_'),
+    ('>', '_'),
+    (': ', ' - '),
+    (':', '-'),
+    ('"', "'"),
+    ('/', '_'),
+    ('\\', '_'),
+    ('|', '-'),
+    ('?', ''),
+    ('*', '_'),
+    ('\t', ' '),
+])
+# Reserved Windows device names (lowercase; looked up case-insensitively)
+DOS_names = {'con', 'prn', 'aux', 'nul', 'com0', 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', 'lpt0', 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9'}
+
+def to_valid_filename(name):
+    '''Changes the name so it's valid on Windows, Linux, and Mac
+
+    Control and reserved characters are replaced, trailing periods and
+    spaces are stripped, reserved DOS device names get '_' appended to the
+    part before the first period, and a '_' is prepended to names that are
+    blank or begin with a hyphen, period, or space.'''
+    # Windows specs:
+    # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
+    # Additional recommendations for Linux:
+    # https://dwheeler.com/essays/fixing-unix-linux-filenames.html#standards
+
+    # replace control characters (this includes '\t', so the tab entry in
+    # replacement_map never matches here)
+    name = re.sub(r'[\x00-\x1f]', '_', name)
+    for reserved_char, replacement in replacement_map.items():
+        name = name.replace(reserved_char, replacement)
+
+    # a name of only periods/spaces would be emptied by the rstrip below
+    if all(c == '.' or c == ' ' for c in name):
+        name = '_'*len(name)
+    name = name.rstrip('. ')
+
+    # Reserved DOS names: Windows ignores everything after the FIRST period
+    # (NUL.tar.gz is equivalent to NUL), so split there, not at the last one
+    base, dot, rest = name.partition('.')
+    if base.lower() in DOS_names:
+        name = base + '_' + dot + rest
+
+    # blank names, and names beginning with a hyphen, period, or space
+    if name == '' or name[0] in ('-', '.', ' '):
+        name = '_' + name
+    return name
diff --git a/youtube/watch.py b/youtube/watch.py
index 11ef9f2..bc2008c 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -398,6 +398,17 @@ def get_watch_page(video_id=None):
     for fmt in info['formats']:
         fmt['url'] = util.prefix_url(fmt['url'])
 
+    # Add video title to end of url path so it has a filename other than just
+    # "videoplayback" when downloaded
+    title = urllib.parse.quote(util.to_valid_filename(info['title']))
+    for fmt in info['formats']:
+        filename = title
+        ext = fmt.get('ext')
+        if ext:
+            filename += '.' + ext
+        fmt['url'] = fmt['url'].replace(
+            '/videoplayback',
+            '/videoplayback/name/' + filename)
 
     if settings.gather_googlevideo_domains:
         with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f:
-- 
cgit v1.2.3