aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author James Taylor <user234683@users.noreply.github.com> 2020-10-22 14:30:33 -0700
committer James Taylor <user234683@users.noreply.github.com> 2020-10-22 14:30:33 -0700
commit 5f4884dce8e3eb3215ee8b97469a741310669083 (patch)
tree 68b9406805ae2ea6e5e2e58480ba31823a6bbd0f
parent f8b6db14800806c132c6ec5c587832c0200ada6e (diff)
downloadyt-local-5f4884dce8e3eb3215ee8b97469a741310669083.tar.lz
yt-local-5f4884dce8e3eb3215ee8b97469a741310669083.tar.xz
yt-local-5f4884dce8e3eb3215ee8b97469a741310669083.zip
Put video title at end of download URLs so the file downloads with that filename
-rw-r--r-- server.py 3
-rw-r--r-- youtube/util.py 54
-rw-r--r-- youtube/watch.py 11
3 files changed, 68 insertions, 0 deletions
diff --git a/server.py b/server.py
index a7a3fc8..e456e3c 100644
--- a/server.py
+++ b/server.py
@@ -41,6 +41,9 @@ def proxy_site(env, start_response, video=False):
headers['Range'] = env['HTTP_RANGE']
url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
+ # remove /name portion
+ if video and '/videoplayback/name/' in url:
+ url = url[0:url.rfind('/name/')]
if env['QUERY_STRING']:
url += '?' + env['QUERY_STRING']
diff --git a/youtube/util.py b/youtube/util.py
index 579f512..e468224 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -15,6 +15,7 @@ import json
import gevent
import gevent.queue
import gevent.lock
+import collections
# The trouble with the requests library: It ships its own certificate bundle via certifi
# instead of using the system certificate store, meaning self-signed certificates
@@ -435,3 +436,56 @@ def check_gevent_exceptions(*tasks):
if task.exception:
raise task.exception
+
# https://stackoverflow.com/a/62888
# Substitutions for characters that are reserved in Windows filenames.
# Order matters: ': ' must be handled before the bare ':' so that
# "Title: Subtitle" becomes "Title - Subtitle" rather than "Title- Subtitle".
replacement_map = collections.OrderedDict([
    ('<', '_'),
    ('>', '_'),
    (': ', ' - '),
    (':', '-'),
    ('"', "'"),
    ('/', '_'),
    ('\\', '_'),
    ('|', '-'),
    ('?', ''),
    ('*', '_'),
    ('\t', ' '),
])

# Reserved device names on Windows (compared case-insensitively).
DOS_names = {
    'con', 'prn', 'aux', 'nul',
    'com0', 'com1', 'com2', 'com3', 'com4',
    'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt0', 'lpt1', 'lpt2', 'lpt3', 'lpt4',
    'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}


def to_valid_filename(name):
    '''Changes the name so it's valid on Windows, Linux, and Mac

    name: the proposed filename (a str, without any directory part).

    Returns a non-empty str in which control characters and reserved
    characters have been substituted, trailing periods/spaces removed,
    reserved DOS device names suffixed with an underscore, and a leading
    hyphen, period, or space guarded by a leading underscore.
    '''
    # See https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file
    # for Windows specs

    # Additional recommendations for Linux:
    # https://dwheeler.com/essays/fixing-unix-linux-filenames.html#standards

    # Replace control characters. Tab (\x09) is deliberately excluded here
    # so that replacement_map can turn it into a space; otherwise that map
    # entry could never match.
    name = re.sub(r'[\x00-\x08\x0a-\x1f]', '_', name)

    # reserved characters
    for reserved_char, replacement in replacement_map.items():
        name = name.replace(reserved_char, replacement)

    # A name made only of periods/spaces would be stripped to nothing below,
    # so keep its length by swapping in underscores.
    if all(c in '. ' for c in name):
        name = '_' * len(name)

    # remove trailing periods and spaces
    name = name.rstrip('. ')

    # Check for reserved DOS names. Windows treats everything before the
    # FIRST period as the device name, so nul, nul.txt and nul.tar.gz are
    # all reserved; split on the first period, not the last.
    base, dot, rest = name.partition('.')
    if base.lower() in DOS_names:
        name = base + '_' + dot + rest

    # check for blank name
    if name == '':
        name = '_'

    # A leading hyphen can be mistaken for a command-line option, a leading
    # period hides the file on Unix, and a leading space is easy to miss.
    if name.startswith(('-', '.', ' ')):
        name = '_' + name

    return name
diff --git a/youtube/watch.py b/youtube/watch.py
index 11ef9f2..bc2008c 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -398,6 +398,17 @@ def get_watch_page(video_id=None):
for fmt in info['formats']:
fmt['url'] = util.prefix_url(fmt['url'])
+ # Add video title to end of url path so it has a filename other than just
+ # "videoplayback" when downloaded
+ title = urllib.parse.quote(util.to_valid_filename(info['title']))
+ for fmt in info['formats']:
+ filename = title
+ ext = fmt.get('ext')
+ if ext:
+ filename += '.' + ext
+ fmt['url'] = fmt['url'].replace(
+ '/videoplayback',
+ '/videoplayback/name/' + filename)
if settings.gather_googlevideo_domains:
with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: