From e9989af03a0d6044106030f164f807cee42c1420 Mon Sep 17 00:00:00 2001
From: James Taylor <user234683@users.noreply.github.com>
Date: Fri, 18 Sep 2020 14:37:24 -0700
Subject: Add tor video routing. Includes non-tor video routing by default, so
 no more chances of the browser leaking headers or user agent to googlevideo.
 Adjust settings upgrade system to facilitate change to route_tor setting. Add
 some more space on settings page for dropdown settings so it does not overflow
 due to options with long names. Closes #7

---
 server.py                       | 36 +++++++++++++++++++++++++++------
 settings.py                     | 44 ++++++++++++++++++++++++++++++++---------
 youtube/templates/settings.html |  2 +-
 youtube/util.py                 | 37 +++++++++++++++++++++-------------
 youtube/watch.py                | 14 +++++++++----
 5 files changed, 99 insertions(+), 34 deletions(-)

diff --git a/server.py b/server.py
index cc59b19..c7b579c 100644
--- a/server.py
+++ b/server.py
@@ -32,24 +32,48 @@ def youtu_be(env, start_response):
         env['QUERY_STRING'] += '&v=' + id
     yield from yt_app(env, start_response)
 
-def proxy_site(env, start_response):
+def proxy_site(env, start_response, video=False):
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
         'Accept': '*/*',
     }
+    if 'HTTP_RANGE' in env:
+        headers['Range'] = env['HTTP_RANGE']
+
     url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
     if env['QUERY_STRING']:
         url += '?' + env['QUERY_STRING']
 
-
-    content, response = util.fetch_url(url, headers, return_response=True)
+    if video and settings.route_tor == 1:
+        response, cleanup_func = util.fetch_url_response(url, headers,
+                                                         use_tor=False)
+    else:
+        response, cleanup_func = util.fetch_url_response(url, headers)
 
     headers = response.getheaders()
     if isinstance(headers, urllib3._collections.HTTPHeaderDict):
         headers = headers.items()
 
-    start_response('200 OK', headers )
-    yield content
+    start_response(str(response.status) + ' ' + response.reason, headers)
+    while True:
+        # a bit over 3 seconds of 360p video
+        # we want each TCP packet to transmit in large multiples,
+        # such as 65,536, so we shouldn't read in small chunks
+        # such as 8192 lest that causes the socket library to limit the
+        # TCP window size
+        # Might need fine-tuning, since this gives us 4*65536
+        # The tradeoff is that larger values (such as 6 seconds) only
+        # allow video to buffer in those increments, meaning the user must
+        # wait until the entire chunk is downloaded before video starts playing
+        content_part = response.read(32*8192)
+        if not content_part:
+            break
+        yield content_part
+
+    cleanup_func(response)
+
+def proxy_video(env, start_response):
+    yield from proxy_site(env, start_response, video=True)
 
 site_handlers = {
     'youtube.com':yt_app,
@@ -57,7 +81,7 @@ site_handlers = {
     'ytimg.com': proxy_site,
     'yt3.ggpht.com': proxy_site,
     'lh3.googleusercontent.com': proxy_site,
-
+    'googlevideo.com': proxy_video,
 }
 
 def split_url(url):
diff --git a/settings.py b/settings.py
index a6c2d55..dd39c9b 100644
--- a/settings.py
+++ b/settings.py
@@ -9,10 +9,17 @@ from flask import request
 
 SETTINGS_INFO = collections.OrderedDict([
     ('route_tor', {
-        'type': bool,
-        'default': False,
+        'type': int,
+        'default': 0,
         'label': 'Route Tor',
-        'comment': '',
+        'comment': '''0 - Off
+1 - On, except video
+2 - On, including video (see warnings)''',
+        'options': [
+            (0, 'Off'),
+            (1, 'On, except video'),
+            (2, 'On, including video (see warnings)'),
+        ],
     }),
 
     ('port_number', {
@@ -148,7 +155,7 @@ For security reasons, enabling this is not recommended.''',
 
     ('settings_version', {
         'type': int,
-        'default': 2,
+        'default': 3,
         'comment': '''Do not change, remove, or comment out this value, or else your settings may be lost or corrupted''',
         'hidden': True,
     }),
@@ -186,8 +193,21 @@ def upgrade_to_2(settings_dict):
     if 'enable_related_videos' in settings_dict:
         new_settings['related_videos_mode'] = int(settings_dict['enable_related_videos'])
         del new_settings['enable_related_videos']
+    new_settings['settings_version'] = 2
+    return new_settings
+
+def upgrade_to_3(settings_dict):
+    new_settings = settings_dict.copy()
+    if 'route_tor' in settings_dict:
+        new_settings['route_tor'] = int(settings_dict['route_tor'])
+    new_settings['settings_version'] = 3
     return new_settings
 
+upgrade_functions = {
+    1: upgrade_to_2,
+    2: upgrade_to_3,
+}
+
 def log_ignored_line(line_number, message):
     print("WARNING: Ignoring settings.txt line " + str(node.lineno) + " (" + message + ")")
 
@@ -251,14 +271,20 @@ else:
 
             current_settings_dict[target.id] = node.value.__getattribute__(attributes[type(node.value)])
 
-
-        if 'settings_version' not in current_settings_dict:
-            print('Upgrading settings.txt')
-            current_settings_dict = add_missing_settings(upgrade_to_2(current_settings_dict))
+        # upgrades
+        latest_version = SETTINGS_INFO['settings_version']['default']
+        while current_settings_dict.get('settings_version',1) < latest_version:
+            current_version = current_settings_dict.get('settings_version', 1)
+            print('Upgrading settings.txt to version', current_version+1)
+            upgrade_func = upgrade_functions[current_version]
+            # Must add missing settings here rather than below because
+            # save_settings needs all settings to be present
+            current_settings_dict = add_missing_settings(
+                upgrade_func(current_settings_dict))
             save_settings(current_settings_dict)
 
         # some settings not in the file, add those missing settings to the file
-        elif not current_settings_dict.keys() >= SETTINGS_INFO.keys():
+        if not current_settings_dict.keys() >= SETTINGS_INFO.keys():
             print('Adding missing settings to settings.txt')
             current_settings_dict = add_missing_settings(current_settings_dict)
             save_settings(current_settings_dict)
diff --git a/youtube/templates/settings.html b/youtube/templates/settings.html
index 19a2461..5d1df5f 100644
--- a/youtube/templates/settings.html
+++ b/youtube/templates/settings.html
@@ -4,7 +4,7 @@
 {% block style %}
     .settings-form {
         margin: auto;
-        width: 500px;
+        width: 600px;
         margin-top:10px;
         padding: 10px;
         display: block;
diff --git a/youtube/util.py b/youtube/util.py
index b19f91b..77c4fb1 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -119,8 +119,11 @@ def decode_content(content, encoding_header):
             content = gzip.decompress(content)
     return content
 
-def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None):
+def fetch_url_response(url, headers=(), timeout=15, data=None,
+                       cookiejar_send=None, cookiejar_receive=None,
+                       use_tor=True):
     '''
+    returns response, cleanup_function
     When cookiejar_send is set to a CookieJar object,
      those cookies will be sent in the request (but cookies in response will not be merged into it)
     When cookiejar_receive is set to a CookieJar object,
@@ -147,8 +150,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         elif not isinstance(data, bytes):
             data = urllib.parse.urlencode(data).encode('ascii')
 
-    start_time = time.time()
-
     if cookiejar_send is not None or cookiejar_receive is not None:     # Use urllib
         req = urllib.request.Request(url, data=data, headers=headers)
 
@@ -160,19 +161,30 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
             opener = urllib.request.build_opener(cookie_processor)
 
         response = opener.open(req, timeout=timeout)
-        response_time = time.time()
-
-
-        content = response.read()
+        cleanup_func = (lambda r: None)
 
     else:           # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
         pool = get_pool(use_tor and settings.route_tor)
-
         response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False)
-        response_time = time.time()
+        cleanup_func = (lambda r: r.release_conn())
+
+    return response, cleanup_func
 
-        content = response.read()
-        response.release_conn()
+def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
+              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
+              debug_name=None):
+    start_time = time.time()
+
+    response, cleanup_func = fetch_url_response(
+        url, headers, timeout=timeout,
+        cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive,
+        use_tor=use_tor)
+    response_time = time.time()
+
+    content = response.read()
+    read_finish = time.time()
+
+    cleanup_func(response)  # release_connection for urllib3
 
     if (response.status == 429
             and content.startswith(b'<!DOCTYPE')
@@ -185,7 +197,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
     elif response.status >= 400:
         raise FetchError(str(response.status), reason=response.reason, ip=None)
 
-    read_finish = time.time()
     if report_text:
         print(report_text, '    Latency:', round(response_time - start_time,3), '    Read time:', round(read_finish - response_time,3))
     content = decode_content(content, response.getheader('Content-Encoding', default='identity'))
@@ -198,8 +209,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja
         with open(os.path.join(save_dir, debug_name), 'wb') as f:
             f.write(content)
 
-    if return_response:
-        return content, response
     return content
 
 def head(url, use_tor=False, report_text=None, max_redirects=10):
diff --git a/youtube/watch.py b/youtube/watch.py
index c1f5e1e..cedf632 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -24,7 +24,7 @@ except FileNotFoundError:
 
 def get_video_sources(info):
     video_sources = []
-    if not settings.theater_mode:
+    if (not settings.theater_mode) or settings.route_tor == 2:
         max_resolution = 360
     else:
         max_resolution = settings.default_resolution
@@ -270,10 +270,11 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
     else:
         info['hls_formats'] = []
 
-    # check for 403
+    # check for 403. Unnecessary for tor video routing b/c ip address is same
     info['invidious_used'] = False
     info['invidious_reload_button'] = False
-    if settings.route_tor and info['formats'] and info['formats'][0]['url']:
+    if (settings.route_tor == 1
+            and info['formats'] and info['formats'][0]['url']):
         try:
             response = util.head(info['formats'][0]['url'],
                 report_text='Checked for URL access')
@@ -408,10 +409,10 @@ def get_watch_page(video_id=None):
         "author":   info['author'],
     }
 
+    # prefix urls, and other post-processing not handled by yt_data_extract
     for item in info['related_videos']:
         util.prefix_urls(item)
         util.add_extra_html_info(item)
-
     if info['playlist']:
         playlist_id = info['playlist']['id']
         for item in info['playlist']['items']:
@@ -423,6 +424,11 @@ def get_watch_page(video_id=None):
                 item['url'] += '&index=' + str(item['index'])
         info['playlist']['author_url'] = util.prefix_url(
             info['playlist']['author_url'])
+    # Don't prefix hls_formats for now because the urls inside the manifest
+    # would need to be prefixed as well.
+    for fmt in info['formats']:
+        fmt['url'] = util.prefix_url(fmt['url'])
+
 
     if settings.gather_googlevideo_domains:
         with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f:
-- 
cgit v1.2.3