From e9989af03a0d6044106030f164f807cee42c1420 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 18 Sep 2020 14:37:24 -0700 Subject: Add tor video routing Includes non-tor video routing by default, so no more chances of the browser leaking headers or user agent to googlevideo Adjust settings upgrade system to facilitate change to route_tor setting. Add some more space on settings page for dropdown settings so does not overflow due to options with long names. Closes #7 --- server.py | 36 +++++++++++++++++++++++++++------ settings.py | 44 ++++++++++++++++++++++++++++++++--------- youtube/templates/settings.html | 2 +- youtube/util.py | 37 +++++++++++++++++++++------------- youtube/watch.py | 14 +++++++++---- 5 files changed, 99 insertions(+), 34 deletions(-) diff --git a/server.py b/server.py index cc59b19..c7b579c 100644 --- a/server.py +++ b/server.py @@ -32,24 +32,48 @@ def youtu_be(env, start_response): env['QUERY_STRING'] += '&v=' + id yield from yt_app(env, start_response) -def proxy_site(env, start_response): +def proxy_site(env, start_response, video=False): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)', 'Accept': '*/*', } + if 'HTTP_RANGE' in env: + headers['Range'] = env['HTTP_RANGE'] + url = "https://" + env['SERVER_NAME'] + env['PATH_INFO'] if env['QUERY_STRING']: url += '?' + env['QUERY_STRING'] - - content, response = util.fetch_url(url, headers, return_response=True) + if video and settings.route_tor == 1: + response, cleanup_func = util.fetch_url_response(url, headers, + use_tor=False) + else: + response, cleanup_func = util.fetch_url_response(url, headers) headers = response.getheaders() if isinstance(headers, urllib3._collections.HTTPHeaderDict): headers = headers.items() - start_response('200 OK', headers ) - yield content + start_response(str(response.status) + ' ' + response.reason, headers) + while True: + # a bit over 3 seconds of 360p video + # we want each TCP packet to transmit in large multiples, + # such as 65,536, so we shouldn't read in small chunks + # such as 8192 lest that causes the socket library to limit the + # TCP window size + # Might need fine-tuning, since this gives us 4*65536 + # The tradeoff is that larger values (such as 6 seconds) only + # allows video to buffer in those increments, meaning user must wait + # until the entire chunk is downloaded before video starts playing + content_part = response.read(32*8192) + if not content_part: + break + yield content_part + + cleanup_func(response) + +def proxy_video(env, start_response): + yield from proxy_site(env, start_response, video=True) site_handlers = { 'youtube.com':yt_app, @@ -57,7 +81,7 @@ site_handlers = { 'ytimg.com': proxy_site, 'yt3.ggpht.com': proxy_site, 'lh3.googleusercontent.com': proxy_site, - + 'googlevideo.com': proxy_video, } def split_url(url): diff --git a/settings.py b/settings.py index a6c2d55..dd39c9b 100644 --- a/settings.py +++ b/settings.py @@ -9,10 +9,17 @@ from flask import request SETTINGS_INFO = collections.OrderedDict([ ('route_tor', { - 'type': bool, - 'default': False, + 'type': int, + 'default': 0, 'label': 'Route Tor', - 'comment': '', + 'comment': '''0 - Off +1 - On, except video +2 - On, including video (see warnings)''', + 'options': [ + (0, 'Off'), + (1, 'On, except video'), + (2, 'On, including video (see warnings)'), + ], }), ('port_number', { @@ -148,7 +155,7 @@ For security reasons, enabling this is not recommended.''', ('settings_version', { 'type': int, - 'default': 2, + 'default': 3, 'comment': '''Do not change, remove, or comment out this value, or else your settings may be lost or corrupted''', 'hidden': True, }), @@ -186,8 +193,21 @@ def upgrade_to_2(settings_dict): if 'enable_related_videos' in settings_dict: new_settings['related_videos_mode'] = int(settings_dict['enable_related_videos']) del new_settings['enable_related_videos'] + new_settings['settings_version'] = 2 + return new_settings + +def upgrade_to_3(settings_dict): + new_settings = settings_dict.copy() + if 'route_tor' in settings_dict: + new_settings['route_tor'] = int(settings_dict['route_tor']) + new_settings['settings_version'] = 3 return new_settings +upgrade_functions = { + 1: upgrade_to_2, + 2: upgrade_to_3, +} + def log_ignored_line(line_number, message): print("WARNING: Ignoring settings.txt line " + str(node.lineno) + " (" + message + ")") @@ -251,14 +271,20 @@ else: current_settings_dict[target.id] = node.value.__getattribute__(attributes[type(node.value)]) - - if 'settings_version' not in current_settings_dict: - print('Upgrading settings.txt') - current_settings_dict = add_missing_settings(upgrade_to_2(current_settings_dict)) + # upgrades + latest_version = SETTINGS_INFO['settings_version']['default'] + while current_settings_dict.get('settings_version',1) < latest_version: + current_version = current_settings_dict.get('settings_version', 1) + print('Upgrading settings.txt to version', current_version+1) + upgrade_func = upgrade_functions[current_version] + # Must add missing settings here rather than below because + # save_settings needs all settings to be present + current_settings_dict = add_missing_settings( + upgrade_func(current_settings_dict)) save_settings(current_settings_dict) # some settings not in the file, add those missing settings to the file - elif not current_settings_dict.keys() >= SETTINGS_INFO.keys(): + if not current_settings_dict.keys() >= SETTINGS_INFO.keys(): print('Adding missing settings to settings.txt') current_settings_dict = add_missing_settings(current_settings_dict) save_settings(current_settings_dict) diff --git a/youtube/templates/settings.html b/youtube/templates/settings.html index 19a2461..5d1df5f 100644 --- a/youtube/templates/settings.html +++ b/youtube/templates/settings.html @@ -4,7 +4,7 @@ {% block style %} .settings-form { margin: auto; - width: 500px; + width: 600px; margin-top:10px; padding: 10px; display: block; diff --git a/youtube/util.py b/youtube/util.py index b19f91b..77c4fb1 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -119,8 +119,11 @@ def decode_content(content, encoding_header): content = gzip.decompress(content) return content -def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False, debug_name=None): +def fetch_url_response(url, headers=(), timeout=15, data=None, + cookiejar_send=None, cookiejar_receive=None, + use_tor=True): ''' + returns response, cleanup_function When cookiejar_send is set to a CookieJar object, those cookies will be sent in the request (but cookies in response will not be merged into it) When cookiejar_receive is set to a CookieJar object, @@ -147,8 +150,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja elif not isinstance(data, bytes): data = urllib.parse.urlencode(data).encode('ascii') - start_time = time.time() - if cookiejar_send is not None or cookiejar_receive is not None: # Use urllib req = urllib.request.Request(url, data=data, headers=headers) @@ -160,19 +161,30 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja opener = urllib.request.build_opener(cookie_processor) response = opener.open(req, timeout=timeout) - response_time = time.time() - - - content = response.read() + cleanup_func = (lambda r: None) else: # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them. pool = get_pool(use_tor and settings.route_tor) - response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False) - response_time = time.time() + cleanup_func = (lambda r: r.release_conn()) + + return response, cleanup_func - content = response.read() - response.release_conn() +def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, + cookiejar_send=None, cookiejar_receive=None, use_tor=True, + debug_name=None): + start_time = time.time() + + response, cleanup_func = fetch_url_response( + url, headers, timeout=timeout, + cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive, + use_tor=use_tor) + response_time = time.time() + + content = response.read() + read_finish = time.time() + + cleanup_func(response) # release_connection for urllib3 if (response.status == 429 and content.startswith(b'= 400: raise FetchError(str(response.status), reason=response.reason, ip=None) - read_finish = time.time() if report_text: print(report_text, ' Latency:', round(response_time - start_time,3), ' Read time:', round(read_finish - response_time,3)) content = decode_content(content, response.getheader('Content-Encoding', default='identity')) @@ -198,8 +209,6 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookieja with open(os.path.join(save_dir, debug_name), 'wb') as f: f.write(content) - if return_response: - return content, response return content def head(url, use_tor=False, report_text=None, max_redirects=10): diff --git a/youtube/watch.py b/youtube/watch.py index c1f5e1e..cedf632 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -24,7 +24,7 @@ except FileNotFoundError: def get_video_sources(info): video_sources = [] - if not settings.theater_mode: + if (not settings.theater_mode) or settings.route_tor == 2: max_resolution = 360 else: max_resolution = settings.default_resolution @@ -270,10 +270,11 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None): else: info['hls_formats'] = [] - # check for 403 + # check for 403. Unnecessary for tor video routing b/c ip address is same info['invidious_used'] = False info['invidious_reload_button'] = False - if settings.route_tor and info['formats'] and info['formats'][0]['url']: + if (settings.route_tor == 1 + and info['formats'] and info['formats'][0]['url']): try: response = util.head(info['formats'][0]['url'], report_text='Checked for URL access') @@ -408,10 +409,10 @@ def get_watch_page(video_id=None): "author": info['author'], } + # prefix urls, and other post-processing not handled by yt_data_extract for item in info['related_videos']: util.prefix_urls(item) util.add_extra_html_info(item) - if info['playlist']: playlist_id = info['playlist']['id'] for item in info['playlist']['items']: @@ -423,6 +424,11 @@ def get_watch_page(video_id=None): item['url'] += '&index=' + str(item['index']) info['playlist']['author_url'] = util.prefix_url( info['playlist']['author_url']) + # Don't prefix hls_formats for now because the urls inside the manifest + # would need to be prefixed as well. + for fmt in info['formats']: + fmt['url'] = util.prefix_url(fmt['url']) + if settings.gather_googlevideo_domains: with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: -- cgit v1.2.3