aboutsummaryrefslogtreecommitdiffstats
path: root/server.py
diff options
context:
space:
mode:
Diffstat (limited to 'server.py')
-rw-r--r--server.py276
1 files changed, 214 insertions, 62 deletions
diff --git a/server.py b/server.py
index 5b20814..8da5411 100644
--- a/server.py
+++ b/server.py
@@ -1,59 +1,181 @@
+#!/usr/bin/env python3
from gevent import monkey
monkey.patch_all()
import gevent.socket
+from youtube import yt_app
+from youtube import util
+
+# these are just so the files get run - they import yt_app and add routes to it
+from youtube import watch, search, playlist, channel, local_playlist, comments, subscriptions
+
+import settings
+
from gevent.pywsgi import WSGIServer
-from youtube.youtube import youtube
import urllib
+import urllib3
import socket
-import socks
+import socks, sockshandler
import subprocess
import re
+import sys
+import time
-import settings
-
-
-BAN_FILE = "banned_addresses.txt"
-try:
- with open(BAN_FILE, 'r') as f:
- banned_addresses = f.read().splitlines()
-except FileNotFoundError:
- banned_addresses = ()
-
-def ban_address(address):
- banned_addresses.append(address)
- with open(BAN_FILE, 'a') as f:
- f.write(address + "\n")
-
def youtu_be(env, start_response):
    """Serve a youtu.be short link by rewriting it into a /watch request.

    Everything after the leading slash of PATH_INFO is taken as the video
    id. PATH_INFO is replaced with '/watch' and the id is added to the
    query string as ``v=<id>`` (appended with '&' when a query string is
    already present). The rewritten request is then delegated to yt_app.

    env: WSGI environ dict; modified in place.
    start_response: WSGI start_response callable, passed through to yt_app.
    Yields whatever yt_app yields.
    """
    video_id = env['PATH_INFO'][1:]
    env['PATH_INFO'] = '/watch'

    existing_query = env['QUERY_STRING']
    if existing_query:
        # keep parameters the short link already carried
        env['QUERY_STRING'] = existing_query + '&v=' + video_id
    else:
        env['QUERY_STRING'] = 'v=' + video_id

    yield from yt_app(env, start_response)
+
+
# Matches a single "first-last" or open-ended "first-" byte range.
# Suffix ranges ("bytes=-500") and multi-range headers are not supported.
RANGE_RE = re.compile(r'bytes=(\d+-(?:\d+)?)')


def parse_range(range_header, content_length):
    """Parse a Range header value into inclusive (start_byte, end_byte).

    range_header: header value such as 'bytes=200-1000' or 'bytes=200-'.
        Surrounding whitespace is ignored.
    content_length: number of bytes the range is expected to deliver. It is
        only used for an open-ended range, where the end byte is computed
        as start_byte + content_length - 1.

    Returns a (start_byte, end_byte) tuple of ints, or None (after printing
    a diagnostic) when the header is not in a supported format or when the
    last byte position is smaller than the first, which is an invalid
    byte-range-spec.
    """
    match = RANGE_RE.fullmatch(range_header.strip())
    if not match:
        print('Unsupported range header format:', range_header)
        return None

    start, end = match.group(1).split('-')
    start_byte = int(start)
    if not end:
        # open-ended range: runs to the end of the expected content
        return start_byte, start_byte + content_length - 1

    end_byte = int(end)
    if end_byte < start_byte:
        # An inverted range is invalid; building a retry request from it
        # would produce a nonsensical Range header.
        print('Unsupported range header format:', range_header)
        return None
    return start_byte, end_byte
def proxy_site(env, start_response, video=False):
    """Stream a remote HTTPS resource to the client, resuming dropped bodies.

    The upstream URL is rebuilt from env['SERVER_NAME'], env['PATH_INFO'] and
    env['QUERY_STRING'] and fetched with util.fetch_url_response. The body
    is yielded in chunks. If the upstream closes the connection before
    Content-Length bytes have arrived, the request is re-issued with a Range
    header covering the missing bytes; a given byte position is tried at
    most three times before giving up.

    env: WSGI environ dict. An incoming HTTP_RANGE header is forwarded.
    start_response: WSGI start_response callable. It is called once, with
        the status and headers of the first upstream response.
    video: when true, the trailing '/name/...' part of a
        '/videoplayback/name/' URL is stripped, Tor is used if
        settings.route_tor == 2 or the query string has a truthy 'use_tor'
        value, up to 10 redirects are allowed, and an
        'Access-Control-Allow-Origin: *' header is added.

    Yields the response body as it is read from the upstream.
    """
    send_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
        'Accept': '*/*',
    }
    if 'HTTP_RANGE' in env:
        send_headers['Range'] = env['HTTP_RANGE']

    url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
    # remove /name portion
    if video and '/videoplayback/name/' in url:
        url = url[0:url.rfind('/name/')]
    if env['QUERY_STRING']:
        url += '?' + env['QUERY_STRING']

    try_num = 1
    first_attempt = True
    current_attempt_position = 0
    while try_num <= 3:   # Try a given byte position three times
        if not first_attempt:
            print('(Try %d)' % try_num, 'Trying with', send_headers['Range'])

        if video:
            params = urllib.parse.parse_qs(env['QUERY_STRING'])
            # parse_qs maps each key to a list of values
            params_use_tor = int(params.get('use_tor', ['0'])[0])
            use_tor = (settings.route_tor == 2) or params_use_tor
            response, cleanup_func = util.fetch_url_response(url, send_headers,
                                                             use_tor=use_tor,
                                                             max_redirects=10)
        else:
            response, cleanup_func = util.fetch_url_response(url, send_headers)

        response_headers = response.headers
        if isinstance(response_headers, urllib3._collections.HTTPHeaderDict):
            response_headers = response_headers.items()
        if video:
            response_headers = (list(response_headers)
                                + [('Access-Control-Allow-Origin', '*')])

        # Headers can only be sent to the client once; retries just continue
        # the body that is already being streamed.
        if first_attempt:
            start_response(str(response.status) + ' ' + response.reason,
                           response_headers)

        content_length = int(dict(response_headers).get('Content-Length', 0))
        if response.status >= 400:
            print('Error: YouTube returned "%d %s" while routing %s' % (
                response.status, response.reason, url.split('?')[0]))

        total_received = 0
        retry = False
        # try/finally so the response is cleaned up even when the client
        # disconnects (the generator is closed at the yield) or the read
        # raises, not only when the loop ends normally.
        try:
            while True:
                # a bit over 3 seconds of 360p video
                # we want each TCP packet to transmit in large multiples,
                # such as 65,536, so we shouldn't read in small chunks
                # such as 8192 lest that causes the socket library to limit
                # the TCP window size
                # Might need fine-tuning, since this gives us 4*65536
                # The tradeoff is that larger values (such as 6 seconds) only
                # allows video to buffer in those increments, meaning user
                # must wait until the entire chunk is downloaded before video
                # starts playing
                content_part = response.read(32*8192)
                total_received += len(content_part)
                if not content_part:
                    # Sometimes YouTube closes the connection before sending
                    # all of the content. Retry with a range request for the
                    # missing content. See
                    # https://github.com/user234683/youtube-local/issues/40
                    if total_received < content_length:
                        if 'Range' in send_headers:
                            int_range = parse_range(send_headers['Range'],
                                                    content_length)
                            if not int_range:  # give up b/c unrecognized range
                                break
                            start, end = int_range
                        else:
                            start, end = 0, (content_length - 1)

                        fail_byte = start + total_received
                        send_headers['Range'] = 'bytes=%d-%d' % (fail_byte, end)
                        print(
                            'Warning: YouTube closed the connection before byte',
                            str(fail_byte) + '.', 'Expected',
                            start+content_length, 'bytes.'
                        )

                        retry = True
                        first_attempt = False
                        # Only count an attempt against the limit when no
                        # progress was made since the previous failure.
                        if fail_byte == current_attempt_position:
                            try_num += 1
                        else:
                            try_num = 1
                            current_attempt_position = fail_byte
                    break
                yield content_part
        finally:
            cleanup_func(response)

        if retry:
            # YouTube will return 503 Service Unavailable if you do a bunch
            # of range requests too quickly.
            time.sleep(1)
            continue
        else:
            break
    else:  # no break
        print('Error: YouTube closed the connection before',
              'providing all content. Retried three times:', url.split('?')[0])
+
+
def proxy_video(env, start_response):
    """WSGI handler for video hosts: proxy_site with video handling enabled.

    env: WSGI environ dict.
    start_response: WSGI start_response callable.
    Yields the chunks produced by proxy_site(..., video=True).
    """
    video_stream = proxy_site(env, start_response, video=True)
    yield from video_stream
+
site_handlers = {
- 'youtube.com':youtube,
- 'youtu.be':youtu_be,
+ 'youtube.com': yt_app,
+ 'youtube-nocookie.com': yt_app,
+ 'youtu.be': youtu_be,
'ytimg.com': proxy_site,
- 'yt3.ggpht.com': proxy_site,
- 'lh3.googleusercontent.com': proxy_site,
-
+ 'ggpht.com': proxy_site,
+ 'googleusercontent.com': proxy_site,
+ 'sponsor.ajay.app': proxy_site,
+ 'googlevideo.com': proxy_video,
}
+
def split_url(url):
''' Split https://sub.example.com/foo/bar.html into ('sub.example.com', '/foo/bar.html')'''
# XXX: Is this regex safe from REDOS?
@@ -61,35 +183,41 @@ def split_url(url):
match = re.match(r'(?:https?://)?([\w-]+(?:\.[\w-]+)+?)(/.*|$)', url)
if match is None:
raise ValueError('Invalid or unsupported url: ' + url)
-
+
return match.group(1), match.group(2)
-
def error_code(code, start_response):
    """Start a header-less error response and return its body.

    code: full HTTP status line, e.g. '404 Not Found'.
    start_response: WSGI start_response callable.
    Returns the status line encoded to bytes, to be used as the body.
    """
    # PEP 3333 requires the headers argument to be a real list (servers and
    # middleware may modify it in place), so pass [] rather than a tuple.
    start_response(code, [])
    return code.encode()
+
def site_dispatch(env, start_response):
client_address = env['REMOTE_ADDR']
try:
+ # correct malformed query string with ? separators instead of &
+ env['QUERY_STRING'] = env['QUERY_STRING'].replace('?', '&')
+
+ # Fix PATH_INFO for UWSGI
+ if 'REQUEST_URI' in env:
+ env['PATH_INFO'] = urllib.parse.unquote(
+ env['REQUEST_URI'].split('?')[0]
+ )
+
method = env['REQUEST_METHOD']
path = env['PATH_INFO']
- if client_address in banned_addresses:
- yield error_code('403 Fuck Off', start_response)
- return
- if method=="POST" and client_address not in ('127.0.0.1', '::1'):
+
+ if (method == "POST"
+ and client_address not in ('127.0.0.1', '::1')
+ and not settings.allow_foreign_post_requests):
yield error_code('403 Forbidden', start_response)
return
- if "phpmyadmin" in path or (path == "/" and method == "HEAD"):
- ban_address(client_address)
- start_response('403 Fuck Off', ())
- yield b'403 Fuck Off'
+
+ # redirect localhost:8080 to localhost:8080/https://youtube.com
+ if path == '' or path == '/':
+ start_response('302 Found', [('Location', '/https://youtube.com')])
return
- '''if env['QUERY_STRING']:
- path += '?' + env['QUERY_STRING']'''
- #path_parts = urllib.parse.urlparse(path)
try:
env['SERVER_NAME'], env['PATH_INFO'] = split_url(path[1:])
except ValueError:
@@ -108,18 +236,11 @@ def site_dispatch(env, start_response):
except KeyError:
continue
else:
- yield handler(env, start_response)
+ yield from handler(env, start_response)
break
else: # did not break
yield error_code('404 Not Found', start_response)
return
-
-
- except socket.error as e:
- start_response('502 Bad Gateway', ())
- print(str(e))
- yield b'502 Bad Gateway'
-
except Exception:
start_response('500 Internal Server Error', ())
yield b'500 Internal Server Error'
@@ -127,17 +248,48 @@ def site_dispatch(env, start_response):
return
class FilteredRequestLog:
    """Log sink that forwards lines to stderr, minus noisy media requests.

    A line is dropped when it matches filter_re: a GET for a thumbnail,
    avatar, subscription thumbnail, timedtext or googlevideo URL that was
    answered with status 200 or 206. Every other line is written to
    sys.stderr unchanged.
    """

    filter_re = re.compile(r'''(?x)
                            "GET\ /https://(
                            i[.]ytimg[.]com/|
                            www[.]youtube[.]com/data/subscription_thumbnails/|
                            yt3[.]ggpht[.]com/|
                            www[.]youtube[.]com/api/timedtext|
                            [-\w]+[.]googlevideo[.]com/).*"\ (200|206)
                            ''')

    def write(self, s):
        """Write log line s to stderr unless it matches filter_re."""
        if self.filter_re.search(s):
            return
        sys.stderr.write(s)
+
+
if __name__ == '__main__':
    # Bind to every interface only when the settings allow foreign
    # addresses; otherwise stay on loopback.
    ip_server = '0.0.0.0' if settings.allow_foreign_addresses else '127.0.0.1'
    server = WSGIServer((ip_server, settings.port_number), site_dispatch,
                        log=FilteredRequestLog())

    print('Starting httpserver at http://%s:%s/' %
          (ip_server, settings.port_number))
    # Show privacy-focused tips
    startup_tips = (
        '',
        'Privacy & Rate Limiting Tips:',
        ' - Enable Tor routing in /settings for anonymity and better rate limits',
        ' - The system auto-retries with exponential backoff (max 5 retries)',
        ' - Wait a few minutes if you hit rate limits (429)',
        ' - For maximum privacy: Use Tor + No cookies',
        '',
    )
    for tip_line in startup_tips:
        print(tip_line)

    server.serve_forever()

# for uwsgi, gunicorn, etc.
application = site_dispatch