aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Taylor <user234683@users.noreply.github.com>2020-12-18 21:17:06 -0800
committerJesús <heckyel@hyperbola.info>2020-12-19 10:49:08 -0500
commitafb2aca4609d554858e4e669409f7c30afdc7e63 (patch)
tree64517abffa6b0b2c9a07b4e7a0cbf7f3bd64112a
parent6b6a6653a02da9a76b101ba6c4e05366d49ee034 (diff)
downloadyt-local-afb2aca4609d554858e4e669409f7c30afdc7e63.tar.lz
yt-local-afb2aca4609d554858e4e669409f7c30afdc7e63.tar.xz
yt-local-afb2aca4609d554858e4e669409f7c30afdc7e63.zip
video routing: Range request missing content when connection closed
googlevideo sometimes doesn't send all video content and closes the connection. Retry with a range request for the bytes needed a maximum of three times. Fixes first type of #40 Signed-off-by: Jesús <heckyel@hyperbola.info>
-rw-r--r--server.py142
1 files changed, 110 insertions, 32 deletions
diff --git a/server.py b/server.py
index 6c39f21..b388218 100644
--- a/server.py
+++ b/server.py
@@ -19,6 +19,7 @@ import socks, sockshandler
import subprocess
import re
import sys
+import time
def youtu_be(env, start_response):
@@ -31,13 +32,33 @@ def youtu_be(env, start_response):
yield from yt_app(env, start_response)
+RANGE_RE = re.compile(r'bytes=(\d+-(?:\d+)?)')
+def parse_range(range_header, content_length):
+ # Range header can be like bytes=200-1000 or bytes=200-
+ # amount_received is the length of bytes from the range that have already
+ # been received
+ match = RANGE_RE.fullmatch(range_header.strip())
+ if not match:
+ print('Unsupported range header format:', range_header)
+ return None
+ start, end = match.group(1).split('-')
+ start_byte = int(start)
+ if not end:
+ end_byte = start_byte + content_length - 1
+ else:
+ end_byte = int(end)
+ return start_byte, end_byte
+
+
def proxy_site(env, start_response, video=False):
- headers = {
+ send_headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
'Accept': '*/*',
}
+ current_range_start = 0
+ range_end = None
if 'HTTP_RANGE' in env:
- headers['Range'] = env['HTTP_RANGE']
+ send_headers['Range'] = env['HTTP_RANGE']
url = "https://" + env['SERVER_NAME'] + env['PATH_INFO']
# remove /name portion
@@ -46,37 +67,94 @@ def proxy_site(env, start_response, video=False):
if env['QUERY_STRING']:
url += '?' + env['QUERY_STRING']
- if video:
- params = urllib.parse.parse_qs(env['QUERY_STRING'])
- params_use_tor = int(params.get('use_tor', '0')[0])
- use_tor = (settings.route_tor == 2) or params_use_tor
- response, cleanup_func = util.fetch_url_response(url, headers,
- use_tor=use_tor,
- max_redirects=10)
- else:
- response, cleanup_func = util.fetch_url_response(url, headers)
-
- headers = response.getheaders()
- if isinstance(headers, urllib3._collections.HTTPHeaderDict):
- headers = headers.items()
-
- start_response(str(response.status) + ' ' + response.reason, headers)
- while True:
- # a bit over 3 seconds of 360p video
- # we want each TCP packet to transmit in large multiples,
- # such as 65,536, so we shouldn't read in small chunks
- # such as 8192 lest that causes the socket library to limit the
- # TCP window size
- # Might need fine-tuning, since this gives us 4*65536
- # The tradeoff is that larger values (such as 6 seconds) only
- # allows video to buffer in those increments, meaning user must wait
- # until the entire chunk is downloaded before video starts playing
- content_part = response.read(32*8192)
- if not content_part:
+ try_num = 1
+ first_attempt = True
+ current_attempt_position = 0
+ while try_num <= 3: # Try a given byte position three times
+ if not first_attempt:
+ print('(Try %d)' % try_num, 'Trying with', send_headers['Range'])
+
+ if video:
+ params = urllib.parse.parse_qs(env['QUERY_STRING'])
+ params_use_tor = int(params.get('use_tor', '0')[0])
+ use_tor = (settings.route_tor == 2) or params_use_tor
+ response, cleanup_func = util.fetch_url_response(url, send_headers,
+ use_tor=use_tor,
+ max_redirects=10)
+ else:
+ response, cleanup_func = util.fetch_url_response(url, send_headers)
+
+ response_headers = response.getheaders()
+ if isinstance(response_headers, urllib3._collections.HTTPHeaderDict):
+ response_headers = response_headers.items()
+
+ if first_attempt:
+ start_response(str(response.status) + ' ' + response.reason,
+ response_headers)
+
+ content_length = int(dict(response_headers).get('Content-Length', 0))
+ if response.status >= 400:
+ print('Error: Youtube returned "%d %s" while routing %s' % (
+ response.status, response.reason, url.split('?')[0]))
+
+ total_received = 0
+ retry = False
+ while True:
+ # a bit over 3 seconds of 360p video
+ # we want each TCP packet to transmit in large multiples,
+ # such as 65,536, so we shouldn't read in small chunks
+ # such as 8192 lest that causes the socket library to limit the
+ # TCP window size
+ # Might need fine-tuning, since this gives us 4*65536
+ # The tradeoff is that larger values (such as 6 seconds) only
+ # allows video to buffer in those increments, meaning user must
+ # wait until the entire chunk is downloaded before video starts
+ # playing
+ content_part = response.read(32*8192)
+ total_received += len(content_part)
+ if not content_part:
+ # Sometimes Youtube closes the connection before sending all of
+ # the content. Retry with a range request for the missing
+ # content. See
+ # https://github.com/user234683/youtube-local/issues/40
+ if total_received < content_length:
+ if 'Range' in send_headers:
+ int_range = parse_range(send_headers['Range'],
+ content_length)
+ if not int_range: # give up b/c unrecognized range
+ break
+ start, end = int_range
+ else:
+ start, end = 0, (content_length - 1)
+
+ fail_byte = start + total_received
+ send_headers['Range'] = 'bytes=%d-%d' % (fail_byte, end)
+ print(
+ 'Warning: Youtube closed the connection before byte',
+ str(fail_byte) + '.', 'Expected', start+content_length,
+ 'bytes.'
+ )
+
+ retry = True
+ first_attempt = False
+ if fail_byte == current_attempt_position:
+ try_num += 1
+ else:
+ try_num = 1
+ current_attempt_position = fail_byte
+ break
+ yield content_part
+ cleanup_func(response)
+ if retry:
+ # Youtube will return 503 Service Unavailable if you do a bunch
+ # of range requests too quickly.
+ time.sleep(1)
+ continue
+ else:
break
- yield content_part
-
- cleanup_func(response)
+ else: # no break
+ print('Error: Youtube closed the connection before',
+ 'providing all content. Retried three times:', url.split('?')[0])
def proxy_video(env, start_response):