From afb2aca4609d554858e4e669409f7c30afdc7e63 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 18 Dec 2020 21:17:06 -0800 Subject: video routing: Range request missing content when connection closed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit googlevideo sometimes doesn't send all video content and closes the connection. Retry with a range request for the bytes needed a maximum of three times. Fixes first type of #40 Signed-off-by: Jesús --- server.py | 142 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 110 insertions(+), 32 deletions(-) diff --git a/server.py b/server.py index 6c39f21..b388218 100644 --- a/server.py +++ b/server.py @@ -19,6 +19,7 @@ import socks, sockshandler import subprocess import re import sys +import time def youtu_be(env, start_response): @@ -31,13 +32,33 @@ def youtu_be(env, start_response): yield from yt_app(env, start_response) +RANGE_RE = re.compile(r'bytes=(\d+-(?:\d+)?)') +def parse_range(range_header, content_length): + # Range header can be like bytes=200-1000 or bytes=200- + # amount_received is the length of bytes from the range that have already + # been received + match = RANGE_RE.fullmatch(range_header.strip()) + if not match: + print('Unsupported range header format:', range_header) + return None + start, end = match.group(1).split('-') + start_byte = int(start) + if not end: + end_byte = start_byte + content_length - 1 + else: + end_byte = int(end) + return start_byte, end_byte + + def proxy_site(env, start_response, video=False): - headers = { + send_headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)', 'Accept': '*/*', } + current_range_start = 0 + range_end = None if 'HTTP_RANGE' in env: - headers['Range'] = env['HTTP_RANGE'] + send_headers['Range'] = env['HTTP_RANGE'] url = "https://" + env['SERVER_NAME'] + env['PATH_INFO'] # remove /name portion @@ -46,37 +67,94 @@ def proxy_site(env, start_response, video=False): if env['QUERY_STRING']: url += '?' + env['QUERY_STRING'] - if video: - params = urllib.parse.parse_qs(env['QUERY_STRING']) - params_use_tor = int(params.get('use_tor', '0')[0]) - use_tor = (settings.route_tor == 2) or params_use_tor - response, cleanup_func = util.fetch_url_response(url, headers, - use_tor=use_tor, - max_redirects=10) - else: - response, cleanup_func = util.fetch_url_response(url, headers) - - headers = response.getheaders() - if isinstance(headers, urllib3._collections.HTTPHeaderDict): - headers = headers.items() - - start_response(str(response.status) + ' ' + response.reason, headers) - while True: - # a bit over 3 seconds of 360p video - # we want each TCP packet to transmit in large multiples, - # such as 65,536, so we shouldn't read in small chunks - # such as 8192 lest that causes the socket library to limit the - # TCP window size - # Might need fine-tuning, since this gives us 4*65536 - # The tradeoff is that larger values (such as 6 seconds) only - # allows video to buffer in those increments, meaning user must wait - # until the entire chunk is downloaded before video starts playing - content_part = response.read(32*8192) - if not content_part: + try_num = 1 + first_attempt = True + current_attempt_position = 0 + while try_num <= 3: # Try a given byte position three times + if not first_attempt: + print('(Try %d)' % try_num, 'Trying with', send_headers['Range']) + + if video: + params = urllib.parse.parse_qs(env['QUERY_STRING']) + params_use_tor = int(params.get('use_tor', '0')[0]) + use_tor = (settings.route_tor == 2) or params_use_tor + response, cleanup_func = util.fetch_url_response(url, send_headers, + use_tor=use_tor, + max_redirects=10) + else: + response, cleanup_func = util.fetch_url_response(url, send_headers) + + response_headers = response.getheaders() + if isinstance(response_headers, urllib3._collections.HTTPHeaderDict): + response_headers = response_headers.items() + + if first_attempt: + start_response(str(response.status) + ' ' + response.reason, + response_headers) + + content_length = int(dict(response_headers).get('Content-Length', 0)) + if response.status >= 400: + print('Error: Youtube returned "%d %s" while routing %s' % ( + response.status, response.reason, url.split('?')[0])) + + total_received = 0 + retry = False + while True: + # a bit over 3 seconds of 360p video + # we want each TCP packet to transmit in large multiples, + # such as 65,536, so we shouldn't read in small chunks + # such as 8192 lest that causes the socket library to limit the + # TCP window size + # Might need fine-tuning, since this gives us 4*65536 + # The tradeoff is that larger values (such as 6 seconds) only + # allows video to buffer in those increments, meaning user must + # wait until the entire chunk is downloaded before video starts + # playing + content_part = response.read(32*8192) + total_received += len(content_part) + if not content_part: + # Sometimes Youtube closes the connection before sending all of + # the content. Retry with a range request for the missing + # content. See + # https://github.com/user234683/youtube-local/issues/40 + if total_received < content_length: + if 'Range' in send_headers: + int_range = parse_range(send_headers['Range'], + content_length) + if not int_range: # give up b/c unrecognized range + break + start, end = int_range + else: + start, end = 0, (content_length - 1) + + fail_byte = start + total_received + send_headers['Range'] = 'bytes=%d-%d' % (fail_byte, end) + print( + 'Warning: Youtube closed the connection before byte', + str(fail_byte) + '.', 'Expected', start+content_length, + 'bytes.' + ) + + retry = True + first_attempt = False + if fail_byte == current_attempt_position: + try_num += 1 + else: + try_num = 1 + current_attempt_position = fail_byte + break + yield content_part + cleanup_func(response) + if retry: + # Youtube will return 503 Service Unavailable if you do a bunch + # of range requests too quickly. + time.sleep(1) + continue + else: break - yield content_part - - cleanup_func(response) + else: # no break + print('Error: Youtube closed the connection before', + 'providing all content. Retried three times:', url.split('?')[0]) def proxy_video(env, start_response): -- cgit v1.2.3