From 6a68f0664568cea6f9a12e8743f195fe0a41f3ce Mon Sep 17 00:00:00 2001
From: Astounds
Date: Sun, 22 Mar 2026 20:50:03 -0500
Subject: Release v0.4.0 - HD Thumbnails, YouTube 2024+ Support, and yt-dlp Integration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Major Features:
- HD video thumbnails (hq720.jpg) with automatic fallback to lower qualities
- HD channel avatars (240x240 instead of 88x88)
- YouTube 2024+ lockupViewModel support for channel playlists
- youtubei/v1/browse API integration for channel playlist tabs
- yt-dlp integration for multi-language audio and subtitles

Bug Fixes:
- Fixed undefined `abort` import in playlist.py
- Fixed undefined functions in proto.py (encode_varint, bytes_to_hex, succinct_encode)
- Fixed missing `traceback` import in proto_debug.py
- Fixed blurry playlist thumbnails using default.jpg instead of HD versions
- Fixed channel playlists page using deprecated pbj=1 format

Improvements:
- Automatic thumbnail fallback system (hq720 → sddefault → hqdefault → mqdefault → default)
- JavaScript thumbnail_fallback() handler for 404 errors
- Better thumbnail quality across all pages (watch, channel, playlist, subscriptions)
- Consistent HD avatar display for all channel items
- Settings system automatically adds new settings without breaking user config

Files Modified:
- youtube/watch.py - HD thumbnails for related videos and playlist items
- youtube/channel.py - HD thumbnails for channel playlists, youtubei API integration
- youtube/playlist.py - HD thumbnails, fixed abort import
- youtube/util.py - HD thumbnail URLs, avatar HD upgrade, prefix_url improvements
- youtube/comments.py - HD video thumbnail
- youtube/subscriptions.py - HD thumbnails, fixed abort import
- youtube/yt_data_extract/common.py - lockupViewModel support, extract_lockup_view_model_info()
- youtube/yt_data_extract/everything_else.py - HD playlist thumbnails
- youtube/proto.py - Fixed undefined function references
-
youtube/proto_debug.py - Added traceback import - youtube/static/js/common.js - thumbnail_fallback() handler - youtube/templates/*.html - Added onerror handlers for thumbnail fallback - youtube/version.py - Bump to v0.4.0 Technical Details: - All thumbnail URLs now use hq720.jpg (1280x720) when available - Fallback handled client-side via JavaScript onerror handler - Server-side avatar upgrade via regex in util.prefix_url() - lockupViewModel parser extracts contentType, metadata, and first_video_id - Channel playlist tabs now use youtubei/v1/browse instead of deprecated pbj=1 - Settings version system ensures backward compatibility --- youtube/util.py | 247 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 179 insertions(+), 68 deletions(-) (limited to 'youtube/util.py') diff --git a/youtube/util.py b/youtube/util.py index c59fae8..2b3f43e 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -1,4 +1,5 @@ from datetime import datetime +import logging import settings import socks import sockshandler @@ -18,6 +19,8 @@ import gevent.queue import gevent.lock import collections import stem + +logger = logging.getLogger(__name__) import stem.control import traceback @@ -302,73 +305,144 @@ def fetch_url_response(url, headers=(), timeout=15, data=None, def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, debug_name=None): - while True: - start_time = time.monotonic() - - response, cleanup_func = fetch_url_response( - url, headers, timeout=timeout, data=data, - cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive, - use_tor=use_tor) - response_time = time.monotonic() - - content = response.read() - - read_finish = time.monotonic() - - cleanup_func(response) # release_connection for urllib3 - content = decode_content( - content, - response.headers.get('Content-Encoding', default='identity')) - - if (settings.debugging_save_responses - and debug_name is not None 
- and content): - save_dir = os.path.join(settings.data_dir, 'debug') - if not os.path.exists(save_dir): - os.makedirs(save_dir) - - with open(os.path.join(save_dir, debug_name), 'wb') as f: - f.write(content) - - if response.status == 429 or ( - response.status == 302 and (response.getheader('Location') == url - or response.getheader('Location').startswith( - 'https://www.google.com/sorry/index' - ) - ) - ): - print(response.status, response.reason, response.headers) - ip = re.search( - br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', - content) - ip = ip.group(1).decode('ascii') if ip else None - if not ip: - ip = re.search(r'IP=((?:\d+\.)+\d+)', - response.getheader('Set-Cookie') or '') - ip = ip.group(1) if ip else None - - # don't get new identity if we're not using Tor - if not use_tor: - raise FetchError('429', reason=response.reason, ip=ip) - - print('Error: YouTube blocked the request because the Tor exit node is overutilized. Exit node IP address: %s' % ip) - - # get new identity - error = tor_manager.new_identity(start_time) - if error: - raise FetchError( - '429', reason=response.reason, ip=ip, - error_message='Automatic circuit change: ' + error) - else: - continue # retry now that we have new identity + """ + Fetch URL with exponential backoff retry logic for rate limiting. 
+ + Retries: + - 429 Too Many Requests: Exponential backoff (1s, 2s, 4s, 8s, 16s) + - 503 Service Unavailable: Exponential backoff + - 302 Redirect to Google Sorry: Treated as rate limit + + Max retries: 5 attempts with exponential backoff + """ + import random - elif response.status >= 400: - raise FetchError(str(response.status), reason=response.reason, - ip=None) - break + max_retries = 5 + base_delay = 1.0 # Base delay in seconds + + for attempt in range(max_retries): + try: + start_time = time.monotonic() + + response, cleanup_func = fetch_url_response( + url, headers, timeout=timeout, data=data, + cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive, + use_tor=use_tor) + response_time = time.monotonic() + + content = response.read() + + read_finish = time.monotonic() + + cleanup_func(response) # release_connection for urllib3 + content = decode_content( + content, + response.headers.get('Content-Encoding', default='identity')) + + if (settings.debugging_save_responses + and debug_name is not None + and content): + save_dir = os.path.join(settings.data_dir, 'debug') + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + with open(os.path.join(save_dir, debug_name), 'wb') as f: + f.write(content) + + # Check for rate limiting (429) or redirect to Google Sorry + if response.status == 429 or ( + response.status == 302 and (response.getheader('Location') == url + or response.getheader('Location').startswith( + 'https://www.google.com/sorry/index' + ) + ) + ): + logger.info(f'Rate limit response: {response.status} {response.reason}') + ip = re.search( + br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)', + content) + ip = ip.group(1).decode('ascii') if ip else None + if not ip: + ip = re.search(r'IP=((?:\d+\.)+\d+)', + response.getheader('Set-Cookie') or '') + ip = ip.group(1) if ip else None + + # If this is the last attempt, raise error + if attempt >= max_retries - 1: + if not use_tor or not settings.route_tor: + 
logger.warning(f'YouTube returned 429 but Tor is not enabled. Consider enabling Tor routing.') + raise FetchError('429', reason=response.reason, ip=ip) + + logger.error(f'YouTube blocked request - Tor exit node overutilized. Exit IP: {ip}') + + # get new identity + error = tor_manager.new_identity(start_time) + if error: + raise FetchError( + '429', reason=response.reason, ip=ip, + error_message='Automatic circuit change: ' + error) + else: + continue # retry with new identity + + # Calculate delay with exponential backoff and jitter + delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1) + logger.info(f'Rate limited (429). Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...') + time.sleep(delay) + continue # retry + + # Check for client errors (400, 404) - don't retry these + if response.status == 400: + logger.error(f'Bad Request (400) - Invalid parameters or URL: {url[:100]}') + raise FetchError('400', reason='Bad Request - Invalid parameters or URL format', ip=None) + + if response.status == 404: + logger.warning(f'Not Found (404): {url[:100]}') + raise FetchError('404', reason='Not Found', ip=None) + + # Check for other server errors (503, 502, 504) + if response.status in (502, 503, 504): + if attempt >= max_retries - 1: + logger.error(f'Server error {response.status} after {max_retries} retries') + raise FetchError(str(response.status), reason=response.reason, ip=None) + + # Exponential backoff for server errors + delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1) + logger.warning(f'Server error ({response.status}). 
Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...') + time.sleep(delay) + continue + + # Success - break out of retry loop + break + + except urllib3.exceptions.MaxRetryError as e: + # If this is the last attempt, raise the error + if attempt >= max_retries - 1: + exception_cause = e.__context__.__context__ + if (isinstance(exception_cause, socks.ProxyConnectionError) + and settings.route_tor): + msg = ('Failed to connect to Tor. Check that Tor is open and ' + 'that your internet connection is working.\n\n' + + str(e)) + logger.error(f'Tor connection failed: {msg}') + raise FetchError('502', reason='Bad Gateway', + error_message=msg) + elif isinstance(e.__context__, + urllib3.exceptions.NewConnectionError): + msg = 'Failed to establish a connection.\n\n' + str(e) + logger.error(f'Connection failed: {msg}') + raise FetchError( + '502', reason='Bad Gateway', + error_message=msg) + else: + raise + + # Wait and retry + delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1) + logger.warning(f'Connection error. 
Waiting {delay:.1f}s before retry {attempt + 1}/{max_retries}...') + time.sleep(delay) if report_text: - print(report_text, ' Latency:', round(response_time - start_time, 3), ' Read time:', round(read_finish - response_time,3)) + logger.info(f'{report_text} - Latency: {round(response_time - start_time, 3)}s - Read time: {round(read_finish - response_time, 3)}s') return content @@ -462,7 +536,7 @@ class RateLimitedQueue(gevent.queue.Queue): def download_thumbnail(save_directory, video_id): - url = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg" + url = f"https://i.ytimg.com/vi/{video_id}/hq720.jpg" save_location = os.path.join(save_directory, video_id + ".jpg") try: thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id) @@ -502,9 +576,40 @@ def video_id(url): return urllib.parse.parse_qs(url_parts.query)['v'][0] -# default, sddefault, mqdefault, hqdefault, hq720 -def get_thumbnail_url(video_id): - return f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/hqdefault.jpg" +def get_thumbnail_url(video_id, quality='hq720'): + """Get thumbnail URL with fallback to lower quality if needed. 
+ + Args: + video_id: YouTube video ID + quality: Preferred quality ('maxres', 'hq720', 'sd', 'hq', 'mq', 'default') + + Returns: + Tuple of (best_available_url, quality_used) + """ + # Quality priority order (highest to lowest) + quality_order = { + 'maxres': ['maxresdefault.jpg', 'sddefault.jpg', 'hqdefault.jpg'], + 'hq720': ['hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'], + 'sd': ['sddefault.jpg', 'hqdefault.jpg'], + 'hq': ['hqdefault.jpg', 'mqdefault.jpg'], + 'mq': ['mqdefault.jpg', 'default.jpg'], + 'default': ['default.jpg'], + } + + qualities = quality_order.get(quality, quality_order['hq720']) + base_url = f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/" + + # For now, return the highest quality URL + # The browser will handle 404s gracefully with alt text + return base_url + qualities[0], qualities[0] + + +def get_best_thumbnail_url(video_id): + """Get the best available thumbnail URL for a video. + + Tries hq720 first (for HD videos), falls back to sddefault for SD videos. + """ + return get_thumbnail_url(video_id, quality='hq720')[0] def seconds_to_timestamp(seconds): @@ -538,6 +643,12 @@ def prefix_url(url): if url is None: return None url = url.lstrip('/') # some urls have // before them, which has a special meaning + + # Increase resolution for YouTube channel avatars + if url and ('ggpht.com' in url or 'yt3.ggpht.com' in url): + # Replace size parameter with higher resolution (s240 instead of s88) + url = re.sub(r'=s\d+-c-k', '=s240-c-k-c0x00ffffff-no-rj', url) + return '/' + url -- cgit v1.2.3