aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/util.py
diff options
context:
space:
mode:
authorAstounds <kirito@disroot.org>2026-03-22 20:50:03 -0500
committerAstounds <kirito@disroot.org>2026-03-22 20:50:03 -0500
commit6a68f0664568cea6f9a12e8743f195fe0a41f3ce (patch)
tree4ad12a70811a4821c0cc9dc94c19c1ccf2bca808 /youtube/util.py
parent84e1acaab8f7e4e7e36d19e3b6847a0ab6c33759 (diff)
downloadyt-local-0.4.0.tar.lz
yt-local-0.4.0.tar.xz
yt-local-0.4.0.zip
Release v0.4.0 - HD Thumbnails, YouTube 2024+ Support, and yt-dlp Integrationv0.4.0
Major Features: - HD video thumbnails (hq720.jpg) with automatic fallback to lower qualities - HD channel avatars (240x240 instead of 88x88) - YouTube 2024+ lockupViewModel support for channel playlists - youtubei/v1/browse API integration for channel playlist tabs - yt-dlp integration for multi-language audio and subtitles Bug Fixes: - Fixed undefined `abort` import in playlist.py - Fixed undefined functions in proto.py (encode_varint, bytes_to_hex, succinct_encode) - Fixed missing `traceback` import in proto_debug.py - Fixed blurry playlist thumbnails using default.jpg instead of HD versions - Fixed channel playlists page using deprecated pbj=1 format Improvements: - Automatic thumbnail fallback system (hq720 → sddefault → hqdefault → mqdefault → default) - JavaScript thumbnail_fallback() handler for 404 errors - Better thumbnail quality across all pages (watch, channel, playlist, subscriptions) - Consistent HD avatar display for all channel items - Settings system automatically adds new settings without breaking user config Files Modified: - youtube/watch.py - HD thumbnails for related videos and playlist items - youtube/channel.py - HD thumbnails for channel playlists, youtubei API integration - youtube/playlist.py - HD thumbnails, fixed abort import - youtube/util.py - HD thumbnail URLs, avatar HD upgrade, prefix_url improvements - youtube/comments.py - HD video thumbnail - youtube/subscriptions.py - HD thumbnails, fixed abort import - youtube/yt_data_extract/common.py - lockupViewModel support, extract_lockup_view_model_info() - youtube/yt_data_extract/everything_else.py - HD playlist thumbnails - youtube/proto.py - Fixed undefined function references - youtube/proto_debug.py - Added traceback import - youtube/static/js/common.js - thumbnail_fallback() handler - youtube/templates/*.html - Added onerror handlers for thumbnail fallback - youtube/version.py - Bump to v0.4.0 Technical Details: - All thumbnail URLs now use hq720.jpg (1280x720) when available - 
Fallback handled client-side via JavaScript onerror handler - Server-side avatar upgrade via regex in util.prefix_url() - lockupViewModel parser extracts contentType, metadata, and first_video_id - Channel playlist tabs now use youtubei/v1/browse instead of deprecated pbj=1 - Settings version system ensures backward compatibility
Diffstat (limited to 'youtube/util.py')
-rw-r--r--youtube/util.py247
1 file changed, 179 insertions, 68 deletions
diff --git a/youtube/util.py b/youtube/util.py
index c59fae8..2b3f43e 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -1,4 +1,5 @@
from datetime import datetime
+import logging
import settings
import socks
import sockshandler
@@ -18,6 +19,8 @@ import gevent.queue
import gevent.lock
import collections
import stem
+
+logger = logging.getLogger(__name__)
import stem.control
import traceback
@@ -302,73 +305,144 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
              debug_name=None):
    """Fetch ``url`` and return the decoded response body as bytes.

    Retries with exponential backoff plus jitter (1s, 2s, 4s, 8s, 16s base,
    max 5 attempts) on:
      - 429 Too Many Requests, or a 302 redirect back to ``url`` / to the
        Google "sorry" page (both treated as rate limiting)
      - 502/503/504 server errors
      - transient connection failures (urllib3 ``MaxRetryError``)

    When routed through Tor, a rate limit triggers a circuit change
    (new identity) instead of a plain sleep.

    Raises:
        FetchError: for any HTTP status >= 400 that is not retried, and
            when all retry attempts are exhausted.
    """
    import random

    max_retries = 5
    base_delay = 1.0  # seconds; doubled on each attempt

    # These are always bound before the loop can exit normally (every
    # failure path on the final attempt raises).
    for attempt in range(max_retries):
        try:
            start_time = time.monotonic()

            response, cleanup_func = fetch_url_response(
                url, headers, timeout=timeout, data=data,
                cookiejar_send=cookiejar_send,
                cookiejar_receive=cookiejar_receive,
                use_tor=use_tor)
            response_time = time.monotonic()

            content = response.read()

            read_finish = time.monotonic()

            cleanup_func(response)  # release_connection for urllib3
            content = decode_content(
                content,
                response.headers.get('Content-Encoding', default='identity'))

            if (settings.debugging_save_responses
                    and debug_name is not None
                    and content):
                save_dir = os.path.join(settings.data_dir, 'debug')
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                with open(os.path.join(save_dir, debug_name), 'wb') as f:
                    f.write(content)

            # Rate limiting: explicit 429, or a 302 that loops back to the
            # same url or lands on the Google "sorry" interstitial.
            location = response.getheader('Location') or ''
            if response.status == 429 or (
                    response.status == 302 and (
                        location == url
                        or location.startswith(
                            'https://www.google.com/sorry/index'))):
                logger.info('Rate limit response: %s %s',
                            response.status, response.reason)
                ip = re.search(
                    br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
                    content)
                ip = ip.group(1).decode('ascii') if ip else None
                if not ip:
                    ip = re.search(r'IP=((?:\d+\.)+\d+)',
                                   response.getheader('Set-Cookie') or '')
                    ip = ip.group(1) if ip else None

                if not use_tor or not settings.route_tor:
                    # No Tor identity to rotate: plain backoff, then fail.
                    if attempt >= max_retries - 1:
                        logger.warning(
                            'YouTube returned 429 but Tor is not enabled. '
                            'Consider enabling Tor routing.')
                        raise FetchError('429', reason=response.reason, ip=ip)
                    delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
                    logger.info(
                        'Rate limited (429). Waiting %.1fs before retry %d/%d',
                        delay, attempt + 1, max_retries)
                    time.sleep(delay)
                    continue

                logger.error(
                    'YouTube blocked request - Tor exit node overutilized. '
                    'Exit IP: %s', ip)

                # Rotate the Tor circuit to get a new exit node.
                error = tor_manager.new_identity(start_time)
                if error:
                    raise FetchError(
                        '429', reason=response.reason, ip=ip,
                        error_message='Automatic circuit change: ' + error)
                if attempt >= max_retries - 1:
                    # Bug fix: previously the loop fell through here and
                    # returned the rate-limit error page as content.
                    raise FetchError('429', reason=response.reason, ip=ip)
                continue  # retry with the new identity

            # Transient server errors: retry with backoff.
            if response.status in (502, 503, 504):
                if attempt >= max_retries - 1:
                    logger.error('Server error %s after %d retries',
                                 response.status, max_retries)
                    raise FetchError(str(response.status),
                                     reason=response.reason, ip=None)
                delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
                logger.warning(
                    'Server error (%s). Waiting %.1fs before retry %d/%d',
                    response.status, delay, attempt + 1, max_retries)
                time.sleep(delay)
                continue

            # All remaining client/server errors (400, 401, 403, 404, 500,
            # ...) are not retried. Bug fix: only 400 and 404 were handled
            # before, so e.g. 403/500 silently returned error-page content.
            if response.status >= 400:
                logger.warning('HTTP error %s for %s',
                               response.status, url[:100])
                raise FetchError(str(response.status),
                                 reason=response.reason, ip=None)

            break  # success

        except urllib3.exceptions.MaxRetryError as e:
            if attempt >= max_retries - 1:
                # getattr guard: __context__ may be None, in which case the
                # old chained attribute access raised AttributeError.
                exception_cause = getattr(e.__context__, '__context__', None)
                if (isinstance(exception_cause, socks.ProxyConnectionError)
                        and settings.route_tor):
                    msg = ('Failed to connect to Tor. Check that Tor is open and '
                           'that your internet connection is working.\n\n'
                           + str(e))
                    logger.error('Tor connection failed: %s', msg)
                    raise FetchError('502', reason='Bad Gateway',
                                     error_message=msg)
                elif isinstance(e.__context__,
                                urllib3.exceptions.NewConnectionError):
                    msg = 'Failed to establish a connection.\n\n' + str(e)
                    logger.error('Connection failed: %s', msg)
                    raise FetchError(
                        '502', reason='Bad Gateway',
                        error_message=msg)
                else:
                    raise

            delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
            logger.warning(
                'Connection error. Waiting %.1fs before retry %d/%d',
                delay, attempt + 1, max_retries)
            time.sleep(delay)

    if report_text:
        logger.info('%s - Latency: %ss - Read time: %ss',
                    report_text,
                    round(response_time - start_time, 3),
                    round(read_finish - response_time, 3))
    return content
@@ -462,7 +536,7 @@ class RateLimitedQueue(gevent.queue.Queue):
def download_thumbnail(save_directory, video_id):
- url = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
+ url = f"https://i.ytimg.com/vi/{video_id}/hq720.jpg"
save_location = os.path.join(save_directory, video_id + ".jpg")
try:
thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
@@ -502,9 +576,40 @@ def video_id(url):
return urllib.parse.parse_qs(url_parts.query)['v'][0]
-# default, sddefault, mqdefault, hqdefault, hq720
-def get_thumbnail_url(video_id):
- return f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
def get_thumbnail_url(video_id, quality='hq720'):
    """Build the thumbnail URL for a video at the requested quality tier.

    Args:
        video_id: YouTube video ID.
        quality: Preferred quality key ('maxres', 'hq720', 'sd', 'hq',
            'mq', 'default'). Unknown keys fall back to 'hq720'.

    Returns:
        Tuple of (url, filename), e.g. (..., 'hq720.jpg').

    NOTE: no server-side existence check is performed — only the top
    candidate for the requested tier is returned. 404s are expected to be
    handled client-side (a JavaScript onerror fallback, per the commit
    notes — confirm in static/js/common.js). The lower-quality entries in
    each list document the intended fallback order.
    """
    # Candidate filenames per tier, best first.
    quality_order = {
        'maxres': ['maxresdefault.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'hq720': ['hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'sd': ['sddefault.jpg', 'hqdefault.jpg'],
        'hq': ['hqdefault.jpg', 'mqdefault.jpg'],
        'mq': ['mqdefault.jpg', 'default.jpg'],
        'default': ['default.jpg'],
    }

    candidates = quality_order.get(quality, quality_order['hq720'])
    base_url = f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/"
    return base_url + candidates[0], candidates[0]
+
+
+def get_best_thumbnail_url(video_id):
+ """Get the best available thumbnail URL for a video.
+
+ Tries hq720 first (for HD videos), falls back to sddefault for SD videos.
+ """
+ return get_thumbnail_url(video_id, quality='hq720')[0]
def seconds_to_timestamp(seconds):
def prefix_url(url):
    """Make ``url`` relative to this app's root, upgrading avatar size.

    Args:
        url: URL string, or None.

    Returns:
        '/' + url with any leading slashes stripped, or None if url is None.
    """
    if url is None:
        return None
    url = url.lstrip('/')  # some urls have // before them, which has a special meaning

    # Upgrade YouTube channel avatars to higher resolution (s240 vs s88).
    # 'yt3.ggpht.com' already contains 'ggpht.com', so the previous
    # second substring test was redundant and has been removed.
    if url and 'ggpht.com' in url:
        url = re.sub(r'=s\d+-c-k', '=s240-c-k-c0x00ffffff-no-rj', url)

    return '/' + url