aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/util.py
diff options
context:
space:
mode:
authorAstounds <kirito@disroot.org>2026-03-22 20:50:03 -0500
committerAstounds <kirito@disroot.org>2026-03-22 20:50:03 -0500
commit6a68f0664568cea6f9a12e8743f195fe0a41f3ce (patch)
tree4ad12a70811a4821c0cc9dc94c19c1ccf2bca808 /youtube/util.py
parent84e1acaab8f7e4e7e36d19e3b6847a0ab6c33759 (diff)
downloadyt-local-0.4.0.tar.lz
yt-local-0.4.0.tar.xz
yt-local-0.4.0.zip
Release v0.4.0 - HD Thumbnails, YouTube 2024+ Support, and yt-dlp Integrationv0.4.0
Major Features: - HD video thumbnails (hq720.jpg) with automatic fallback to lower qualities - HD channel avatars (240x240 instead of 88x88) - YouTube 2024+ lockupViewModel support for channel playlists - youtubei/v1/browse API integration for channel playlist tabs - yt-dlp integration for multi-language audio and subtitles Bug Fixes: - Fixed undefined `abort` import in playlist.py - Fixed undefined functions in proto.py (encode_varint, bytes_to_hex, succinct_encode) - Fixed missing `traceback` import in proto_debug.py - Fixed blurry playlist thumbnails using default.jpg instead of HD versions - Fixed channel playlists page using deprecated pbj=1 format Improvements: - Automatic thumbnail fallback system (hq720 → sddefault → hqdefault → mqdefault → default) - JavaScript thumbnail_fallback() handler for 404 errors - Better thumbnail quality across all pages (watch, channel, playlist, subscriptions) - Consistent HD avatar display for all channel items - Settings system automatically adds new settings without breaking user config Files Modified: - youtube/watch.py - HD thumbnails for related videos and playlist items - youtube/channel.py - HD thumbnails for channel playlists, youtubei API integration - youtube/playlist.py - HD thumbnails, fixed abort import - youtube/util.py - HD thumbnail URLs, avatar HD upgrade, prefix_url improvements - youtube/comments.py - HD video thumbnail - youtube/subscriptions.py - HD thumbnails, fixed abort import - youtube/yt_data_extract/common.py - lockupViewModel support, extract_lockup_view_model_info() - youtube/yt_data_extract/everything_else.py - HD playlist thumbnails - youtube/proto.py - Fixed undefined function references - youtube/proto_debug.py - Added traceback import - youtube/static/js/common.js - thumbnail_fallback() handler - youtube/templates/*.html - Added onerror handlers for thumbnail fallback - youtube/version.py - Bump to v0.4.0 Technical Details: - All thumbnail URLs now use hq720.jpg (1280x720) when available - 
Fallback handled client-side via JavaScript onerror handler - Server-side avatar upgrade via regex in util.prefix_url() - lockupViewModel parser extracts contentType, metadata, and first_video_id - Channel playlist tabs now use youtubei/v1/browse instead of deprecated pbj=1 - Settings version system ensures backward compatibility
Diffstat (limited to 'youtube/util.py')
-rw-r--r--youtube/util.py247
1 file changed, 179 insertions, 68 deletions
diff --git a/youtube/util.py b/youtube/util.py
index c59fae8..2b3f43e 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -1,4 +1,5 @@
from datetime import datetime
+import logging
import settings
import socks
import sockshandler
@@ -18,6 +19,8 @@ import gevent.queue
import gevent.lock
import collections
import stem
+
+logger = logging.getLogger(__name__)
import stem.control
import traceback
@@ -302,73 +305,144 @@ def fetch_url_response(url, headers=(), timeout=15, data=None,
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
              cookiejar_send=None, cookiejar_receive=None, use_tor=True,
              debug_name=None):
    """Fetch ``url`` and return the decoded response body as bytes.

    Retries with exponential backoff plus jitter (1s, 2s, 4s, 8s, 16s base,
    max 5 attempts) on:
      - 429 Too Many Requests, or a 302 redirect back to ``url`` / to the
        Google "sorry" page (both treated as rate limiting)
      - 502/503/504 server errors
      - transient connection failures (urllib3 ``MaxRetryError``)

    When routed through Tor, a rate limit triggers a circuit change
    (new identity) instead of a plain sleep.

    Raises:
        FetchError: for any HTTP status >= 400 that is not retried, and
            when all retry attempts are exhausted.
    """
    import random

    max_retries = 5
    base_delay = 1.0  # seconds; doubled on each attempt

    # These are always bound before the loop can exit normally (every
    # failure path on the final attempt raises).
    for attempt in range(max_retries):
        try:
            start_time = time.monotonic()

            response, cleanup_func = fetch_url_response(
                url, headers, timeout=timeout, data=data,
                cookiejar_send=cookiejar_send,
                cookiejar_receive=cookiejar_receive,
                use_tor=use_tor)
            response_time = time.monotonic()

            content = response.read()

            read_finish = time.monotonic()

            cleanup_func(response)  # release_connection for urllib3
            content = decode_content(
                content,
                response.headers.get('Content-Encoding', default='identity'))

            if (settings.debugging_save_responses
                    and debug_name is not None
                    and content):
                save_dir = os.path.join(settings.data_dir, 'debug')
                if not os.path.exists(save_dir):
                    os.makedirs(save_dir)

                with open(os.path.join(save_dir, debug_name), 'wb') as f:
                    f.write(content)

            # Rate limiting: explicit 429, or a 302 that loops back to the
            # same url or lands on the Google "sorry" interstitial.
            location = response.getheader('Location') or ''
            if response.status == 429 or (
                    response.status == 302 and (
                        location == url
                        or location.startswith(
                            'https://www.google.com/sorry/index'))):
                logger.info('Rate limit response: %s %s',
                            response.status, response.reason)
                ip = re.search(
                    br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
                    content)
                ip = ip.group(1).decode('ascii') if ip else None
                if not ip:
                    ip = re.search(r'IP=((?:\d+\.)+\d+)',
                                   response.getheader('Set-Cookie') or '')
                    ip = ip.group(1) if ip else None

                if not use_tor or not settings.route_tor:
                    # No Tor identity to rotate: plain backoff, then fail.
                    if attempt >= max_retries - 1:
                        logger.warning(
                            'YouTube returned 429 but Tor is not enabled. '
                            'Consider enabling Tor routing.')
                        raise FetchError('429', reason=response.reason, ip=ip)
                    delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
                    logger.info(
                        'Rate limited (429). Waiting %.1fs before retry %d/%d',
                        delay, attempt + 1, max_retries)
                    time.sleep(delay)
                    continue

                logger.error(
                    'YouTube blocked request - Tor exit node overutilized. '
                    'Exit IP: %s', ip)

                # Rotate the Tor circuit to get a new exit node.
                error = tor_manager.new_identity(start_time)
                if error:
                    raise FetchError(
                        '429', reason=response.reason, ip=ip,
                        error_message='Automatic circuit change: ' + error)
                if attempt >= max_retries - 1:
                    # Bug fix: previously the loop fell through here and
                    # returned the rate-limit error page as content.
                    raise FetchError('429', reason=response.reason, ip=ip)
                continue  # retry with the new identity

            # Transient server errors: retry with backoff.
            if response.status in (502, 503, 504):
                if attempt >= max_retries - 1:
                    logger.error('Server error %s after %d retries',
                                 response.status, max_retries)
                    raise FetchError(str(response.status),
                                     reason=response.reason, ip=None)
                delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
                logger.warning(
                    'Server error (%s). Waiting %.1fs before retry %d/%d',
                    response.status, delay, attempt + 1, max_retries)
                time.sleep(delay)
                continue

            # All remaining client/server errors (400, 401, 403, 404, 500,
            # ...) are not retried. Bug fix: only 400 and 404 were handled
            # before, so e.g. 403/500 silently returned error-page content.
            if response.status >= 400:
                logger.warning('HTTP error %s for %s',
                               response.status, url[:100])
                raise FetchError(str(response.status),
                                 reason=response.reason, ip=None)

            break  # success

        except urllib3.exceptions.MaxRetryError as e:
            if attempt >= max_retries - 1:
                # getattr guard: __context__ may be None, in which case the
                # old chained attribute access raised AttributeError.
                exception_cause = getattr(e.__context__, '__context__', None)
                if (isinstance(exception_cause, socks.ProxyConnectionError)
                        and settings.route_tor):
                    msg = ('Failed to connect to Tor. Check that Tor is open and '
                           'that your internet connection is working.\n\n'
                           + str(e))
                    logger.error('Tor connection failed: %s', msg)
                    raise FetchError('502', reason='Bad Gateway',
                                     error_message=msg)
                elif isinstance(e.__context__,
                                urllib3.exceptions.NewConnectionError):
                    msg = 'Failed to establish a connection.\n\n' + str(e)
                    logger.error('Connection failed: %s', msg)
                    raise FetchError(
                        '502', reason='Bad Gateway',
                        error_message=msg)
                else:
                    raise

            delay = (base_delay * (2 ** attempt)) + random.uniform(0, 1)
            logger.warning(
                'Connection error. Waiting %.1fs before retry %d/%d',
                delay, attempt + 1, max_retries)
            time.sleep(delay)

    if report_text:
        logger.info('%s - Latency: %ss - Read time: %ss',
                    report_text,
                    round(response_time - start_time, 3),
                    round(read_finish - response_time, 3))
    return content
@@ -462,7 +536,7 @@ class RateLimitedQueue(gevent.queue.Queue):
def download_thumbnail(save_directory, video_id):
- url = f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
+ url = f"https://i.ytimg.com/vi/{video_id}/hq720.jpg"
save_location = os.path.join(save_directory, video_id + ".jpg")
try:
thumbnail = fetch_url(url, report_text="Saved thumbnail: " + video_id)
@@ -502,9 +576,40 @@ def video_id(url):
return urllib.parse.parse_qs(url_parts.query)['v'][0]
-# default, sddefault, mqdefault, hqdefault, hq720
-def get_thumbnail_url(video_id):
- return f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/hqdefault.jpg"
def get_thumbnail_url(video_id, quality='hq720'):
    """Build the thumbnail URL for a video at the requested quality tier.

    Args:
        video_id: YouTube video ID.
        quality: Preferred quality key ('maxres', 'hq720', 'sd', 'hq',
            'mq', 'default'). Unknown keys fall back to 'hq720'.

    Returns:
        Tuple of (url, filename), e.g. (..., 'hq720.jpg').

    NOTE: no server-side existence check is performed — only the top
    candidate for the requested tier is returned. 404s are expected to be
    handled client-side (a JavaScript onerror fallback, per the commit
    notes — confirm in static/js/common.js). The lower-quality entries in
    each list document the intended fallback order.
    """
    # Candidate filenames per tier, best first.
    quality_order = {
        'maxres': ['maxresdefault.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'hq720': ['hq720.jpg', 'sddefault.jpg', 'hqdefault.jpg'],
        'sd': ['sddefault.jpg', 'hqdefault.jpg'],
        'hq': ['hqdefault.jpg', 'mqdefault.jpg'],
        'mq': ['mqdefault.jpg', 'default.jpg'],
        'default': ['default.jpg'],
    }

    candidates = quality_order.get(quality, quality_order['hq720'])
    base_url = f"{settings.img_prefix}https://i.ytimg.com/vi/{video_id}/"
    return base_url + candidates[0], candidates[0]
+
+
+def get_best_thumbnail_url(video_id):
+ """Get the best available thumbnail URL for a video.
+
+ Tries hq720 first (for HD videos), falls back to sddefault for SD videos.
+ """
+ return get_thumbnail_url(video_id, quality='hq720')[0]
def seconds_to_timestamp(seconds):
def prefix_url(url):
    """Make ``url`` relative to this app's root, upgrading avatar size.

    Args:
        url: URL string, or None.

    Returns:
        '/' + url with any leading slashes stripped, or None if url is None.
    """
    if url is None:
        return None
    url = url.lstrip('/')  # some urls have // before them, which has a special meaning

    # Upgrade YouTube channel avatars to higher resolution (s240 vs s88).
    # 'yt3.ggpht.com' already contains 'ggpht.com', so the previous
    # second substring test was redundant and has been removed.
    if url and 'ggpht.com' in url:
        url = re.sub(r'=s\d+-c-k', '=s240-c-k-c0x00ffffff-no-rj', url)

    return '/' + url