aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/yt_data_extract
diff options
context:
space:
mode:
authorAstounds <kirito@disroot.org>2026-03-22 20:50:03 -0500
committerAstounds <kirito@disroot.org>2026-03-22 20:50:03 -0500
commit6a68f0664568cea6f9a12e8743f195fe0a41f3ce (patch)
tree4ad12a70811a4821c0cc9dc94c19c1ccf2bca808 /youtube/yt_data_extract
parent84e1acaab8f7e4e7e36d19e3b6847a0ab6c33759 (diff)
downloadyt-local-0.4.0.tar.lz
yt-local-0.4.0.tar.xz
yt-local-0.4.0.zip
Release v0.4.0 - HD Thumbnails, YouTube 2024+ Support, and yt-dlp Integration (tag: v0.4.0)
Major Features:
- HD video thumbnails (hq720.jpg) with automatic fallback to lower qualities
- HD channel avatars (240x240 instead of 88x88)
- YouTube 2024+ lockupViewModel support for channel playlists
- youtubei/v1/browse API integration for channel playlist tabs
- yt-dlp integration for multi-language audio and subtitles

Bug Fixes:
- Fixed undefined `abort` import in playlist.py
- Fixed undefined functions in proto.py (encode_varint, bytes_to_hex, succinct_encode)
- Fixed missing `traceback` import in proto_debug.py
- Fixed blurry playlist thumbnails using default.jpg instead of HD versions
- Fixed channel playlists page using deprecated pbj=1 format

Improvements:
- Automatic thumbnail fallback system (hq720 → sddefault → hqdefault → mqdefault → default)
- JavaScript thumbnail_fallback() handler for 404 errors
- Better thumbnail quality across all pages (watch, channel, playlist, subscriptions)
- Consistent HD avatar display for all channel items
- Settings system automatically adds new settings without breaking user config

Files Modified:
- youtube/watch.py - HD thumbnails for related videos and playlist items
- youtube/channel.py - HD thumbnails for channel playlists, youtubei API integration
- youtube/playlist.py - HD thumbnails, fixed abort import
- youtube/util.py - HD thumbnail URLs, avatar HD upgrade, prefix_url improvements
- youtube/comments.py - HD video thumbnail
- youtube/subscriptions.py - HD thumbnails, fixed abort import
- youtube/yt_data_extract/common.py - lockupViewModel support, extract_lockup_view_model_info()
- youtube/yt_data_extract/everything_else.py - HD playlist thumbnails
- youtube/proto.py - Fixed undefined function references
- youtube/proto_debug.py - Added traceback import
- youtube/static/js/common.js - thumbnail_fallback() handler
- youtube/templates/*.html - Added onerror handlers for thumbnail fallback
- youtube/version.py - Bump to v0.4.0

Technical Details:
- All thumbnail URLs now use hq720.jpg (1280x720) when available
- Fallback handled client-side via JavaScript onerror handler
- Server-side avatar upgrade via regex in util.prefix_url()
- lockupViewModel parser extracts contentType, metadata, and first_video_id
- Channel playlist tabs now use youtubei/v1/browse instead of deprecated pbj=1
- Settings version system ensures backward compatibility
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r--youtube/yt_data_extract/common.py90
-rw-r--r--youtube/yt_data_extract/everything_else.py2
2 files changed, 91 insertions, 1 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 7903db5..6a98280 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -226,6 +226,89 @@ def check_missing_keys(object, *key_sequences):
return None
+
def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict.

    Lets nested lookups tolerate keys that are present but hold ``None``
    (or any other non-dict), which ``dict.get(key, {})`` does not.
    """
    return value if isinstance(value, dict) else {}


def _lockup_metadata_texts(lockup_metadata):
    """Yield each non-empty metadata part text of a lockupMetadataViewModel.

    Walks metadata -> contentMetadataViewModel -> metadataRows[] ->
    metadataParts[] -> text -> content, skipping malformed entries.
    """
    holder = _as_dict(
        _as_dict(lockup_metadata.get('metadata')).get('contentMetadataViewModel')
    )
    for row in holder.get('metadataRows') or []:
        for part in _as_dict(row).get('metadataParts') or []:
            text = _as_dict(_as_dict(part).get('text')).get('content')
            if isinstance(text, str) and text:
                yield text


def _has_digit(text):
    """Return True if ``text`` contains at least one digit."""
    return any(ch.isdigit() for ch in text)


def extract_lockup_view_model_info(item, additional_info={}):
    """Extract info from the new lockupViewModel format (YouTube 2024+).

    Parameters:
        item: the lockupViewModel dict (the value stored under the
            'lockupViewModel' key). Non-dict input is treated as empty.
        additional_info: extra key/values merged into the result last, so
            they override extracted fields. It is only read, never mutated,
            so the shared ``{}`` default is safe.

    Returns:
        A dict that always has 'error' (None) and 'title'. Depending on
        contentType it also has:
          * playlist: type, playlist_type, id, video_count, first_video_id
          * video: type, id, view_count, approx_view_count, time_published,
            duration (view_count and duration are left as None here)
          * channel: type, id, approx_subscriber_count
        followed by thumbnail, author, author_id and author_url. For any
        other contentType, only 'type': 'unsupported' is added and
        additional_info is not merged.
    """
    item = _as_dict(item)
    info = {'error': None}

    content_type = item.get('contentType')
    if not isinstance(content_type, str):
        content_type = ''
    content_id = item.get('contentId') or ''

    # Title lives at metadata.lockupMetadataViewModel.title.content
    lockup_metadata = _as_dict(
        _as_dict(item.get('metadata')).get('lockupMetadataViewModel')
    )
    info['title'] = _as_dict(lockup_metadata.get('title')).get('content') or ''

    # Keywords are matched as whole words. A plain substring test would
    # misread a channel name such as "Chicago News" as a publish time
    # ('ago') or "Movie Reviews" as a view count ('view').
    if 'PLAYLIST' in content_type:
        info['type'] = 'playlist'
        info['playlist_type'] = 'playlist'
        info['id'] = content_id
        info['video_count'] = None
        info['first_video_id'] = None

        for text in _lockup_metadata_texts(lockup_metadata):
            words = text.lower().split()
            if ('video' in words or 'videos' in words) and _has_digit(text):
                info['video_count'] = extract_int(text)
    elif 'VIDEO' in content_type:
        info['type'] = 'video'
        info['id'] = content_id
        info['view_count'] = None
        info['approx_view_count'] = None
        info['time_published'] = None
        info['duration'] = None

        for text in _lockup_metadata_texts(lockup_metadata):
            words = text.lower().split()
            if ('view' in words or 'views' in words) and _has_digit(text):
                info['approx_view_count'] = extract_approx_int(text)
            elif words and words[-1] == 'ago':
                # e.g. "3 days ago", "Streamed 2 weeks ago"
                info['time_published'] = text
    elif 'CHANNEL' in content_type:
        info['type'] = 'channel'
        info['id'] = content_id
        info['approx_subscriber_count'] = None
    else:
        info['type'] = 'unsupported'
        return info

    # Thumbnail: collections (playlists) nest the thumbnailViewModel under
    # collectionThumbnailViewModel.primaryThumbnail. Fall back to a
    # thumbnailViewModel directly under contentImage.
    # NOTE(review): the direct layout is what video lockups appear to use
    # (as handled by yt-dlp) -- confirm against a live response.
    content_image = _as_dict(item.get('contentImage'))
    primary_thumb = _as_dict(
        _as_dict(content_image.get('collectionThumbnailViewModel'))
        .get('primaryThumbnail')
    )
    thumb_vm = (_as_dict(primary_thumb.get('thumbnailViewModel'))
                or _as_dict(content_image.get('thumbnailViewModel')))
    image_sources = _as_dict(thumb_vm.get('image')).get('sources')
    if isinstance(image_sources, list) and image_sources:
        info['thumbnail'] = _as_dict(image_sources[0]).get('url') or ''
    else:
        info['thumbnail'] = ''

    # Author info is not extracted from this format; keys are still set
    info['author'] = None
    info['author_id'] = None
    info['author_url'] = None

    # First video ID from the inline player data:
    # itemPlayback.inlinePlayerData.onSelect.innertubeCommand.watchEndpoint
    watch_endpoint = item
    for key in ('itemPlayback', 'inlinePlayerData', 'onSelect',
                'innertubeCommand', 'watchEndpoint'):
        watch_endpoint = _as_dict(watch_endpoint.get(key))
    video_id = watch_endpoint.get('videoId')
    if video_id:
        info['first_video_id'] = video_id

    if additional_info:
        info.update(additional_info)
    return info
+
+
def extract_item_info(item, additional_info={}):
if not item:
return {'error': 'No item given'}
@@ -243,6 +326,10 @@ def extract_item_info(item, additional_info={}):
info['type'] = 'unsupported'
return info
+ # Handle new lockupViewModel format (YouTube 2024+)
+ if type == 'lockupViewModel':
+ return extract_lockup_view_model_info(item, additional_info)
+
# type looks like e.g. 'compactVideoRenderer' or 'gridVideoRenderer'
# camelCase split, https://stackoverflow.com/a/37697078
type_parts = [s.lower() for s in re.sub(r'([A-Z][a-z]+)', r' \1', type).split()]
@@ -441,6 +528,9 @@ _item_types = {
'channelRenderer',
'compactChannelRenderer',
'gridChannelRenderer',
+
+ # New viewModel format (YouTube 2024+)
+ 'lockupViewModel',
}
def _traverse_browse_renderer(renderer):
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index 0f64649..1f5b6a2 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -229,7 +229,7 @@ def extract_playlist_metadata(polymer_json):
if metadata['first_video_id'] is None:
metadata['thumbnail'] = None
else:
- metadata['thumbnail'] = f"https://i.ytimg.com/vi/{metadata['first_video_id']}/hqdefault.jpg"
+ metadata['thumbnail'] = f"https://i.ytimg.com/vi/{metadata['first_video_id']}/hq720.jpg"
metadata['video_count'] = extract_int(header.get('numVideosText'))
metadata['description'] = extract_str(header.get('descriptionText'), default='')