about summary refs log tree commit diff stats
path: root/youtube/yt_data_extract
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- youtube/yt_data_extract/common.py 90
-rw-r--r-- youtube/yt_data_extract/everything_else.py 2
2 files changed, 91 insertions, 1 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 7903db5..6a98280 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -226,6 +226,89 @@ def check_missing_keys(object, *key_sequences):
return None
+
+def extract_lockup_view_model_info(item, additional_info={}):
+    """Extract item info from the lockupViewModel format (YouTube 2024+).
+
+    item: dict read for 'contentType', 'contentId', 'metadata',
+        'contentImage', 'itemPlayback'; missing/non-dict nodes count as absent.
+    additional_info: dict merged in last, overriding extracted keys; only
+        read, so the shared {} default is never mutated.
+    Returns a dict with 'error' (None), 'title' and 'type' ('playlist',
+    'video', 'channel' or 'unsupported'). Unsupported items return early,
+    before thumbnail extraction and without additional_info applied.
+    """
+    def dig(obj, *keys):  # follow keys through nested dicts; missing -> {}
+        for key in keys:
+            obj = obj.get(key) if isinstance(obj, dict) else None
+        return obj if isinstance(obj, dict) else {}
+
+    info = {'error': None}
+    content_type = dig(item).get('contentType') or ''
+    content_id = dig(item).get('contentId', '')
+    lockup_metadata = dig(item, 'metadata', 'lockupMetadataViewModel')
+    info['title'] = dig(lockup_metadata, 'title').get('content', '')
+
+    # Text of every metadata part, in row order; scanned per type below
+    rows = dig(lockup_metadata, 'metadata', 'contentMetadataViewModel')
+    texts = [
+        dig(part, 'text').get('content') or ''
+        for row in rows.get('metadataRows') or []
+        for part in dig(row).get('metadataParts') or []
+    ]
+
+    # contentType is matched by substring; 'PLAYLIST' is tested first
+    if 'PLAYLIST' in content_type:
+        info['type'] = 'playlist'
+        info['playlist_type'] = 'playlist'
+        info['id'] = content_id
+        info['video_count'] = None
+        info['first_video_id'] = None
+        for text in texts:
+            if 'video' in text.lower():
+                info['video_count'] = extract_int(text)
+    elif 'VIDEO' in content_type:
+        info['type'] = 'video'
+        info['id'] = content_id
+        info['view_count'] = None
+        info['approx_view_count'] = None
+        info['time_published'] = None
+        info['duration'] = None  # not extracted from this format here
+        for text in texts:
+            if 'view' in text.lower():
+                info['approx_view_count'] = extract_approx_int(text)
+            elif 'ago' in text.lower():
+                info['time_published'] = text
+    elif 'CHANNEL' in content_type:
+        info['type'] = 'channel'
+        info['id'] = content_id
+        info['approx_subscriber_count'] = None
+    else:
+        info['type'] = 'unsupported'
+        return info
+
+    # Try the collection (nested) thumbnail path first, then the direct
+    # thumbnailViewModel path that video lockups appear to use (TODO confirm)
+    info['thumbnail'] = ''
+    for path in (('collectionThumbnailViewModel', 'primaryThumbnail'), ()):
+        image = dig(item, 'contentImage', *path, 'thumbnailViewModel', 'image')
+        if image.get('sources'):
+            info['thumbnail'] = dig(image['sources'][0]).get('url', '')
+            break
+
+    # Author info is not extracted from this format; keys are set to None
+    info['author'] = info['author_id'] = info['author_url'] = None
+
+    # Inline player data, when present, supplies first_video_id
+    watch_endpoint = dig(item, 'itemPlayback', 'inlinePlayerData', 'onSelect',
+                         'innertubeCommand', 'watchEndpoint')
+    if watch_endpoint.get('videoId'):
+        info['first_video_id'] = watch_endpoint['videoId']
+
+    info.update(additional_info)
+    return info
+
+
def extract_item_info(item, additional_info={}):
if not item:
return {'error': 'No item given'}
@@ -243,6 +326,10 @@ def extract_item_info(item, additional_info={}):
info['type'] = 'unsupported'
return info
+ # Handle new lockupViewModel format (YouTube 2024+)
+ if type == 'lockupViewModel':
+ return extract_lockup_view_model_info(item, additional_info)
+
# type looks like e.g. 'compactVideoRenderer' or 'gridVideoRenderer'
# camelCase split, https://stackoverflow.com/a/37697078
type_parts = [s.lower() for s in re.sub(r'([A-Z][a-z]+)', r' \1', type).split()]
@@ -441,6 +528,9 @@ _item_types = {
'channelRenderer',
'compactChannelRenderer',
'gridChannelRenderer',
+
+ # New viewModel format (YouTube 2024+)
+ 'lockupViewModel',
}
def _traverse_browse_renderer(renderer):
diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
index 0f64649..1f5b6a2 100644
--- a/youtube/yt_data_extract/everything_else.py
+++ b/youtube/yt_data_extract/everything_else.py
@@ -229,7 +229,7 @@ def extract_playlist_metadata(polymer_json):
if metadata['first_video_id'] is None:
metadata['thumbnail'] = None
else:
- metadata['thumbnail'] = f"https://i.ytimg.com/vi/{metadata['first_video_id']}/hqdefault.jpg"
+ metadata['thumbnail'] = f"https://i.ytimg.com/vi/{metadata['first_video_id']}/hq720.jpg"
metadata['video_count'] = extract_int(header.get('numVideosText'))
metadata['description'] = extract_str(header.get('descriptionText'), default='')