diff options
Diffstat (limited to 'youtube/yt_data_extract')
| -rw-r--r-- | youtube/yt_data_extract/common.py | 90 | ||||
| -rw-r--r-- | youtube/yt_data_extract/everything_else.py | 2 |
2 files changed, 91 insertions, 1 deletions
def _lockup_get(node, *path, default=None):
    """Walk nested dicts along *path*, returning *default* on any miss.

    A miss is a missing key, a non-dict value met before the path is
    exhausted, or a final value of None (JSON null).
    """
    for key in path:
        if not isinstance(node, dict):
            return default
        node = node.get(key)
    return default if node is None else node


def _lockup_metadata_texts(lockup_metadata):
    """Yield each non-empty text string found in the lockup's metadata rows."""
    rows = _lockup_get(
        lockup_metadata,
        'metadata', 'contentMetadataViewModel', 'metadataRows',
        default=(),
    )
    if not isinstance(rows, (list, tuple)):
        return
    for row in rows:
        parts = _lockup_get(row, 'metadataParts', default=())
        if not isinstance(parts, (list, tuple)):
            continue
        for part in parts:
            text = _lockup_get(part, 'text', 'content', default='')
            if isinstance(text, str) and text:
                yield text


def extract_lockup_view_model_info(item, additional_info=None):
    """Extract info from the lockupViewModel item format (YouTube 2024+).

    Args:
        item: The dict stored under the 'lockupViewModel' key. Missing keys,
            null values and unexpected value types are tolerated.
        additional_info: Optional mapping merged into the result last, so
            its entries override extracted ones. It is not applied to
            unsupported items.

    Returns:
        A dict that always has 'error' (None), 'title' and 'type'. 'type' is
        one of 'playlist', 'video', 'channel' or 'unsupported', chosen by
        substring match on the item's 'contentType'. Supported types also
        carry 'id', 'thumbnail' ('' when none is found), 'author',
        'author_id' and 'author_url' (all None), plus the per-type fields
        initialised below. 'duration' and 'view_count' are never filled in
        by this function.
    """
    info = {'error': None}

    content_type = _lockup_get(item, 'contentType', default='')
    if not isinstance(content_type, str):
        content_type = ''
    content_id = _lockup_get(item, 'contentId', default='')

    lockup_metadata = _lockup_get(
        item, 'metadata', 'lockupMetadataViewModel', default={})
    info['title'] = _lockup_get(
        lockup_metadata, 'title', 'content', default='')

    # 'PLAYLIST' is tested before 'VIDEO', matching the original precedence.
    if 'PLAYLIST' in content_type:
        info['type'] = 'playlist'
        info['playlist_type'] = 'playlist'
        info['id'] = content_id
        info['video_count'] = None
        info['first_video_id'] = None

        for text in _lockup_metadata_texts(lockup_metadata):
            if 'video' in text.lower():
                info['video_count'] = extract_int(text)
    elif 'VIDEO' in content_type:
        info['type'] = 'video'
        info['id'] = content_id
        info['view_count'] = None
        info['approx_view_count'] = None
        info['time_published'] = None
        info['duration'] = None

        # NOTE(review): these are English-only substring heuristics; other
        # metadata text containing "view" or "ago" would also match.
        for text in _lockup_metadata_texts(lockup_metadata):
            lowered = text.lower()
            if 'view' in lowered:
                info['approx_view_count'] = extract_approx_int(text)
            elif 'ago' in lowered:
                info['time_published'] = text
    elif 'CHANNEL' in content_type:
        info['type'] = 'channel'
        info['id'] = content_id
        info['approx_subscriber_count'] = None
    else:
        info['type'] = 'unsupported'
        return info

    # Thumbnail: try the collection (stacked) thumbnail path first.
    sources = _lockup_get(
        item,
        'contentImage', 'collectionThumbnailViewModel', 'primaryThumbnail',
        'thumbnailViewModel', 'image', 'sources',
    )
    if not sources:
        # NOTE(review): non-collection lockups appear to place
        # thumbnailViewModel directly under contentImage -- confirm against
        # live responses.
        sources = _lockup_get(
            item, 'contentImage', 'thumbnailViewModel', 'image', 'sources')
    if isinstance(sources, (list, tuple)) and sources:
        info['thumbnail'] = _lockup_get(sources[0], 'url', default='')
    else:
        info['thumbnail'] = ''

    # Author data is not extracted from this format; keys are kept so that
    # consumers can rely on their presence.
    info['author'] = None
    info['author_id'] = None
    info['author_url'] = None

    # first_video_id is a playlist-only field, so it is only filled in for
    # playlists instead of being attached to videos and channels as well.
    if info['type'] == 'playlist':
        first_video_id = _lockup_get(
            item,
            'itemPlayback', 'inlinePlayerData', 'onSelect',
            'innertubeCommand', 'watchEndpoint', 'videoId',
        )
        if first_video_id:
            info['first_video_id'] = first_video_id

    if additional_info:
        info.update(additional_info)
    return info
'compactVideoRenderer' or 'gridVideoRenderer' # camelCase split, https://stackoverflow.com/a/37697078 type_parts = [s.lower() for s in re.sub(r'([A-Z][a-z]+)', r' \1', type).split()] @@ -441,6 +528,9 @@ _item_types = { 'channelRenderer', 'compactChannelRenderer', 'gridChannelRenderer', + + # New viewModel format (YouTube 2024+) + 'lockupViewModel', } def _traverse_browse_renderer(renderer): diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py index 0f64649..1f5b6a2 100644 --- a/youtube/yt_data_extract/everything_else.py +++ b/youtube/yt_data_extract/everything_else.py @@ -229,7 +229,7 @@ def extract_playlist_metadata(polymer_json): if metadata['first_video_id'] is None: metadata['thumbnail'] = None else: - metadata['thumbnail'] = f"https://i.ytimg.com/vi/{metadata['first_video_id']}/hqdefault.jpg" + metadata['thumbnail'] = f"https://i.ytimg.com/vi/{metadata['first_video_id']}/hq720.jpg" metadata['video_count'] = extract_int(header.get('numVideosText')) metadata['description'] = extract_str(header.get('descriptionText'), default='') |
