diff options
author | Jesus E <heckyel@riseup.net> | 2023-05-28 21:42:13 -0400 |
---|---|---|
committer | Jesus E <heckyel@riseup.net> | 2023-05-28 21:42:13 -0400 |
commit | aa57ace7420bcbb4712d8aab0736f00115634c4a (patch) | |
tree | b475bda3b6ba4006b3611a931d310c8a57166914 /youtube/yt_data_extract | |
parent | 512798366c935f57cf4c583a1de6bcd9ab7bb680 (diff) | |
download | yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.tar.lz yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.tar.xz yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.zip |
Fix music list extraction
Closes #160
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- | youtube/yt_data_extract/common.py | 3 | ||||
-rw-r--r-- | youtube/yt_data_extract/watch_extraction.py | 29 |
2 files changed, 32 insertions, 0 deletions
```diff
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 2e59109..fcefbf7 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -296,6 +296,9 @@ def extract_item_info(item, additional_info={}):
 
     if primary_type == 'video':
         info['id'] = item.get('videoId')
+        if not info['id']:
+            info['id'] = deep_get(item,'navigationEndpoint', 'watchEndpoint',
+                                  'videoId')
         info['view_count'] = extract_int(item.get('viewCountText'))
 
         # dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 4f9ec30..2158fec 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -231,6 +231,34 @@ def _extract_metadata_row_info(renderer_content):
 
     return info
 
+def _extract_from_music_renderer(renderer_content):
+    # latest format for the music list
+    info = {
+        'music_list': [],
+    }
+
+    for carousel in renderer_content.get('carouselLockups', []):
+        song = {}
+        carousel = carousel.get('carouselLockupRenderer', {})
+        video_renderer = carousel.get('videoLockup', {})
+        video_renderer_info = extract_item_info(video_renderer)
+        video_id = video_renderer_info.get('id')
+        song['url'] = concat_or_none('https://www.youtube.com/watch?v=',
+                                     video_id)
+        song['title'] = video_renderer_info.get('title')
+        for row in carousel.get('infoRows', []):
+            row = row.get('infoRowRenderer', {})
+            title = extract_str(row.get('title'))
+            data = extract_str(row.get('defaultMetadata'))
+            if title == 'ARTIST':
+                song['artist'] = data
+            elif title == 'ALBUM':
+                song['album'] = data
+            elif title == 'WRITERS':
+                song['writers'] = data
+        info['music_list'].append(song)
+    return info
+
 def _extract_from_video_metadata(renderer_content):
     info = _extract_from_video_information_renderer(renderer_content)
     liberal_dict_update(info, _extract_likes_dislikes(renderer_content))
@@ -254,6 +282,7 @@ visible_extraction_dispatch = {
     'slimVideoActionBarRenderer': _extract_likes_dislikes,
     'slimOwnerRenderer': _extract_from_owner_renderer,
     'videoDescriptionHeaderRenderer': _extract_from_video_header_renderer,
+    'videoDescriptionMusicSectionRenderer': _extract_from_music_renderer,
     'expandableVideoDescriptionRenderer': _extract_from_description_renderer,
     'metadataRowContainerRenderer': _extract_metadata_row_info,
     # OR just this one, which contains SOME of the above inside it
```