diff options
author | Jesus E <heckyel@riseup.net> | 2023-05-28 21:42:13 -0400 |
---|---|---|
committer | Jesus E <heckyel@riseup.net> | 2023-05-28 21:42:13 -0400 |
commit | aa57ace7420bcbb4712d8aab0736f00115634c4a (patch) | |
tree | b475bda3b6ba4006b3611a931d310c8a57166914 /youtube | |
parent | 512798366c935f57cf4c583a1de6bcd9ab7bb680 (diff) | |
download | yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.tar.lz yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.tar.xz yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.zip |
Fix music list extraction
Closes #160
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/templates/watch.html | 6 | ||||
-rw-r--r-- | youtube/watch.py | 2 | ||||
-rw-r--r-- | youtube/yt_data_extract/common.py | 3 | ||||
-rw-r--r-- | youtube/yt_data_extract/watch_extraction.py | 29 |
4 files changed, 39 insertions, 1 deletions
```diff
diff --git a/youtube/templates/watch.html b/youtube/templates/watch.html
index 4030a18..9679a28 100644
--- a/youtube/templates/watch.html
+++ b/youtube/templates/watch.html
@@ -135,7 +135,11 @@
                         {% for track in music_list %}
                             <tr>
                                 {% for attribute in music_attributes %}
-                                    <td>{{ track.get(attribute.lower(), '') }}</td>
+                                    {% if attribute.lower() == 'title' and track['url'] is not none %}
+                                        <td><a href="{{ track['url'] }}">{{ track.get(attribute.lower(), '') }}</a></td>
+                                    {% else %}
+                                        <td>{{ track.get(attribute.lower(), '') }}</td>
+                                    {% endif %}
                                 {% endfor %}
                             </tr>
                         {% endfor %}
diff --git a/youtube/watch.py b/youtube/watch.py
index 53836aa..1db7895 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -688,6 +688,8 @@ def get_watch_page(video_id=None):
     for item in info['related_videos']:
         util.prefix_urls(item)
         util.add_extra_html_info(item)
+    for song in info['music_list']:
+        song['url'] = util.prefix_url(song['url'])
     if info['playlist']:
         playlist_id = info['playlist']['id']
         for item in info['playlist']['items']:
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 2e59109..fcefbf7 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -296,6 +296,9 @@ def extract_item_info(item, additional_info={}):

     if primary_type == 'video':
         info['id'] = item.get('videoId')
+        if not info['id']:
+            info['id'] = deep_get(item,'navigationEndpoint', 'watchEndpoint',
+                                  'videoId')
         info['view_count'] = extract_int(item.get('viewCountText'))

         # dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 4f9ec30..2158fec 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -231,6 +231,34 @@ def _extract_metadata_row_info(renderer_content):
     return info


+def _extract_from_music_renderer(renderer_content):
+    # latest format for the music list
+    info = {
+        'music_list': [],
+    }
+
+    for carousel in renderer_content.get('carouselLockups', []):
+        song = {}
+        carousel = carousel.get('carouselLockupRenderer', {})
+        video_renderer = carousel.get('videoLockup', {})
+        video_renderer_info = extract_item_info(video_renderer)
+        video_id = video_renderer_info.get('id')
+        song['url'] = concat_or_none('https://www.youtube.com/watch?v=',
+                                     video_id)
+        song['title'] = video_renderer_info.get('title')
+        for row in carousel.get('infoRows', []):
+            row = row.get('infoRowRenderer', {})
+            title = extract_str(row.get('title'))
+            data = extract_str(row.get('defaultMetadata'))
+            if title == 'ARTIST':
+                song['artist'] = data
+            elif title == 'ALBUM':
+                song['album'] = data
+            elif title == 'WRITERS':
+                song['writers'] = data
+        info['music_list'].append(song)
+    return info
+
 def _extract_from_video_metadata(renderer_content):
     info = _extract_from_video_information_renderer(renderer_content)
     liberal_dict_update(info, _extract_likes_dislikes(renderer_content))
@@ -254,6 +282,7 @@ visible_extraction_dispatch = {
     'slimVideoActionBarRenderer': _extract_likes_dislikes,
     'slimOwnerRenderer': _extract_from_owner_renderer,
     'videoDescriptionHeaderRenderer': _extract_from_video_header_renderer,
+    'videoDescriptionMusicSectionRenderer': _extract_from_music_renderer,
     'expandableVideoDescriptionRenderer': _extract_from_description_renderer,
     'metadataRowContainerRenderer': _extract_metadata_row_info,
     # OR just this one, which contains SOME of the above inside it
```