aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jesus E <heckyel@riseup.net> 2023-05-28 21:42:13 -0400
committer Jesus E <heckyel@riseup.net> 2023-05-28 21:42:13 -0400
commit aa57ace7420bcbb4712d8aab0736f00115634c4a (patch)
tree b475bda3b6ba4006b3611a931d310c8a57166914
parent 512798366c935f57cf4c583a1de6bcd9ab7bb680 (diff)
download yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.tar.lz
yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.tar.xz
yt-local-aa57ace7420bcbb4712d8aab0736f00115634c4a.zip
Fix music list extraction
Closes #160
-rw-r--r-- youtube/templates/watch.html 6
-rw-r--r-- youtube/watch.py 2
-rw-r--r-- youtube/yt_data_extract/common.py 3
-rw-r--r-- youtube/yt_data_extract/watch_extraction.py 29
4 files changed, 39 insertions, 1 deletions
diff --git a/youtube/templates/watch.html b/youtube/templates/watch.html
index 4030a18..9679a28 100644
--- a/youtube/templates/watch.html
+++ b/youtube/templates/watch.html
@@ -135,7 +135,11 @@
{% for track in music_list %}
<tr>
{% for attribute in music_attributes %}
- <td>{{ track.get(attribute.lower(), '') }}</td>
+ {% if attribute.lower() == 'title' and track['url'] is not none %}
+ <td><a href="{{ track['url'] }}">{{ track.get(attribute.lower(), '') }}</a></td>
+ {% else %}
+ <td>{{ track.get(attribute.lower(), '') }}</td>
+ {% endif %}
{% endfor %}
</tr>
{% endfor %}
diff --git a/youtube/watch.py b/youtube/watch.py
index 53836aa..1db7895 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -688,6 +688,8 @@ def get_watch_page(video_id=None):
for item in info['related_videos']:
util.prefix_urls(item)
util.add_extra_html_info(item)
+ for song in info['music_list']:
+ song['url'] = util.prefix_url(song['url'])
if info['playlist']:
playlist_id = info['playlist']['id']
for item in info['playlist']['items']:
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 2e59109..fcefbf7 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -296,6 +296,9 @@ def extract_item_info(item, additional_info={}):
if primary_type == 'video':
info['id'] = item.get('videoId')
+ if not info['id']:
+ info['id'] = deep_get(item,'navigationEndpoint', 'watchEndpoint',
+ 'videoId')
info['view_count'] = extract_int(item.get('viewCountText'))
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 4f9ec30..2158fec 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -231,6 +231,34 @@ def _extract_metadata_row_info(renderer_content):
return info
+def _extract_from_music_renderer(renderer_content):
+ # Latest format for the music list: returns {'music_list': [...]} with one song dict per entry of renderer_content['carouselLockups'].
+ info = {
+ 'music_list': [],
+ }
+
+ for carousel in renderer_content.get('carouselLockups', []):  # a missing key yields no songs
+ song = {}
+ carousel = carousel.get('carouselLockupRenderer', {})  # unwrap the renderer; {} when the key is absent
+ video_renderer = carousel.get('videoLockup', {})
+ video_renderer_info = extract_item_info(video_renderer)  # reuse the generic item extractor for id and title
+ video_id = video_renderer_info.get('id')
+ song['url'] = concat_or_none('https://www.youtube.com/watch?v=',  # NOTE(review): presumably None when video_id is None - confirm against concat_or_none
+ video_id)
+ song['title'] = video_renderer_info.get('title')
+ for row in carousel.get('infoRows', []):
+ row = row.get('infoRowRenderer', {})
+ title = extract_str(row.get('title'))  # row label; compared against the upper-case names below
+ data = extract_str(row.get('defaultMetadata'))  # row value stored under the matching song key
+ if title == 'ARTIST':
+ song['artist'] = data
+ elif title == 'ALBUM':
+ song['album'] = data
+ elif title == 'WRITERS':
+ song['writers'] = data  # rows with any other label are ignored
+ info['music_list'].append(song)  # 'url' and 'title' are always set; 'artist'/'album'/'writers' only when a matching row exists
+ return info
+
def _extract_from_video_metadata(renderer_content):
info = _extract_from_video_information_renderer(renderer_content)
liberal_dict_update(info, _extract_likes_dislikes(renderer_content))
@@ -254,6 +282,7 @@ visible_extraction_dispatch = {
'slimVideoActionBarRenderer': _extract_likes_dislikes,
'slimOwnerRenderer': _extract_from_owner_renderer,
'videoDescriptionHeaderRenderer': _extract_from_video_header_renderer,
+ 'videoDescriptionMusicSectionRenderer': _extract_from_music_renderer,
'expandableVideoDescriptionRenderer': _extract_from_description_renderer,
'metadataRowContainerRenderer': _extract_metadata_row_info,
# OR just this one, which contains SOME of the above inside it