about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- youtube/shared.css 8
-rw-r--r-- youtube/watch.py 30
-rw-r--r-- youtube_dl/extractor/youtube.py 43
-rw-r--r-- yt_watch_template.html 19
4 files changed, 97 insertions, 3 deletions
diff --git a/youtube/shared.css b/youtube/shared.css
index f99baa1..151111d 100644
--- a/youtube/shared.css
+++ b/youtube/shared.css
@@ -227,11 +227,15 @@ address{
grid-column: 1 / span 2;
grid-row: 6;
}
+ .full-item .music-list{
+ grid-row:7;
+ grid-column: 1 / span 2;
+ }
.full-item .comments{
- grid-row: 7;
+ grid-row: 8;
}
.full-item .more-comments{
- grid-row: 8;
+ grid-row: 9;
}
.medium-item-box{
diff --git a/youtube/watch.py b/youtube/watch.py
index c669f5e..cfe827b 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -296,6 +296,10 @@ def extract_info(downloader, *args, **kwargs):
except YoutubeError as e:
return str(e)
+music_list_table_row = Template('''<tr>
+ <td>$attribute</td>
+ <td>$value</td>
+''')
def get_watch_page(query_string):
id = urllib.parse.parse_qs(query_string)['v'][0]
downloader = YoutubeDL(params={'youtube_include_dash_manifest':False})
@@ -343,7 +347,32 @@ def get_watch_page(query_string):
else:
related_videos_html = ''
+ music_list = info['music_list']
+ if len(music_list) == 0:
+ music_list_html = ''
+ else:
+ music_list_html = '''<hr>
+<table>
+ <caption>Music</caption>
+ <tr>
+ <th>Artist</th>
+ <th>Title</th>
+ <th>Album</th>
+ </tr>
+'''
+ for track in music_list:
+ music_list_html += '''<tr>\n'''
+ for attribute in ('artist', 'title', 'album'):
+ try:
+ value = track[attribute]
+ except KeyError:
+ music_list_html += '''<td></td>'''
+ else:
+ music_list_html += '''<td>''' + html.escape(value) + '''</td>'''
+ music_list_html += '''</tr>\n'''
+ music_list_html += '''</table>\n'''
+
download_options = ''
for format in info['formats']:
@@ -371,5 +400,6 @@ def get_watch_page(query_string):
related = related_videos_html,
comments = comments_html,
more_comments_button = more_comments_button,
+ music_list = music_list_html,
)
return page
\ No newline at end of file
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index c120cfb..39e4ca5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -10,6 +10,7 @@ import random
import re
import time
import traceback
+import html
from .common import InfoExtractor, SearchInfoExtractor
from ..jsinterp import JSInterpreter
@@ -1479,6 +1480,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
})
return chapters
+ ul_tag_pattern = re.compile(r'(</?ul)')
+ music_info_pattern = re.compile(r'<h4 class="title">\s*(Song|Music|Artist|Album)\s*</h4>\s*<ul class="content watch-info-tag-list">\s*<li>(?:<a[^>]*>)?([^<]*)(?:</a>)?</li>')
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@@ -1528,6 +1531,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+ # Related videos
related_vid_info = self._search_regex(r"""'RELATED_PLAYER_ARGS':\s*(\{.*?\})""", video_webpage, "related_player_args", default='')
if related_vid_info == '':
@@ -1540,6 +1544,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
related_vids = (compat_parse_qs(related_item) for related_item in related_vid_info.split(","))
related_vids = [{key : value[0] for key,value in vid.items()} for vid in related_vids]
+ # Music list
+ # Test case: https://www.youtube.com/watch?v=jbkZdRglnKY
+ music_list = []
+ metadata_start = video_webpage.find('<ul class="watch-extras-section">')
+ if metadata_start != -1:
+ metadata_start += 33
+ tag_index = metadata_start
+ open_tags = 1
+ while open_tags > 0:
+ match = self.ul_tag_pattern.search(video_webpage, tag_index)
+ if match is None:
+ print("Couldn't match ul tag")
+ break
+ tag_index = match.end()
+ tag = match.group(1)
+ if tag == "<ul":
+ open_tags += 1
+ else:
+ open_tags -= 1
+ else:
+ last_index = 0
+ metadata = video_webpage[metadata_start:tag_index]
+ current_song = None
+ while True:
+ match = self.music_info_pattern.search(metadata, last_index)
+ if match is None:
+ if current_song is not None:
+ music_list.append(current_song)
+ break
+ title, value = match.group(1), html.unescape(match.group(2))
+ if title in ("Song", "Music"):
+ if current_song is not None:
+ music_list.append(current_song)
+ current_song = {"title": value}
+ else:
+ current_song[title.lower()] = value
+ last_index = match.end()
+
# Get video info
embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
@@ -2120,6 +2162,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'track': track,
'artist': artist,
'related_vids': related_vids,
+ 'music_list': music_list,
}
diff --git a/yt_watch_template.html b/yt_watch_template.html
index 7d5f4ae..44e5892 100644
--- a/yt_watch_template.html
+++ b/yt_watch_template.html
@@ -23,6 +23,21 @@
.full-item{
grid-column: 2;
}
+ .music-list{
+ background-color: #d0d0d0;
+ }
+ .music-list table,th,td{
+ border: 1px solid;
+ }
+ .music-list th,td{
+ padding-left:4px;
+ padding-right:5px;
+ }
+ .music-list caption{
+ text-align:left;
+ font-weight:bold;
+ margin-bottom:5px;
+ }
.comments{
grid-column: 1 / span 2;
grid-row: 6;
@@ -106,7 +121,9 @@ $download_options
<input class="checkbox" name="video_info_list" value="$video_info" form="playlist-edit" type="checkbox">
<span class="description">$description</span>
-
+ <div class="music-list">
+$music_list
+ </div>
<section class="comments">
$comments
</section>