diff options
-rw-r--r-- | youtube/shared.css | 8 | ||||
-rw-r--r-- | youtube/watch.py | 30 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 43 | ||||
-rw-r--r-- | yt_watch_template.html | 19 |
4 files changed, 97 insertions, 3 deletions
diff --git a/youtube/shared.css b/youtube/shared.css index f99baa1..151111d 100644 --- a/youtube/shared.css +++ b/youtube/shared.css @@ -227,11 +227,15 @@ address{ grid-column: 1 / span 2; grid-row: 6; } + .full-item .music-list{ + grid-row:7; + grid-column: 1 / span 2; + } .full-item .comments{ - grid-row: 7; + grid-row: 8; } .full-item .more-comments{ - grid-row: 8; + grid-row: 9; } .medium-item-box{ diff --git a/youtube/watch.py b/youtube/watch.py index c669f5e..cfe827b 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -296,6 +296,10 @@ def extract_info(downloader, *args, **kwargs): except YoutubeError as e: return str(e) +music_list_table_row = Template('''<tr> + <td>$attribute</td> + <td>$value</td> +''') def get_watch_page(query_string): id = urllib.parse.parse_qs(query_string)['v'][0] downloader = YoutubeDL(params={'youtube_include_dash_manifest':False}) @@ -343,7 +347,32 @@ def get_watch_page(query_string): else: related_videos_html = '' + music_list = info['music_list'] + if len(music_list) == 0: + music_list_html = '' + else: + music_list_html = '''<hr> +<table> + <caption>Music</caption> + <tr> + <th>Artist</th> + <th>Title</th> + <th>Album</th> + </tr> +''' + for track in music_list: + music_list_html += '''<tr>\n''' + for attribute in ('artist', 'title', 'album'): + try: + value = track[attribute] + except KeyError: + music_list_html += '''<td></td>''' + else: + music_list_html += '''<td>''' + html.escape(value) + '''</td>''' + music_list_html += '''</tr>\n''' + music_list_html += '''</table>\n''' + download_options = '' for format in info['formats']: @@ -371,5 +400,6 @@ def get_watch_page(query_string): related = related_videos_html, comments = comments_html, more_comments_button = more_comments_button, + music_list = music_list_html, ) return page
\ No newline at end of file diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c120cfb..39e4ca5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -10,6 +10,7 @@ import random import re import time import traceback +import html from .common import InfoExtractor, SearchInfoExtractor from ..jsinterp import JSInterpreter @@ -1479,6 +1480,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }) return chapters + ul_tag_pattern = re.compile(r'(</?ul)') + music_info_pattern = re.compile(r'<h4 class="title">\s*(Song|Music|Artist|Album)\s*</h4>\s*<ul class="content watch-info-tag-list">\s*<li>(?:<a[^>]*>)?([^<]*)(?:</a>)?</li>') def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -1528,6 +1531,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_view_count(v_info): return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) + # Related videos related_vid_info = self._search_regex(r"""'RELATED_PLAYER_ARGS':\s*(\{.*?\})""", video_webpage, "related_player_args", default='') if related_vid_info == '': @@ -1540,6 +1544,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor): related_vids = (compat_parse_qs(related_item) for related_item in related_vid_info.split(",")) related_vids = [{key : value[0] for key,value in vid.items()} for vid in related_vids] + # Music list + # Test case: https://www.youtube.com/watch?v=jbkZdRglnKY + music_list = [] + metadata_start = video_webpage.find('<ul class="watch-extras-section">') + if metadata_start != -1: + metadata_start += 33 + tag_index = metadata_start + open_tags = 1 + while open_tags > 0: + match = self.ul_tag_pattern.search(video_webpage, tag_index) + if match is None: + print("Couldn't match ul tag") + break + tag_index = match.end() + tag = match.group(1) + if tag == "<ul": + open_tags += 1 + else: + open_tags -= 1 + else: + last_index = 0 + metadata = video_webpage[metadata_start:tag_index] + current_song = None + while True: + match = self.music_info_pattern.search(metadata, last_index) + if match is None: + if current_song is not None: + music_list.append(current_song) + break + title, value = match.group(1), html.unescape(match.group(2)) + if title in ("Song", "Music"): + if current_song is not None: + music_list.append(current_song) + current_song = {"title": value} + else: + current_song[title.lower()] = value + last_index = match.end() + # Get video info embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: @@ -2120,6 +2162,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'track': track, 'artist': artist, 'related_vids': related_vids, + 'music_list': music_list, } diff --git a/yt_watch_template.html b/yt_watch_template.html index 7d5f4ae..44e5892 100644 --- a/yt_watch_template.html +++ b/yt_watch_template.html @@ -23,6 +23,21 @@ .full-item{ grid-column: 2; } + .music-list{ + background-color: #d0d0d0; + } + .music-list table,th,td{ + border: 1px solid; + } + .music-list th,td{ + padding-left:4px; + padding-right:5px; + } + .music-list caption{ + text-align:left; + font-weight:bold; + margin-bottom:5px; + } .comments{ grid-column: 1 / span 2; grid-row: 6; @@ -106,7 +121,9 @@ $download_options <input class="checkbox" name="video_info_list" value="$video_info" form="playlist-edit" type="checkbox"> <span class="description">$description</span> - + <div class="music-list"> +$music_list + </div> <section class="comments"> $comments </section> |