diff options
author | Tom-Oliver Heidel <github@tom-oliver.eu> | 2020-10-31 09:35:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-31 09:35:00 +0100 |
commit | cf37b9f875037f03a63b51ec88c9a2eac6e868d9 (patch) | |
tree | 42e89884cf0b057775b10275eacb87d5f7b3d83b /youtube_dlc/extractor/youtube.py | |
parent | 587ceadc8f19aa3c0846d8ee9974e0e3e264e268 (diff) | |
parent | 7fb5f2f29d99fa269988c6586558c7e9d21e432d (diff) | |
download | hypervideo-pre-cf37b9f875037f03a63b51ec88c9a2eac6e868d9.tar.lz hypervideo-pre-cf37b9f875037f03a63b51ec88c9a2eac6e868d9.tar.xz hypervideo-pre-cf37b9f875037f03a63b51ec88c9a2eac6e868d9.zip |
Merge pull request #74 from blackjack4494/master
Release 2020.10.31
Diffstat (limited to 'youtube_dlc/extractor/youtube.py')
-rw-r--r-- | youtube_dlc/extractor/youtube.py | 48 |
1 files changed, 47 insertions, 1 deletions
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 4fb49b864..d605f1e74 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -1406,6 +1406,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self._parse_json( uppercase_escape(config), video_id, fatal=False) + def _get_music_metadata_from_yt_initial(self, yt_initial): + music_metadata = [] + key_map = { + 'Album': 'album', + 'Artist': 'artist', + 'Song': 'track' + } + contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents']) + if type(contents) is list: + for content in contents: + music_track = {} + if type(content) is not dict: + continue + videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer']) + if type(videoSecondaryInfoRenderer) is not dict: + continue + rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows']) + if type(rows) is not list: + continue + for row in rows: + metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer']) + if type(metadataRowRenderer) is not dict: + continue + key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText']) + value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \ + try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text']) + if type(key) is not str or type(value) is not str: + continue + if key in key_map: + if key_map[key] in music_track: + # we've started on a new track + music_metadata.append(music_track) + music_track = {} + music_track[key_map[key]] = value + if len(music_track.keys()): + music_metadata.append(music_track) + return music_metadata + def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" @@ -2051,7 +2089,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if cipher: if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): - ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' + ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))' jsplayer_url_json = self._search_regex( ASSETS_RE, embed_webpage if age_gate else video_webpage, @@ -2328,6 +2366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if release_year: release_year = int(release_year) + yt_initial = self._get_yt_initial_data(video_id, video_webpage) + if yt_initial: + music_metadata = self._get_music_metadata_from_yt_initial(yt_initial) + if len(music_metadata): + album = music_metadata[0].get('album') + artist = music_metadata[0].get('artist') + track = music_metadata[0].get('track') + m_episode = re.search( r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', video_webpage) |