aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dlc/extractor/youtube.py
diff options
context:
space:
mode:
author Tom-Oliver Heidel <github@tom-oliver.eu> 2020-10-31 09:35:00 +0100
committer GitHub <noreply@github.com> 2020-10-31 09:35:00 +0100
commit cf37b9f875037f03a63b51ec88c9a2eac6e868d9 (patch)
tree 42e89884cf0b057775b10275eacb87d5f7b3d83b /youtube_dlc/extractor/youtube.py
parent 587ceadc8f19aa3c0846d8ee9974e0e3e264e268 (diff)
parent 7fb5f2f29d99fa269988c6586558c7e9d21e432d (diff)
download hypervideo-pre-cf37b9f875037f03a63b51ec88c9a2eac6e868d9.tar.lz
hypervideo-pre-cf37b9f875037f03a63b51ec88c9a2eac6e868d9.tar.xz
hypervideo-pre-cf37b9f875037f03a63b51ec88c9a2eac6e868d9.zip
Merge pull request #74 from blackjack4494/master
Release 2020.10.31
Diffstat (limited to 'youtube_dlc/extractor/youtube.py')
-rw-r--r-- youtube_dlc/extractor/youtube.py 48
1 file changed, 47 insertions, 1 deletion
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 4fb49b864..d605f1e74 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -1406,6 +1406,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
+    def _get_music_metadata_from_yt_initial(self, yt_initial):
+        """Return a list of music-track dicts found in yt_initial.
+
+        Reads the metadata rows of each videoSecondaryInfoRenderer and maps the
+        row titles 'Album', 'Artist' and 'Song' to the keys 'album', 'artist'
+        and 'track'. A repeated key inside one renderer starts a new track.
+        The result is an empty list when no such rows exist.
+        """
+        key_map = {
+            'Album': 'album',
+            'Artist': 'artist',
+            'Song': 'track',
+        }
+        music_metadata = []
+        contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
+        if not isinstance(contents, list):
+            return music_metadata
+        for content in contents:
+            rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
+            if not isinstance(rows, list):
+                continue
+            music_track = {}
+            for row in rows:
+                renderer = try_get(row, lambda x: x['metadataRowRenderer'])
+                key = try_get(renderer, lambda x: x['title']['simpleText'])
+                value = try_get(renderer, lambda x: x['contents'][0]['simpleText']) or \
+                    try_get(renderer, lambda x: x['contents'][0]['runs'][0]['text'])
+                if not (isinstance(key, str) and isinstance(value, str)) or key not in key_map:
+                    continue
+                if key_map[key] in music_track:
+                    # The field is already set, so this row begins a new track.
+                    music_metadata.append(music_track)
+                    music_track = {}
+                music_track[key_map[key]] = value
+            if music_track:
+                music_metadata.append(music_track)
+        return music_metadata
+
def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
@@ -2051,7 +2089,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if cipher:
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+ ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
jsplayer_url_json = self._search_regex(
ASSETS_RE,
embed_webpage if age_gate else video_webpage,
@@ -2328,6 +2366,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if release_year:
release_year = int(release_year)
+ yt_initial = self._get_yt_initial_data(video_id, video_webpage)
+ if yt_initial:
+ music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
+ if len(music_metadata):
+ album = music_metadata[0].get('album')
+ artist = music_metadata[0].get('artist')
+ track = music_metadata[0].get('track')
+
m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
video_webpage)