diff options
Diffstat (limited to 'youtube_dlc/extractor/youtube.py')
-rw-r--r-- | youtube_dlc/extractor/youtube.py | 74 |
1 files changed, 28 insertions, 46 deletions
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 540f35337..72bc5a0da 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -1335,44 +1335,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return self._parse_json(
             uppercase_escape(config), video_id, fatal=False)
 
-    def _get_music_metadata_from_yt_initial(self, yt_initial):
-        music_metadata = []
-        key_map = {
-            'Album': 'album',
-            'Artist': 'artist',
-            'Song': 'track'
-        }
-        contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
-        if type(contents) is list:
-            for content in contents:
-                music_track = {}
-                if type(content) is not dict:
-                    continue
-                videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
-                if type(videoSecondaryInfoRenderer) is not dict:
-                    continue
-                rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
-                if type(rows) is not list:
-                    continue
-                for row in rows:
-                    metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
-                    if type(metadataRowRenderer) is not dict:
-                        continue
-                    key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
-                    value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
-                        try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
-                    if type(key) is not str or type(value) is not str:
-                        continue
-                    if key in key_map:
-                        if key_map[key] in music_track:
-                            # we've started on a new track
-                            music_metadata.append(music_track)
-                            music_track = {}
-                        music_track[key_map[key]] = value
-                if len(music_track.keys()):
-                    music_metadata.append(music_track)
-        return music_metadata
-
     def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
@@ -2295,7 +2257,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # Youtube Music Auto-generated description
         release_date = release_year = None
         if video_description:
-            mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
+            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
             if mobj:
                 if not track:
                     track = mobj.group('track').strip()
@@ -2312,13 +2274,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 if release_year:
                     release_year = int(release_year)
 
-        yt_initial = self._get_yt_initial_data(video_id, video_webpage)
-        if yt_initial:
-            music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
-            if len(music_metadata):
-                album = music_metadata[0].get('album')
-                artist = music_metadata[0].get('artist')
-                track = music_metadata[0].get('track')
+        yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
+        contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
+        for content in contents:
+            rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
+            multiple_songs = False
+            for row in rows:
+                if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
+                    multiple_songs = True
+                    break
+            for row in rows:
+                mrr = row.get('metadataRowRenderer') or {}
+                mrr_title = try_get(
+                    mrr, lambda x: x['title']['simpleText'], compat_str)
+                mrr_contents = try_get(
+                    mrr, lambda x: x['contents'][0], dict) or {}
+                mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
+                if not (mrr_title and mrr_contents_text):
+                    continue
+                if mrr_title == 'License':
+                    video_license = mrr_contents_text
+                elif not multiple_songs:
+                    if mrr_title == 'Album':
+                        album = mrr_contents_text
+                    elif mrr_title == 'Artist':
+                        artist = mrr_contents_text
+                    elif mrr_title == 'Song':
+                        track = mrr_contents_text
 
         m_episode = re.search(
             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',