about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- youtube_dlc/extractor/youtube.py 35
1 files changed, 26 insertions, 9 deletions
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 5fd22081a..0354866ef 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -279,6 +279,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
*args, **compat_kwargs(kwargs))
+    def _get_yt_initial_data(self, video_id, webpage):
+        """Return the parsed ytInitialData object found in webpage, or None when the search yields nothing truthy."""
+        patterns = (
+            r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+            r'var\s+ytInitialData\s*=\s*(.*?)(?<=});')
+        raw = self._search_regex(patterns, webpage, 'ytInitialData', default=None)
+        # Parsing is requested as non-fatal (fatal=False); an empty or missing capture is never handed to the parser.
+        return self._parse_json(uppercase_escape(raw), video_id, fatal=False) if raw else None
+
def _real_initialize(self):
if self._downloader is None:
return
@@ -1397,15 +1406,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json(
uppercase_escape(config), video_id, fatal=False)
- def _get_yt_initial_data(self, video_id, webpage):
- config = self._search_regex(
- (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
- r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
- webpage, 'ytInitialData', default=None)
- if config:
- return self._parse_json(
- uppercase_escape(config), video_id, fatal=False)
-
def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
@@ -2765,6 +2765,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
return zip(ids_in_page, titles_in_page)
+    def _extract_mix_ids_from_yt_initial(self, yt_initial):
+        """Return the videoId of every playlistPanelVideoRenderer entry in yt_initial's watch-next playlist ([] if none)."""
+        # Text decoded from JSON is `unicode` on Python 2, which `type(x) is str` rejects; compare against type(u'') instead.
+        text_type = type(u'')
+        panel_items = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'])
+        if not isinstance(panel_items, list):
+            return []
+        found = (try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId']) for item in panel_items)
+        return [video_id for video_id in found if isinstance(video_id, text_type)]
+
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
@@ -2778,6 +2788,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
r'''(?xs)data-video-username=".*?".*?
href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
webpage))
+
+ # if no ids in html of page, try using embedded json
+ if (len(new_ids) == 0):
+ yt_initial = self._get_yt_initial_data(playlist_id, webpage)
+ if yt_initial:
+ new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
+
# Fetch new pages until all the videos are repeated, it seems that
# there are always 51 unique videos.
new_ids = [_id for _id in new_ids if _id not in ids]