Merge pull request #57 from insaneracist/youtube-mix-fix

[youtube] fix: extract mix playlist ids from ytInitialData (#33)
author: Tom-Oliver Heidel <github@tom-oliver.eu> 2020-11-03 10:33:58 +0100
committer: GitHub <noreply@github.com> 2020-11-03 10:33:58 +0100
commit: 7166f47b189b7697d2f164841b676eda6b0ebb42 (patch)
tree: af9a6f800e68ca22d67e849bcac9a151234dfc1c
parent: 471115dbeefb899ee036d3e769da1f90070664b6 (diff)
parent: 5c15c1a0d7c27d34e7d03161c5b27bf923e314cd (diff)
download: hypervideo-pre-7166f47b189b7697d2f164841b676eda6b0ebb42.tar.lz
hypervideo-pre-7166f47b189b7697d2f164841b676eda6b0ebb42.tar.xz
hypervideo-pre-7166f47b189b7697d2f164841b676eda6b0ebb42.zip
1 file changed, 26 insertions, 9 deletions
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 87a456600..2e70ad6fa 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -279,6 +279,15 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
             *args, **compat_kwargs(kwargs))
 
+    def _get_yt_initial_data(self, video_id, webpage):
+        config = self._search_regex(
+            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
+            webpage, 'ytInitialData', default=None)
+        if config:
+            return self._parse_json(
+                uppercase_escape(config), video_id, fatal=False)
+
     def _real_initialize(self):
         if self._downloader is None:
             return
@@ -1398,15 +1407,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return self._parse_json(
                 uppercase_escape(config), video_id, fatal=False)
 
-    def _get_yt_initial_data(self, video_id, webpage):
-        config = self._search_regex(
-            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
-             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
-            webpage, 'ytInitialData', default=None)
-        if config:
-            return self._parse_json(
-                uppercase_escape(config), video_id, fatal=False)
-
     def _get_music_metadata_from_yt_initial(self, yt_initial):
         music_metadata = []
         key_map = {
@@ -2828,6 +2828,16 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
 
         return zip(ids_in_page, titles_in_page)
 
+    def _extract_mix_ids_from_yt_initial(self, yt_initial):
+        ids = []
+        playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
+        if playlist_contents:
+            for item in playlist_contents:
+                videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
+                if videoId:
+                    ids.append(videoId)
+        return ids
+
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
@@ -2841,6 +2851,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                 r'''(?xs)data-video-username=".*?".*?
                            href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                 webpage))
+
+            # if no ids in html of page, try using embedded json
+            if (len(new_ids) == 0):
+                yt_initial = self._get_yt_initial_data(playlist_id, webpage)
+                if yt_initial:
+                    new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
+
             # Fetch new pages until all the videos are repeated, it seems that
             # there are always 51 unique videos.
             new_ids = [_id for _id in new_ids if _id not in ids]
author	Tom-Oliver Heidel <github@tom-oliver.eu>	2020-11-03 10:33:58 +0100
committer	GitHub <noreply@github.com>	2020-11-03 10:33:58 +0100
commit	7166f47b189b7697d2f164841b676eda6b0ebb42 (patch)
tree	af9a6f800e68ca22d67e849bcac9a151234dfc1c
parent	471115dbeefb899ee036d3e769da1f90070664b6 (diff)
parent	5c15c1a0d7c27d34e7d03161c5b27bf923e314cd (diff)
download	hypervideo-pre-7166f47b189b7697d2f164841b676eda6b0ebb42.tar.lz hypervideo-pre-7166f47b189b7697d2f164841b676eda6b0ebb42.tar.xz hypervideo-pre-7166f47b189b7697d2f164841b676eda6b0ebb42.zip