aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
author xarantolus <xarantolus@protonmail.com> 2020-06-23 08:56:21 +0200
committer xarantolus <xarantolus@protonmail.com> 2020-09-06 09:22:30 +0200
commit e03b4f3e056b80b99dd4ab4eed12c7089fb80a43 (patch)
tree 345f21fb21fe02fd177073e7a8e30da3dc8f961d /youtube_dl/extractor/youtube.py
parent 19f671f88b2f45c833a9fc7f6f2f7d9016eccc86 (diff)
download hypervideo-pre-e03b4f3e056b80b99dd4ab4eed12c7089fb80a43.tar.lz
hypervideo-pre-e03b4f3e056b80b99dd4ab4eed12c7089fb80a43.tar.xz
hypervideo-pre-e03b4f3e056b80b99dd4ab4eed12c7089fb80a43.zip
[youtube] Make search extraction less dependent on json schema.
If an object looks like a video (it has a `videoId` key), assume that it is.
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- youtube_dl/extractor/youtube.py 31
1 files changed, 26 insertions, 5 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 22064616a..be0445962 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -3229,16 +3229,37 @@ class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
'only_matching': True,
}]
+ def _find_videos_in_json(self, extracted):
+ videos = []
+
+ def _real_find(obj):
+ if obj is None or isinstance(obj, str):
+ return
+
+ if type(obj) is list:
+ for elem in obj:
+ _real_find(elem)
+
+ if type(obj) is dict:
+ if "videoId" in obj:
+ videos.append(obj)
+ return
+
+ for _, o in obj.items():
+ _real_find(o)
+
+ _real_find(extracted)
+
+ return videos
+
def extract_videos_from_page_impl(self, page, ids_in_page, titles_in_page):
search_response = self._parse_json(self._search_regex(self._SEARCH_DATA, page, 'ytInitialData'), None)
- result_items = try_get(
- search_response,
- lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'])
+ result_items = self._find_videos_in_json(search_response)
for plobj in result_items:
- video_id = try_get(plobj, lambda x: x['videoRenderer']['videoId'])
- video_title = try_get(plobj, lambda x: x['videoRenderer']['title']['runs'][0]['text'])
+ video_id = try_get(plobj, lambda x: x['videoId'])
+ video_title = try_get(plobj, lambda x: x['title']['runs'][0]['text'])
if video_id is None or video_title is None:
# we do not have a videoRenderer or it is empty