[extractor] Simplify search extractors

author: pukkandan <pukkandan.ytdlp@gmail.com> 2021-10-09 02:09:55 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2021-10-12 15:21:30 +0530
commit: cc16383ff36b3971064bae8106a45d38dbddc31b (patch)
tree: 86f3b35640bdfd12936707b7065852629009ddad
parent: a903d8285c96b2c7ac7915f228a17e84cbfe3ba4 (diff)
download: hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.tar.lz hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.tar.xz hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.zip
6 files changed, 27 insertions, 72 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index af0f01f37..d02a808b6 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import base64
 import datetime
 import hashlib
+import itertools
 import json
 import netrc
 import os
@@ -3617,7 +3618,14 @@ class SearchInfoExtractor(InfoExtractor):
             return self._get_n_results(query, n)
 
     def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
+        """Get a specified number of results for a query.
+        Either this function or _search_results must be overridden by subclasses """
+        return self.playlist_result(
+            itertools.islice(self._search_results(query), 0, None if n == float('inf') else n),
+            query, query)
+
+    def _search_results(self, query):
+        """Returns an iterator of search results"""
         raise NotImplementedError('This method must be implemented by subclasses')
 
     @property
diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py
index 5279fa807..f605c0c35 100644
--- a/yt_dlp/extractor/googlesearch.py
+++ b/yt_dlp/extractor/googlesearch.py
@@ -11,6 +11,7 @@ class GoogleSearchIE(SearchInfoExtractor):
     _MAX_RESULTS = 1000
     IE_NAME = 'video.google:search'
     _SEARCH_KEY = 'gvsearch'
+    _WORKING = False
     _TEST = {
         'url': 'gvsearch15:python language',
         'info_dict': {
@@ -20,16 +21,7 @@ class GoogleSearchIE(SearchInfoExtractor):
         'playlist_count': 15,
     }
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        entries = []
-        res = {
-            '_type': 'playlist',
-            'id': query,
-            'title': query,
-        }
-
+    def _search_results(self, query):
         for pagenum in itertools.count():
             webpage = self._download_webpage(
                 'http://www.google.com/search',
@@ -44,16 +36,8 @@ class GoogleSearchIE(SearchInfoExtractor):
 
             for hit_idx, mobj in enumerate(re.finditer(
                     r'<h3 class="r"><a href="([^"]+)"', webpage)):
+                if re.search(f'id="vidthumb{hit_idx + 1}"', webpage):
+                    yield self.url_result(mobj.group(1))
 
-                # Skip playlists
-                if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
-                    continue
-
-                entries.append({
-                    '_type': 'url',
-                    'url': mobj.group(1)
-                })
-
-            if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
-                res['entries'] = entries[:n]
-                return res
+            if not re.search(r'id="pnnext"', webpage):
+                return
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index f19afa485..76f087057 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -709,11 +709,9 @@ class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
     _SEARCH_KEY = 'nicosearch'
     _TESTS = []
 
-    def _get_n_results(self, query, n):
-        entries = self._entries(self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-        if n < float('inf'):
-            entries = itertools.islice(entries, 0, n)
-        return self.playlist_result(entries, query, query)
+    def _search_results(self, query):
+        return self._entries(
+            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
 
 
 class NicovideoSearchDateIE(NicovideoSearchIE):
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index ad3a32a02..e89383ff1 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -880,25 +880,14 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
         })
         next_url = update_url_query(self._API_V2_BASE + endpoint, query)
 
-        collected_results = 0
-
         for i in itertools.count(1):
             response = self._download_json(
-                next_url, collection_id, 'Downloading page {0}'.format(i),
+                next_url, collection_id, f'Downloading page {i}',
                 'Unable to download API page', headers=self._HEADERS)
 
-            collection = response.get('collection', [])
-            if not collection:
-                break
-
-            collection = list(filter(bool, collection))
-            collected_results += len(collection)
-
-            for item in collection:
-                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
-
-            if not collection or collected_results >= limit:
-                break
+            for item in response.get('collection') or []:
+                if item:
+                    yield self.url_result(item['uri'], SoundcloudIE.ie_key())
 
             next_url = response.get('next_href')
             if not next_url:
@@ -906,4 +895,4 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
 
     def _get_n_results(self, query, n):
         tracks = self._get_collection('search/tracks', query, limit=n, q=query)
-        return self.playlist_result(tracks, playlist_title=query)
+        return self.playlist_result(tracks, query, query)
diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py
index 741efefc8..53556de00 100644
--- a/yt_dlp/extractor/yahoo.py
+++ b/yt_dlp/extractor/yahoo.py
@@ -334,31 +334,15 @@ class YahooSearchIE(SearchInfoExtractor):
     IE_NAME = 'screen.yahoo:search'
     _SEARCH_KEY = 'yvsearch'
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-        entries = []
+    def _search_results(self, query):
         for pagenum in itertools.count(0):
             result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
             info = self._download_json(result_url, query,
                                        note='Downloading results page ' + str(pagenum + 1))
-            m = info['m']
-            results = info['results']
-
-            for (i, r) in enumerate(results):
-                if (pagenum * 30) + i >= n:
-                    break
-                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
-                e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
-                entries.append(e)
-            if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
+            yield from (self.url_result(result['rurl']) for result in info['results'])
+            if info['m']['last'] >= info['m']['total'] - 1:
                 break
 
-        return {
-            '_type': 'playlist',
-            'id': query,
-            'entries': entries,
-        }
-
 
 class YahooGyaOPlayerIE(InfoExtractor):
     IE_NAME = 'yahoo:gyao:player'
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 97d02dc0b..41fd0aef7 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -4615,11 +4615,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
     _SEARCH_PARAMS = None
     _TESTS = []
 
-    def _entries(self, query, n):
+    def _search_results(self, query):
         data = {'query': query}
         if self._SEARCH_PARAMS:
             data['params'] = self._SEARCH_PARAMS
-        total = 0
         continuation = {}
         for page_num in itertools.count(1):
             data.update(continuation)
@@ -4662,17 +4661,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
                         continue
 
                     yield self._extract_video(video)
-                    total += 1
-                    if total == n:
-                        return
 
             if not continuation:
                 break
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-        return self.playlist_result(self._entries(query, n), query, query)
-
 
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
author	pukkandan <pukkandan.ytdlp@gmail.com>	2021-10-09 02:09:55 +0530
committer	pukkandan <pukkandan.ytdlp@gmail.com>	2021-10-12 15:21:30 +0530
commit	cc16383ff36b3971064bae8106a45d38dbddc31b (patch)
tree	86f3b35640bdfd12936707b7065852629009ddad
parent	a903d8285c96b2c7ac7915f228a17e84cbfe3ba4 (diff)
download	hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.tar.lz hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.tar.xz hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.zip