diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-10-09 02:09:55 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-10-12 15:21:30 +0530 |
commit | cc16383ff36b3971064bae8106a45d38dbddc31b (patch) | |
tree | 86f3b35640bdfd12936707b7065852629009ddad | |
parent | a903d8285c96b2c7ac7915f228a17e84cbfe3ba4 (diff) | |
download | hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.tar.lz hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.tar.xz hypervideo-pre-cc16383ff36b3971064bae8106a45d38dbddc31b.zip |
[extractor] Simplify search extractors
-rw-r--r-- | yt_dlp/extractor/common.py | 10 | ||||
-rw-r--r-- | yt_dlp/extractor/googlesearch.py | 28 | ||||
-rw-r--r-- | yt_dlp/extractor/niconico.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/soundcloud.py | 21 | ||||
-rw-r--r-- | yt_dlp/extractor/yahoo.py | 22 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 10 |
6 files changed, 27 insertions, 72 deletions
```diff
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index af0f01f37..d02a808b6 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import base64
 import datetime
 import hashlib
+import itertools
 import json
 import netrc
 import os
@@ -3617,7 +3618,14 @@ class SearchInfoExtractor(InfoExtractor):
             return self._get_n_results(query, n)
 
     def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
+        """Get a specified number of results for a query.
+        Either this function or _search_results must be overridden by subclasses """
+        return self.playlist_result(
+            itertools.islice(self._search_results(query), 0, None if n == float('inf') else n),
+            query, query)
+
+    def _search_results(self, query):
+        """Returns an iterator of search results"""
         raise NotImplementedError('This method must be implemented by subclasses')
 
     @property
diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py
index 5279fa807..f605c0c35 100644
--- a/yt_dlp/extractor/googlesearch.py
+++ b/yt_dlp/extractor/googlesearch.py
@@ -11,6 +11,7 @@ class GoogleSearchIE(SearchInfoExtractor):
     _MAX_RESULTS = 1000
     IE_NAME = 'video.google:search'
     _SEARCH_KEY = 'gvsearch'
+    _WORKING = False
     _TEST = {
         'url': 'gvsearch15:python language',
         'info_dict': {
@@ -20,16 +21,7 @@ class GoogleSearchIE(SearchInfoExtractor):
         'playlist_count': 15,
     }
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        entries = []
-        res = {
-            '_type': 'playlist',
-            'id': query,
-            'title': query,
-        }
-
+    def _search_results(self, query):
         for pagenum in itertools.count():
             webpage = self._download_webpage(
                 'http://www.google.com/search',
@@ -44,16 +36,8 @@ class GoogleSearchIE(SearchInfoExtractor):
 
             for hit_idx, mobj in enumerate(re.finditer(
                     r'<h3 class="r"><a href="([^"]+)"', webpage)):
+                if re.search(f'id="vidthumb{hit_idx + 1}"', webpage):
+                    yield self.url_result(mobj.group(1))
 
-                # Skip playlists
-                if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
-                    continue
-
-                entries.append({
-                    '_type': 'url',
-                    'url': mobj.group(1)
-                })
-
-            if (len(entries) >= n) or not re.search(r'id="pnnext"', webpage):
-                res['entries'] = entries[:n]
-                return res
+            if not re.search(r'id="pnnext"', webpage):
+                return
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index f19afa485..76f087057 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -709,11 +709,9 @@ class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
     _SEARCH_KEY = 'nicosearch'
     _TESTS = []
 
-    def _get_n_results(self, query, n):
-        entries = self._entries(self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-        if n < float('inf'):
-            entries = itertools.islice(entries, 0, n)
-        return self.playlist_result(entries, query, query)
+    def _search_results(self, query):
+        return self._entries(
+            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
 
 
 class NicovideoSearchDateIE(NicovideoSearchIE):
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index ad3a32a02..e89383ff1 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -880,25 +880,14 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
         })
         next_url = update_url_query(self._API_V2_BASE + endpoint, query)
 
-        collected_results = 0
-
         for i in itertools.count(1):
             response = self._download_json(
-                next_url, collection_id, 'Downloading page {0}'.format(i),
+                next_url, collection_id, f'Downloading page {i}',
                 'Unable to download API page', headers=self._HEADERS)
 
-            collection = response.get('collection', [])
-            if not collection:
-                break
-
-            collection = list(filter(bool, collection))
-            collected_results += len(collection)
-
-            for item in collection:
-                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
-
-            if not collection or collected_results >= limit:
-                break
+            for item in response.get('collection') or []:
+                if item:
+                    yield self.url_result(item['uri'], SoundcloudIE.ie_key())
 
             next_url = response.get('next_href')
             if not next_url:
@@ -906,4 +895,4 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
 
     def _get_n_results(self, query, n):
         tracks = self._get_collection('search/tracks', query, limit=n, q=query)
-        return self.playlist_result(tracks, playlist_title=query)
+        return self.playlist_result(tracks, query, query)
diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py
index 741efefc8..53556de00 100644
--- a/yt_dlp/extractor/yahoo.py
+++ b/yt_dlp/extractor/yahoo.py
@@ -334,31 +334,15 @@ class YahooSearchIE(SearchInfoExtractor):
     IE_NAME = 'screen.yahoo:search'
     _SEARCH_KEY = 'yvsearch'
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-        entries = []
+    def _search_results(self, query):
         for pagenum in itertools.count(0):
             result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
             info = self._download_json(result_url, query,
                                        note='Downloading results page ' + str(pagenum + 1))
-            m = info['m']
-            results = info['results']
-
-            for (i, r) in enumerate(results):
-                if (pagenum * 30) + i >= n:
-                    break
-                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
-                e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
-                entries.append(e)
-            if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
+            yield from (self.url_result(result['rurl']) for result in info['results'])
+            if info['m']['last'] >= info['m']['total'] - 1:
                 break
 
-        return {
-            '_type': 'playlist',
-            'id': query,
-            'entries': entries,
-        }
-
 
 class YahooGyaOPlayerIE(InfoExtractor):
     IE_NAME = 'yahoo:gyao:player'
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 97d02dc0b..41fd0aef7 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -4615,11 +4615,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
     _SEARCH_PARAMS = None
     _TESTS = []
 
-    def _entries(self, query, n):
+    def _search_results(self, query):
         data = {'query': query}
         if self._SEARCH_PARAMS:
             data['params'] = self._SEARCH_PARAMS
-        total = 0
         continuation = {}
         for page_num in itertools.count(1):
             data.update(continuation)
@@ -4662,17 +4661,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
                         continue
 
                     yield self._extract_video(video)
-                    total += 1
-                    if total == n:
-                        return
 
             if not continuation:
                 break
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-        return self.playlist_result(self._entries(query, n), query, query)
-
 
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
```