diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-05-11 21:24:44 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-05-13 00:23:26 +0530 |
commit | 82d020804de938bb7e87bd6bbc4961757b892cd0 (patch) | |
tree | 6ef8184da2e2ebe93ea235df8d92b644470e8690 /yt_dlp | |
parent | 7ddbf09c2570d79baed8154e07ff7ee404ccc17c (diff) | |
download | hypervideo-pre-82d020804de938bb7e87bd6bbc4961757b892cd0.tar.lz hypervideo-pre-82d020804de938bb7e87bd6bbc4961757b892cd0.tar.xz hypervideo-pre-82d020804de938bb7e87bd6bbc4961757b892cd0.zip |
[extractor] Use classmethod/property where possible
and refactor lazy extractors accordingly.
This reduces the need to create extractor instances.
Diffstat (limited to 'yt_dlp')
-rw-r--r-- | yt_dlp/__init__.py | 16 | ||||
-rw-r--r-- | yt_dlp/extractor/__init__.py | 14 | ||||
-rw-r--r-- | yt_dlp/extractor/common.py | 58 | ||||
-rw-r--r-- | yt_dlp/extractor/drtv.py | 1 | ||||
-rw-r--r-- | yt_dlp/extractor/testurl.py | 43 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 8 | ||||
-rw-r--r-- | yt_dlp/utils.py | 1 |
7 files changed, 70 insertions, 71 deletions
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 924604631..0a8bf37b6 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -11,7 +11,7 @@ import sys from .compat import compat_getpass, compat_os_name, compat_shlex_quote from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .downloader import FileDownloader -from .extractor import list_extractors +from .extractor import GenericIE, list_extractor_classes from .extractor.adobepass import MSO_INFO from .extractor.common import InfoExtractor from .options import parseOpts @@ -76,14 +76,20 @@ def get_urls(urls, batchfile, verbose): def print_extractor_information(opts, urls): out = '' if opts.list_extractors: - for ie in list_extractors(opts.age_limit): + urls = dict.fromkeys(urls, False) + for ie in list_extractor_classes(opts.age_limit): out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' - out += ''.join(f' {url}\n' for url in filter(ie.suitable, urls)) + if ie == GenericIE: + matched_urls = [url for url, matched in urls.items() if not matched] + else: + matched_urls = tuple(filter(ie.suitable, urls.keys())) + urls.update(dict.fromkeys(matched_urls, True)) + out += ''.join(f' {url}\n' for url in matched_urls) elif opts.list_extractor_descriptions: _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') out = '\n'.join( ie.description(markdown=False, search_examples=_SEARCHES) - for ie in list_extractors(opts.age_limit) if ie.working() and ie.IE_DESC is not False) + '\n' + for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) elif opts.ap_list_mso: out = 'Supported TV Providers:\n%s\n' % render_table( ['mso', 'mso name'], @@ -862,7 +868,7 @@ def main(argv=None): sys.exit(f'\nERROR: {e}') -from .extractor import gen_extractors +from .extractor import gen_extractors, list_extractors __all__ = [ 'main', 'YoutubeDL', diff --git 
a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py index 506ffe87c..afd3d05ac 100644 --- a/yt_dlp/extractor/__init__.py +++ b/yt_dlp/extractor/__init__.py @@ -37,11 +37,17 @@ def gen_extractors(): return [klass() for klass in gen_extractor_classes()] -def list_extractors(age_limit): +def list_extractor_classes(age_limit=None): """Return a list of extractors that are suitable for the given age, sorted by extractor name""" - return sorted(filter( - lambda ie: ie.is_suitable(age_limit), - gen_extractors()), key=lambda ie: ie.IE_NAME.lower()) + yield from sorted(filter( + lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405 + gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) + yield GenericIE # noqa: F405 + + +def list_extractors(age_limit=None): + """Return a list of extractor instances that are suitable for the given age, sorted by extractor name""" + return [ie() for ie in list_extractor_classes(age_limit)] def get_info_extractor(ie_name): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 23d57ddaf..e2460b36a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -40,6 +40,7 @@ from ..utils import ( age_restricted, base_url, bug_reports_message, + classproperty, clean_html, determine_ext, determine_protocol, @@ -710,9 +711,9 @@ class InfoExtractor: """A string for getting the InfoExtractor with get_info_extractor""" return cls.__name__[:-2] - @property - def IE_NAME(self): - return type(self).__name__[:-2] + @classproperty + def IE_NAME(cls): + return cls.__name__[:-2] @staticmethod def __can_accept_status_code(err, expected_status): @@ -3624,56 +3625,57 @@ class InfoExtractor: self._set_cookie(domain, cookie, value) break - def get_testcases(self, include_onlymatching=False): - t = getattr(self, '_TEST', None) + @classmethod + def get_testcases(cls, include_onlymatching=False): + t = getattr(cls, '_TEST', None) if t: - assert not hasattr(self, '_TESTS'), \ - '%s has _TEST and 
_TESTS' % type(self).__name__ + assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS' tests = [t] else: - tests = getattr(self, '_TESTS', []) + tests = getattr(cls, '_TESTS', []) for t in tests: if not include_onlymatching and t.get('only_matching', False): continue - t['name'] = type(self).__name__[:-len('IE')] + t['name'] = cls.ie_key() yield t - def is_suitable(self, age_limit): + @classmethod + def is_suitable(cls, age_limit): """ Test whether the extractor is generally suitable for the given age limit (i.e. pornographic sites are not, all others usually are) """ any_restricted = False - for tc in self.get_testcases(include_onlymatching=False): + for tc in cls.get_testcases(include_onlymatching=False): if tc.get('playlist', []): tc = tc['playlist'][0] - is_restricted = age_restricted( - tc.get('info_dict', {}).get('age_limit'), age_limit) + is_restricted = age_restricted(tc.get('info_dict', {}).get('age_limit'), age_limit) if not is_restricted: return True any_restricted = any_restricted or is_restricted return not any_restricted - def description(self, *, markdown=True, search_examples=None): + @classmethod + def description(cls, *, markdown=True, search_examples=None): """Description of the extractor""" desc = '' - if self._NETRC_MACHINE: + if cls._NETRC_MACHINE: if markdown: - desc += f' [<abbr title="netrc machine"><em>{self._NETRC_MACHINE}</em></abbr>]' + desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]' else: - desc += f' [{self._NETRC_MACHINE}]' - if self.IE_DESC is False: + desc += f' [{cls._NETRC_MACHINE}]' + if cls.IE_DESC is False: desc += ' [HIDDEN]' - elif self.IE_DESC: - desc += f' {self.IE_DESC}' - if self.SEARCH_KEY: - desc += f'; "{self.SEARCH_KEY}:" prefix' + elif cls.IE_DESC: + desc += f' {cls.IE_DESC}' + if cls.SEARCH_KEY: + desc += f'; "{cls.SEARCH_KEY}:" prefix' if search_examples: _COUNTS = ('', '5', '10', 'all') - desc += f' (Example: 
"{self.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")' - if not self.working(): + desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")' + if not cls.working(): desc += ' (**Currently broken**)' if markdown else ' (Currently broken)' - name = f' - **{self.IE_NAME}**' if markdown else self.IE_NAME + name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME return f'{name}:{desc}' if desc else name def extract_subtitles(self, *args, **kwargs): @@ -3849,6 +3851,6 @@ class SearchInfoExtractor(InfoExtractor): """Returns an iterator of search results""" raise NotImplementedError('This method must be implemented by subclasses') - @property - def SEARCH_KEY(self): - return self._SEARCH_KEY + @classproperty + def SEARCH_KEY(cls): + return cls._SEARCH_KEY diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 74c40efd9..708b72fae 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -18,6 +18,7 @@ from ..utils import ( url_or_none, ) + class DRTVIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index 140fa4a96..32cae429e 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -8,55 +8,36 @@ class TestURLIE(InfoExtractor): """ Allows addressing of the test cases as test:yout.*be_1 """ IE_DESC = False # Do not list - _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$' + _VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$' def _real_extract(self, url): - from ..extractor import gen_extractors + from ..extractor import gen_extractor_classes - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - extractor_id = mobj.group('extractor') - all_extractors = gen_extractors() + extractor_id, num = self._match_valid_url(url).group('extractor', 'num') rex = re.compile(extractor_id, flags=re.IGNORECASE) - matching_extractors = [ - e for e 
in all_extractors if rex.search(e.IE_NAME)] + matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)] if len(matching_extractors) == 0: - raise ExtractorError( - 'No extractors matching %r found' % extractor_id, - expected=True) + raise ExtractorError('No extractors matching {extractor_id!r} found', expected=True) elif len(matching_extractors) > 1: - # Is it obvious which one to pick? - try: + try: # Check for exact match extractor = next( ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower()) except StopIteration: raise ExtractorError( - ('Found multiple matching extractors: %s' % - ' '.join(ie.IE_NAME for ie in matching_extractors)), + 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors), expected=True) else: extractor = matching_extractors[0] - num_str = mobj.group('num') - num = int(num_str) if num_str else 0 - - testcases = [] - t = getattr(extractor, '_TEST', None) - if t: - testcases.append(t) - testcases.extend(getattr(extractor, '_TESTS', [])) - + testcases = tuple(extractor.get_testcases(True)) try: - tc = testcases[num] + tc = testcases[int(num or 0)] except IndexError: raise ExtractorError( - ('Test case %d not found, got only %d tests' % - (num, len(testcases))), - expected=True) - - self.to_screen('Test URL: %s' % tc['url']) + f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True) - return self.url_result(tc['url'], video_id=video_id) + self.to_screen(f'Test URL: {tc["url"]}') + return self.url_result(tc['url']) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 907b079ec..97c0a2f15 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -31,6 +31,7 @@ from ..utils import ( NO_DEFAULT, ExtractorError, bug_reports_message, + classproperty, clean_html, datetime_from_str, dict_get, @@ -5781,16 +5782,17 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): class 
YoutubeFeedsInfoExtractor(InfoExtractor): """ Base class for feed extractors - Subclasses must define the _FEED_NAME property. + Subclasses must re-define the _FEED_NAME property. """ _LOGIN_REQUIRED = True + _FEED_NAME = 'feeds' def _real_initialize(self): YoutubeBaseInfoExtractor._check_login_required(self) - @property + @classproperty def IE_NAME(self): - return 'youtube:%s' % self._FEED_NAME + return f'youtube:{self._FEED_NAME}' def _real_extract(self, url): return self.url_result( diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index ba73c2191..82eb30af6 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -5321,6 +5321,7 @@ def merge_headers(*dicts): class classproperty: def __init__(self, f): + functools.update_wrapper(self, f) self.f = f def __get__(self, _, cls): |