3 files changed, 123 insertions, 101 deletions
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
index e4b4f5825..6d9b27742 100644
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -1,30 +1,28 @@
+import importlib
+import random
 import re
 
-from ..utils import bug_reports_message, write_string
+from ..utils import bug_reports_message, classproperty, write_string
 
 
 class LazyLoadMetaClass(type):
     def __getattr__(cls, name):
-        if '_real_class' not in cls.__dict__:
+        # "is_suitable" requires "_TESTS", which is not copied since it would bloat lazy_extractors
+        if '_real_class' not in cls.__dict__ and name not in ('is_suitable', 'get_testcases'):
             write_string(
                 'WARNING: Falling back to normal extractor since lazy extractor '
-                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
-        return getattr(cls._get_real_class(), name)
+                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
+        return getattr(cls.real_class, name)
 
 
 class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
-    _module = None
-    _WORKING = True
-
-    @classmethod
-    def _get_real_class(cls):
+    @classproperty
+    def real_class(cls):
         if '_real_class' not in cls.__dict__:
-            mod = __import__(cls._module, fromlist=(cls.__name__,))
-            cls._real_class = getattr(mod, cls.__name__)
+            cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
         return cls._real_class
 
     def __new__(cls, *args, **kwargs):
-        real_cls = cls._get_real_class()
-        instance = real_cls.__new__(real_cls)
+        instance = cls.real_class.__new__(cls.real_class)
         instance.__init__(*args, **kwargs)
         return instance
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 6dc8fed90..8ddc54b9b 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,101 +1,125 @@
 #!/usr/bin/env python3
 import os
+import optparse
 import sys
 from inspect import getsource
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py'
-if os.path.exists(lazy_extractors_filename):
-    os.remove(lazy_extractors_filename)
 
-# Block plugins from loading
-plugins_dirname = 'ytdlp_plugins'
-plugins_blocked_dirname = 'ytdlp_plugins_blocked'
-if os.path.exists(plugins_dirname):
-    os.rename(plugins_dirname, plugins_blocked_dirname)
-
-from yt_dlp.extractor import _ALL_CLASSES
-from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
-
-if os.path.exists(plugins_blocked_dirname):
-    os.rename(plugins_blocked_dirname, plugins_dirname)
-
-with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
-    module_template = f.read()
-
-CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
-module_contents = [
-    module_template,
-    *[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
-    '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
-
-ie_template = '''
+NO_ATTR = object()
+STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_WORKING', '_NETRC_MACHINE']
+CLASS_METHODS = [
+    'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id',
+]
+IE_TEMPLATE = '''
 class {name}({bases}):
-    _module = '{module}'
+    _module = {module!r}
 '''
-
-
-def get_base_name(base):
-    if base is InfoExtractor:
-        return 'LazyLoadExtractor'
-    elif base is SearchInfoExtractor:
-        return 'LazyLoadSearchExtractor'
-    else:
-        return base.__name__
-
-
-def build_lazy_ie(ie, name):
-    s = ie_template.format(
-        name=name,
-        bases=', '.join(map(get_base_name, ie.__bases__)),
-        module=ie.__module__)
+with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
+    MODULE_TEMPLATE = f.read()
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog [OUTFILE.py]')
+    args = parser.parse_args()[1] or ['yt_dlp/extractor/lazy_extractors.py']
+    if len(args) != 1:
+        parser.error('Expected only an output filename')
+
+    lazy_extractors_filename = args[0]
+    if os.path.exists(lazy_extractors_filename):
+        os.remove(lazy_extractors_filename)
+
+    _ALL_CLASSES = get_all_ies()  # Must be before the import below
+
+    from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
+
+    DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
+    module_src = '\n'.join((
+        MODULE_TEMPLATE,
+        '    _module = None',
+        *extra_ie_code(DummyInfoExtractor),
+        '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
+        *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
+    ))
+
+    with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
+        f.write(f'{module_src}\n')
+
+
+def get_all_ies():
+    PLUGINS_DIRNAME = 'ytdlp_plugins'
+    BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
+    if os.path.exists(PLUGINS_DIRNAME):
+        os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
+    try:
+        from yt_dlp.extractor import _ALL_CLASSES
+    finally:
+        if os.path.exists(BLOCKED_DIRNAME):
+            os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
+    return _ALL_CLASSES
+
+
+def extra_ie_code(ie, base=None):
+    for var in STATIC_CLASS_PROPERTIES:
+        val = getattr(ie, var)
+        if val != (getattr(base, var) if base else NO_ATTR):
+            yield f'    {var} = {val!r}'
+    yield ''
+
+    for name in CLASS_METHODS:
+        f = getattr(ie, name)
+        if not base or f.__func__ != getattr(base, name).__func__:
+            yield getsource(f)
+
+
+def build_ies(ies, bases, attr_base):
+    names = []
+    for ie in sort_ies(ies, bases):
+        yield build_lazy_ie(ie, ie.__name__, attr_base)
+        if ie in ies:
+            names.append(ie.__name__)
+
+    yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
+
+
+def sort_ies(ies, ignored_bases):
+    """Find the correct ordering and add the required base classes so that subclasses can be created correctly"""
+    classes, returned_classes = ies[:-1], set()
+    assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
+    while classes:
+        for c in classes[:]:
+            bases = set(c.__bases__) - {object, *ignored_bases}
+            restart = False
+            for b in bases:
+                if b not in classes and b not in returned_classes:
+                    assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
+                    classes.insert(0, b)
+                    restart = True
+            if restart:
+                break
+            if bases <= returned_classes:
+                yield c
+                returned_classes.add(c)
+                classes.remove(c)
+                break
+    yield ies[-1]
+
+
+def build_lazy_ie(ie, name, attr_base):
+    bases = ', '.join({
+        'InfoExtractor': 'LazyLoadExtractor',
+        'SearchInfoExtractor': 'LazyLoadSearchExtractor',
+    }.get(base.__name__, base.__name__) for base in ie.__bases__)
+
+    s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
     valid_url = getattr(ie, '_VALID_URL', None)
     if not valid_url and hasattr(ie, '_make_valid_url'):
         valid_url = ie._make_valid_url()
     if valid_url:
         s += f'    _VALID_URL = {valid_url!r}\n'
-    if not ie._WORKING:
-        s += '    _WORKING = False\n'
-    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
-        s += f'\n{getsource(ie.suitable)}'
-    return s
-
-
-# find the correct sorting and add the required base classes so that subclasses
-# can be correctly created
-classes = _ALL_CLASSES[:-1]
-ordered_cls = []
-while classes:
-    for c in classes[:]:
-        bases = set(c.__bases__) - {object, InfoExtractor, SearchInfoExtractor}
-        stop = False
-        for b in bases:
-            if b not in classes and b not in ordered_cls:
-                if b.__name__ == 'GenericIE':
-                    exit()
-                classes.insert(0, b)
-                stop = True
-        if stop:
-            break
-        if all(b in ordered_cls for b in bases):
-            ordered_cls.append(c)
-            classes.remove(c)
-            break
-ordered_cls.append(_ALL_CLASSES[-1])
-
-names = []
-for ie in ordered_cls:
-    name = ie.__name__
-    src = build_lazy_ie(ie, name)
-    module_contents.append(src)
-    if ie in _ALL_CLASSES:
-        names.append(name)
-
-module_contents.append(
-    '\n_ALL_CLASSES = [{}]'.format(', '.join(names)))
-
-module_src = '\n'.join(module_contents) + '\n'
-
-with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
-    f.write(module_src)
+    return s + '\n'.join(extra_ie_code(ie, attr_base))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 5531fec4d..d8c53c5e1 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -5,7 +5,7 @@ import sys
 
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from yt_dlp.extractor import list_extractors
+from yt_dlp.extractor import list_extractor_classes
 
 
 def main():
@@ -14,7 +14,7 @@ def main():
     if len(args) != 1:
         parser.error('Expected an output filename')
 
-    out = '\n'.join(ie.description() for ie in list_extractors(None) if ie.IE_DESC is not False)
+    out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
 
     with open(args[0], 'w', encoding='utf-8') as outf:
         outf.write(f'# Supported sites\n{out}\n')