about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: pukkandan <pukkandan.ytdlp@gmail.com> 2022-08-24 05:42:16 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2022-08-24 07:47:51 +0530
commit: fe7866d0ed6bfa3904ce12b049a3424fdc0ea1fa (patch)
tree: ce07f269160c2cf3c971ee83e82f19feceffcfeb
parent: 5314b521925498356e78652fe59866116d56e1d1 (diff)
download: hypervideo-pre-fe7866d0ed6bfa3904ce12b049a3424fdc0ea1fa.tar.lz
hypervideo-pre-fe7866d0ed6bfa3904ce12b049a3424fdc0ea1fa.tar.xz
hypervideo-pre-fe7866d0ed6bfa3904ce12b049a3424fdc0ea1fa.zip
Add option `--use-extractors`
Deprecates `--force-generic-extractor`.
Closes #3234, Closes #2044
Related: #4307, #1791
-rw-r--r--  README.md                   9
-rw-r--r--  yt_dlp/YoutubeDL.py        41
-rw-r--r--  yt_dlp/__init__.py          1
-rw-r--r--  yt_dlp/extractor/common.py 13
-rw-r--r--  yt_dlp/options.py          12
5 files changed, 58 insertions, 18 deletions
diff --git a/README.md b/README.md
index 7cfeec4f1..aab20c079 100644
--- a/README.md
+++ b/README.md
@@ -375,7 +375,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi
--list-extractors List all supported extractors and exit
--extractor-descriptions Output descriptions of all supported
extractors and exit
- --force-generic-extractor Force extraction to use the generic extractor
+ --use-extractors, --ies NAMES Extractor names to use separated by commas.
+ You can also use regexes, "all", "default"
+ and "end" (end URL matching); e.g. --ies
+ "holodex.*,end,youtube". Prefix the name
+ with a "-" to exclude it, e.g. --ies
+ default,-generic. Use --list-extractors for
+ a list of available extractor names
--default-search PREFIX Use this prefix for unqualified URLs. E.g.
"gvsearch2:python" downloads two videos from
google videos for the search term "python".
@@ -2058,6 +2064,7 @@ While these options are redundant, they are still expected to be used due to the
#### Not recommended
While these options still work, their use is not recommended since there are other alternatives to achieve the same
+ --force-generic-extractor --ies generic,default
--exec-before-download CMD --exec "before_dl:CMD"
--no-exec-before-download --no-exec
--all-formats -f all
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 872e0bdc3..a3d562042 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -29,6 +29,7 @@ from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
@@ -237,7 +238,7 @@ class YoutubeDL:
Default is 'only_download' for CLI, but False for API
skip_playlist_after_errors: Number of allowed failures until the rest of
the playlist is skipped
- force_generic_extractor: Force downloader to use the generic extractor
+ allowed_extractors: List of regexes to match against extractor names that are allowed
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
and don't overwrite any file if False
@@ -477,6 +478,8 @@ class YoutubeDL:
The following options are deprecated and may be removed in the future:
+ force_generic_extractor: Force downloader to use the generic extractor
+ - Use allowed_extractors = ['generic', 'default']
playliststart: - Use playlist_items
Playlist item to start at.
playlistend: - Use playlist_items
@@ -758,13 +761,6 @@ class YoutubeDL:
self._ies_instances[ie_key] = ie
ie.set_downloader(self)
- def _get_info_extractor_class(self, ie_key):
- ie = self._ies.get(ie_key)
- if ie is None:
- ie = get_info_extractor(ie_key)
- self.add_info_extractor(ie)
- return ie
-
def get_info_extractor(self, ie_key):
"""
Get an instance of an IE with name ie_key, it will try to get one from
@@ -781,8 +777,19 @@ class YoutubeDL:
"""
Add the InfoExtractors returned by gen_extractors to the end of the list
"""
- for ie in gen_extractor_classes():
- self.add_info_extractor(ie)
+ all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
+ all_ies['end'] = UnsupportedURLIE()
+ try:
+ ie_names = orderedSet_from_options(
+ self.params.get('allowed_extractors', ['default']), {
+ 'all': list(all_ies),
+ 'default': [name for name, ie in all_ies.items() if ie._ENABLED],
+ }, use_regex=True)
+ except re.error as e:
+ raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
+ for name in ie_names:
+ self.add_info_extractor(all_ies[name])
+ self.write_debug(f'Loaded {len(ie_names)} extractors')
def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
@@ -1413,11 +1420,11 @@ class YoutubeDL:
ie_key = 'Generic'
if ie_key:
- ies = {ie_key: self._get_info_extractor_class(ie_key)}
+ ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
else:
ies = self._ies
- for ie_key, ie in ies.items():
+ for key, ie in ies.items():
if not ie.suitable(url):
continue
@@ -1426,14 +1433,16 @@ class YoutubeDL:
'and will probably not work.')
temp_id = ie.get_temp_id(url)
- if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
- self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+ if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
+ self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
if self.params.get('break_on_existing', False):
raise ExistingVideoReached()
break
- return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
+ return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
else:
- self.report_error('no suitable InfoExtractor for URL %s' % url)
+ extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
+ self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
+ tb=False if extractors_restricted else None)
def _handle_extraction_exceptions(func):
@functools.wraps(func)
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 317dd2623..e9234e6f4 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -766,6 +766,7 @@ def parse_options(argv=None):
'windowsfilenames': opts.windowsfilenames,
'ignoreerrors': opts.ignoreerrors,
'force_generic_extractor': opts.force_generic_extractor,
+ 'allowed_extractors': opts.allowed_extractors or ['default'],
'ratelimit': opts.ratelimit,
'throttledratelimit': opts.throttledratelimit,
'overwrites': opts.overwrites,
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index a534703e5..6337a13a4 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -480,6 +480,9 @@ class InfoExtractor:
will be used by geo restriction bypass mechanism similarly
to _GEO_COUNTRIES.
+ The _ENABLED attribute should be set to False for IEs that
+ are disabled by default and must be explicitly enabled.
+
The _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
"""
@@ -491,6 +494,7 @@ class InfoExtractor:
_GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None
_WORKING = True
+ _ENABLED = True
_NETRC_MACHINE = None
IE_DESC = None
SEARCH_KEY = None
@@ -3941,3 +3945,12 @@ class SearchInfoExtractor(InfoExtractor):
@classproperty
def SEARCH_KEY(cls):
return cls._SEARCH_KEY
+
+
+class UnsupportedURLIE(InfoExtractor):
+ _VALID_URL = '.*'
+ _ENABLED = False
+ IE_DESC = False
+
+ def _real_extract(self, url):
+ raise UnsupportedError(url)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 0cddb7fd5..bee531d1b 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -354,9 +354,19 @@ def create_parser():
action='store_true', dest='list_extractor_descriptions', default=False,
help='Output descriptions of all supported extractors and exit')
general.add_option(
+ '--use-extractors', '--ies',
+ action='callback', dest='allowed_extractors', metavar='NAMES', type='str',
+ default=[], callback=_list_from_options_callback,
+ help=(
+ 'Extractor names to use separated by commas. '
+ 'You can also use regexes, "all", "default" and "end" (end URL matching); '
+ 'e.g. --ies "holodex.*,end,youtube". '
+ 'Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. '
+ 'Use --list-extractors for a list of available extractor names'))
+ general.add_option(
'--force-generic-extractor',
action='store_true', dest='force_generic_extractor', default=False,
- help='Force extraction to use the generic extractor')
+ help=optparse.SUPPRESS_HELP)
general.add_option(
'--default-search',
dest='default_search', metavar='PREFIX',