diff options
Diffstat (limited to 'hypervideo_dl/__init__.py')
-rw-r--r-- | hypervideo_dl/__init__.py | 170 |
1 files changed, 113 insertions, 57 deletions
diff --git a/hypervideo_dl/__init__.py b/hypervideo_dl/__init__.py index 8ac1c0c..60b012f 100644 --- a/hypervideo_dl/__init__.py +++ b/hypervideo_dl/__init__.py @@ -9,6 +9,7 @@ import optparse import os import re import sys +import traceback from .compat import compat_shlex_quote, workaround_optparse_bug9161 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS @@ -49,11 +50,11 @@ from .utils import ( read_stdin, render_table, setproctitle, - std_headers, traverse_obj, variadic, write_string, ) +from .utils.networking import std_headers from .YoutubeDL import YoutubeDL @@ -82,17 +83,20 @@ def get_urls(urls, batchfile, verbose): def print_extractor_information(opts, urls): - # Importing GenericIE is currently slow since it imports other extractors - # TODO: Move this back to module level after generalization of embed detection - from .extractor.generic import GenericIE - out = '' if opts.list_extractors: - for ie in list_extractors(opts.age_limit): - write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n', out=sys.stdout) - matchedUrls = [url for url in urls if ie.suitable(url)] - for mu in matchedUrls: - write_string(' ' + mu + '\n', out=sys.stdout) + # Importing GenericIE is currently slow since it imports YoutubeIE + from .extractor.generic import GenericIE + + urls = dict.fromkeys(urls, False) + for ie in list_extractor_classes(opts.age_limit): + out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n' + if ie == GenericIE: + matched_urls = [url for url, matched in urls.items() if not matched] + else: + matched_urls = tuple(filter(ie.suitable, urls.keys())) + urls.update(dict.fromkeys(matched_urls, True)) + out += ''.join(f' {url}\n' for url in matched_urls) elif opts.list_extractor_descriptions: for ie in list_extractors(opts.age_limit): if not ie.working(): @@ -180,8 +184,8 @@ def validate_options(opts): raise ValueError(f'{max_name} "{max_val}" must be must be greater than or equal to {min_name} "{min_val}"') # Usernames and passwords - validate(not opts.usenetrc or (opts.username is None and opts.password is None), - '.netrc', msg='using {name} conflicts with giving username/password') + validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc', + msg='{name}, netrc command and username/password are mutually exclusive options') validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing') validate(opts.ap_password is None or opts.ap_username is not None, 'TV Provider account username', msg='{name} missing') @@ -309,34 +313,60 @@ def validate_options(opts): if outtmpl_default == '': opts.skip_download = None del opts.outtmpl['default'] - if outtmpl_default and not os.path.splitext(outtmpl_default)[1] and opts.extractaudio: - raise ValueError( - 'Cannot download a video and extract audio into the same file! ' - f'Use "{outtmpl_default}.%(ext)s" instead of "{outtmpl_default}" as the output template') - def parse_chapters(name, value): - chapters, ranges = [], [] + def parse_chapters(name, value, advanced=False): + parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x) + TIMESTAMP_RE = r'''(?x)(?: + (?P<start_sign>-?)(?P<start>[^-]+) + )?\s*-\s*(?: + (?P<end_sign>-?)(?P<end>[^-]+) + )?''' + + chapters, ranges, from_url = [], [], False for regex in value or []: - if regex.startswith('*'): - for range in regex[1:].split(','): - dur = tuple(map(parse_duration, range.strip().split('-'))) - if len(dur) == 2 and all(t is not None for t in dur): - ranges.append(dur) - else: - raise ValueError(f'invalid {name} time range "{regex}". Must be of the form *start-end') + if advanced and regex == '*from-url': + from_url = True + continue + elif not regex.startswith('*'): + try: + chapters.append(re.compile(regex)) + except re.error as err: + raise ValueError(f'invalid {name} regex "{regex}" - {err}') continue - try: - chapters.append(re.compile(regex)) - except re.error as err: - raise ValueError(f'invalid {name} regex "{regex}" - {err}') - return chapters, ranges - opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters) - opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges)) + for range_ in map(str.strip, regex[1:].split(',')): + mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_) + dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')] + signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign')) + + err = None + if None in (dur or [None]): + err = 'Must be of the form "*start-end"' + elif not advanced and any(signs): + err = 'Negative timestamps are not allowed' + else: + dur[0] *= -1 if signs[0] else 1 + dur[1] *= -1 if signs[1] else 1 + if dur[1] == float('-inf'): + err = '"-inf" is not a valid end' + if err: + raise ValueError(f'invalid {name} time range "{regex}". {err}') + ranges.append(dur) + + return chapters, ranges, from_url + + opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters) + opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True)) # Cookies from browser if opts.cookiesfrombrowser: - mobj = re.match(r'(?P<name>[^+:]+)(\s*\+\s*(?P<keyring>[^:]+))?(\s*:(?P<profile>.+))?', opts.cookiesfrombrowser) + container = None + mobj = re.fullmatch(r'''(?x) + (?P<name>[^+:]+) + (?:\s*\+\s*(?P<keyring>[^:]+))? + (?:\s*:\s*(?!:)(?P<profile>.+?))? + (?:\s*::\s*(?P<container>.+))? + ''', opts.cookiesfrombrowser) if mobj is None: raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}') browser_name, keyring, profile = mobj.group('name', 'keyring', 'profile') @@ -370,10 +400,12 @@ def validate_options(opts): raise ValueError(f'{cmd} is invalid; {err}') yield action - parse_metadata = opts.parse_metadata or [] if opts.metafromtitle is not None: - parse_metadata.append('title:%s' % opts.metafromtitle) - opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata))) + opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata = { + k: list(itertools.chain(*map(metadataparser_actions, v))) + for k, v in opts.parse_metadata.items() + } # Other options if opts.playlist_items is not None: @@ -382,14 +414,19 @@ def validate_options(opts): except Exception as err: raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}') - geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country - if geo_bypass_code is not None: + opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None + if opts.geo_bypass.lower() not in ('default', 'never'): try: - GeoUtils.random_ipv4(geo_bypass_code) + GeoUtils.random_ipv4(opts.geo_bypass) except Exception: - raise ValueError('unsupported geo-bypass country or ip-block') + raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"') + if len(opts.geo_bypass) == 2: + opts.geo_bypass_country = opts.geo_bypass + else: + opts.geo_bypass_ip_block = opts.geo_bypass + opts.geo_bypass = opts.geo_bypass.lower() != 'never' - opts.match_filter = match_filter_func(opts.match_filter) + opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter) if opts.download_archive is not None: opts.download_archive = expand_path(opts.download_archive) @@ -413,6 +450,10 @@ def validate_options(opts): elif ed and proto == 'default': default_downloader = ed.get_basename() + for policy in opts.color.values(): + if policy not in ('always', 'auto', 'no_color', 'never'): + raise ValueError(f'"{policy}" is not a valid color policy') + warnings, deprecation_warnings = [], [] # Common mistake: -f best @@ -540,11 +581,11 @@ def validate_options(opts): def get_postprocessors(opts): yield from opts.add_postprocessors - if opts.parse_metadata: + for when, actions in opts.parse_metadata.items(): yield { 'key': 'MetadataParser', - 'actions': opts.parse_metadata, - 'when': 'pre_process' + 'actions': actions, + 'when': when } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -677,9 +718,13 @@ def parse_options(argv=None): postprocessors = list(get_postprocessors(opts)) - any_getting = (any(opts.forceprint.values()) or opts.dumpjson or opts.dump_single_json - or opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail - or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration) + print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) + any_getting = any(getattr(opts, k) for k in ( + 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', + 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' + )) + if opts.quiet is None: + opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) any_printing = opts.print_json @@ -692,6 +737,7 @@ def parse_options(argv=None): return parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, + 'netrc_cmd': opts.netrc_cmd, 'username': opts.username, 'password': opts.password, 'twofactor': opts.twofactor, @@ -699,7 +745,10 @@ def parse_options(argv=None): 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, - 'quiet': (opts.quiet or any_getting or any_printing), + 'client_certificate': opts.client_certificate, + 'client_certificate_key': opts.client_certificate_key, + 'client_certificate_password': opts.client_certificate_password, + 'quiet': opts.quiet, 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, @@ -810,6 +859,7 @@ def parse_options(argv=None): 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, + 'enable_file_urls': opts.enable_file_urls, 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, @@ -844,7 +894,7 @@ def parse_options(argv=None): 'playlist_items': opts.playlist_items, 'xattr_set_filesize': opts.xattr_set_filesize, 'match_filter': opts.match_filter, - 'no_color': opts.no_color, + 'color': opts.color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, 'hls_use_mpegts': opts.hls_use_mpegts, @@ -890,14 +940,18 @@ def _real_main(argv=None): if opts.rm_cachedir: ydl.cache.remove() - updater = Updater(ydl) - if opts.update_self and updater.update() and actual_use: - if updater.cmd: - return updater.restart() - # This code is reachable only for zip variant in py < 3.10 - # It makes sense to exit here, but the old behavior is to continue - ydl.report_warning('Restart hypervideo to use the updated version') - # return 100, 'ERROR: The program must exit for the update to complete' + try: + updater = Updater(ydl, opts.update_self) + if opts.update_self and updater.update() and actual_use: + if updater.cmd: + return updater.restart() + # This code is reachable only for zip variant in py < 3.10 + # It makes sense to exit here, but the old behavior is to continue + ydl.report_warning('Restart hypervideo to use the updated version') + # return 100, 'ERROR: The program must exit for the update to complete' + except Exception: + traceback.print_exc() + ydl._download_retcode = 100 if not actual_use: if pre_process: @@ -911,6 +965,8 @@ def _real_main(argv=None): parser.destroy() try: if opts.load_info_filename is not None: + if all_urls: + ydl.report_warning('URLs are ignored due to --load-info-json') return ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: return ydl.download(all_urls) |