diff options
author | Jesús <heckyel@hyperbola.info> | 2022-03-30 01:24:15 +0800 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2022-03-30 01:24:15 +0800 |
commit | 950cc067b8c41ac246deb4725177a372c95d8341 (patch) | |
tree | 66d5284ff86faec8c3000be1e7d0bd856b4c4dbe /yt_dlp/utils.py | |
parent | 7a74bc5d1e54299e51b73492e09c70da994f4b35 (diff) | |
parent | e7870111e83033e0ac728d5a2d565d1eb146c335 (diff) | |
download | hypervideo-pre-950cc067b8c41ac246deb4725177a372c95d8341.tar.lz hypervideo-pre-950cc067b8c41ac246deb4725177a372c95d8341.tar.xz hypervideo-pre-950cc067b8c41ac246deb4725177a372c95d8341.zip |
updated from upstream | 30/03/2022 at 01:24
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- | yt_dlp/utils.py | 146 |
1 files changed, 92 insertions, 54 deletions
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index da6f27801..62a1800d4 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -85,6 +85,12 @@ from .socks import ( sockssocket, ) +try: + import certifi + has_certifi = True +except ImportError: + has_certifi = False + def register_socks_protocols(): # "Register" SOCKS protocols @@ -153,7 +159,6 @@ if compat_brotli: std_headers = { 'User-Agent': random_user_agent(), 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS), 'Accept-Language': 'en-us,en;q=0.5', 'Sec-Fetch-Mode': 'navigate', } @@ -700,36 +705,40 @@ def timeconvert(timestr): return timestamp -def sanitize_filename(s, restricted=False, is_id=False): +def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT): """Sanitizes a string so it could be used as part of a filename. - If restricted is set, use a stricter subset of allowed characters. - Set is_id if this is not an arbitrary string, but an ID that should be kept - if possible. + @param restricted Use a stricter subset of allowed characters + @param is_id Whether this is an ID that should be kept unchanged if possible. + If unset, yt-dlp's new sanitization rules are in effect """ + if s == '': + return '' + def replace_insane(char): if restricted and char in ACCENT_CHARS: return ACCENT_CHARS[char] elif not restricted and char == '\n': - return ' ' + return '\0 ' elif char == '?' or ord(char) < 32 or ord(char) == 127: return '' elif char == '"': return '' if restricted else '\'' elif char == ':': - return '_-' if restricted else ' -' + return '\0_\0-' if restricted else '\0 \0-' elif char in '\\/|*<>': - return '_' - if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()): - return '_' - if restricted and ord(char) > 127: - return '_' + return '\0_' + if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127): + return '\0_' return char - if s == '': - return '' - # Handle timestamps - s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) + s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps result = ''.join(map(replace_insane, s)) + if is_id is NO_DEFAULT: + result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars + STRIP_RE = '(?:\0.|[ _-])*' + result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end + result = result.replace('\0', '') or '_' + if not is_id: while '__' in result: result = result.replace('__', '_') @@ -1010,20 +1019,23 @@ def make_HTTPS_handler(params, **kwargs): context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE if opts_check_certificate: - try: - context.load_default_certs() - # Work around the issue in load_default_certs when there are bad certificates. See: - # https://github.com/yt-dlp/yt-dlp/issues/1060, - # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 - except ssl.SSLError: - # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151 - if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): - # Create a new context to discard any certificates that were already loaded - context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) - context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED - for storename in ('CA', 'ROOT'): - _ssl_load_windows_store_certs(context, storename) - context.set_default_verify_paths() + if has_certifi and 'no-certifi' not in params.get('compat_opts', []): + context.load_verify_locations(cafile=certifi.where()) + else: + try: + context.load_default_certs() + # Work around the issue in load_default_certs when there are bad certificates. See: + # https://github.com/yt-dlp/yt-dlp/issues/1060, + # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 + except ssl.SSLError: + # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151 + if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): + # Create a new context to discard any certificates that were already loaded + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED + for storename in ('CA', 'ROOT'): + _ssl_load_windows_store_certs(context, storename) + context.set_default_verify_paths() return YoutubeDLHTTPSHandler(params, context=context, **kwargs) @@ -1392,6 +1404,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): if h.capitalize() not in req.headers: req.add_header(h, v) + if 'Accept-encoding' not in req.headers: + req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS)) + req.headers = handle_youtubedl_headers(req.headers) if sys.version_info < (2, 7) and '#' in req.get_full_url(): @@ -2629,23 +2644,23 @@ def parse_duration(s): m = re.match( r'''(?ix)(?:P? (?: - [0-9]+\s*y(?:ears?)?\s* + [0-9]+\s*y(?:ears?)?,?\s* )? (?: - [0-9]+\s*m(?:onths?)?\s* + [0-9]+\s*m(?:onths?)?,?\s* )? (?: - [0-9]+\s*w(?:eeks?)?\s* + [0-9]+\s*w(?:eeks?)?,?\s* )? (?: - (?P<days>[0-9]+)\s*d(?:ays?)?\s* + (?P<days>[0-9]+)\s*d(?:ays?)?,?\s* )? T)? (?: - (?P<hours>[0-9]+)\s*h(?:ours?)?\s* + (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s* )? (?: - (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s* + (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s* )? (?: (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* @@ -2698,7 +2713,9 @@ def check_executable(exe, args=[]): return exe -def _get_exe_version_output(exe, args): +def _get_exe_version_output(exe, args, *, to_screen=None): + if to_screen: + to_screen(f'Checking exe version: {shell_quote([exe] + args)}') try: # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. @@ -3090,16 +3107,16 @@ def try_get(src, getter, expected_type=None): return v +def filter_dict(dct, cndn=lambda _, v: v is not None): + return {k: v for k, v in dct.items() if cndn(k, v)} + + def merge_dicts(*dicts): merged = {} for a_dict in dicts: for k, v in a_dict.items(): - if v is None: - continue - if (k not in merged - or (isinstance(v, compat_str) and v - and isinstance(merged[k], compat_str) - and not merged[k])): + if (v is not None and k not in merged + or isinstance(v, str) and merged[k] == ''): merged[k] = v return merged @@ -3534,6 +3551,11 @@ def _match_one(filter_part, dct, incomplete): '=': operator.eq, } + if isinstance(incomplete, bool): + is_incomplete = lambda _: incomplete + else: + is_incomplete = lambda k: k in incomplete + operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-z_]+) \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* @@ -3572,7 +3594,7 @@ def _match_one(filter_part, dct, incomplete): if numeric_comparison is not None and m['op'] in STRING_OPERATORS: raise ValueError('Operator %s only supports string values!' % m['op']) if actual_value is None: - return incomplete or m['none_inclusive'] + return is_incomplete(m['key']) or m['none_inclusive'] return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison) UNARY_OPERATORS = { @@ -3587,7 +3609,7 @@ def _match_one(filter_part, dct, incomplete): if m: op = UNARY_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) - if incomplete and actual_value is None: + if is_incomplete(m.group('key')) and actual_value is None: return True return op(actual_value) @@ -3595,24 +3617,29 @@ def _match_one(filter_part, dct, incomplete): def match_str(filter_str, dct, incomplete=False): - """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false - When incomplete, all conditions passes on missing fields + """ Filter a dictionary with a simple string syntax. + @returns Whether the filter passes + @param incomplete Set of keys that is expected to be missing from dct. + Can be True/False to indicate all/none of the keys may be missing. + All conditions on incomplete keys pass if the key is missing """ return all( _match_one(filter_part.replace(r'\&', '&'), dct, incomplete) for filter_part in re.split(r'(?<!\\)&', filter_str)) -def match_filter_func(filter_str): - if filter_str is None: +def match_filter_func(filters): + if not filters: return None + filters = variadic(filters) def _match_func(info_dict, *args, **kwargs): - if match_str(filter_str, info_dict, *args, **kwargs): + if any(match_str(f, info_dict, *args, **kwargs) for f in filters): return None else: - video_title = info_dict.get('title', info_dict.get('id', 'video')) - return '%s does not pass filter %s, skipping ..' % (video_title, filter_str) + video_title = info_dict.get('title') or info_dict.get('id') or 'video' + filter_str = ') | ('.join(map(str.strip, filters)) + return f'{video_title} does not pass filter ({filter_str}), skipping ..' return _match_func @@ -5423,15 +5450,18 @@ class Config: class WebSocketsWrapper(): """Wraps websockets module to use in non-async scopes""" - def __init__(self, url, headers=None): + def __init__(self, url, headers=None, connect=True): self.loop = asyncio.events.new_event_loop() self.conn = compat_websockets.connect( url, extra_headers=headers, ping_interval=None, close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) + if connect: + self.__enter__() atexit.register(self.__exit__, None, None, None) def __enter__(self): - self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) + if not self.pool: + self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) return self def send(self, *args): @@ -5491,3 +5521,11 @@ has_websockets = bool(compat_websockets) def merge_headers(*dicts): """Merge dicts of http headers case insensitively, prioritizing the latter ones""" return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} + + +class classproperty: + def __init__(self, f): + self.f = f + + def __get__(self, _, cls): + return self.f(cls) |