diff options
Diffstat (limited to 'yt_dlp/YoutubeDL.py')
-rw-r--r-- | yt_dlp/YoutubeDL.py | 123 |
1 files changed, 68 insertions, 55 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d542d22e6..ed1881da5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -67,6 +67,7 @@ from .utils import ( float_or_none, format_bytes, format_field, + format_decimal_suffix, formatSeconds, GeoRestrictedError, get_domain, @@ -315,10 +316,10 @@ class YoutubeDL(object): break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue cookiefile: File name where cookies should be read from and dumped to - cookiesfrombrowser: A tuple containing the name of the browser and the profile - name/path from where cookies are loaded. - Eg: ('chrome', ) or ('vivaldi', 'default') - nocheckcertificate:Do not verify SSL certificates + cookiesfrombrowser: A tuple containing the name of the browser, the profile + name/pathfrom where cookies are loaded, and the name of the + keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT') + nocheckcertificate: Do not verify SSL certificates prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use @@ -448,8 +449,8 @@ class YoutubeDL(object): The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, - max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl, - noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size, + max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries, + continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size, external_downloader_args, concurrent_fragment_downloads. The following options are used by the post processors: @@ -1004,7 +1005,7 @@ class YoutubeDL(object): def validate_outtmpl(cls, outtmpl): ''' @return None or Exception object ''' outtmpl = re.sub( - STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'), + STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'), lambda mobj: f'{mobj.group(0)[:-1]}s', cls._outtmpl_expandpath(outtmpl)) try: @@ -1020,8 +1021,12 @@ class YoutubeDL(object): info_dict.pop(key, None) return info_dict - def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): - """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ + def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): + """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict + @param sanitize Whether to sanitize the output as a filename. + For backward compatibility, a function can also be passed + """ + info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set info_dict = self._copy_infodict(info_dict) @@ -1042,7 +1047,7 @@ class YoutubeDL(object): } TMPL_DICT = {} - EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]')) + EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, @@ -1050,7 +1055,7 @@ class YoutubeDL(object): # Field is of the form key1.key2... # where keys (except first) can be string, int or slice FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') - MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?') + MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?') MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) INTERNAL_FORMAT_RE = re.compile(r'''(?x) (?P<negate>-)? @@ -1106,6 +1111,13 @@ class YoutubeDL(object): na = self.params.get('outtmpl_na_placeholder', 'NA') + def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): + return sanitize_filename(str(value), restricted=restricted, + is_id=re.search(r'(^|[_.])id(\.|$)', key)) + + sanitizer = sanitize if callable(sanitize) else filename_sanitizer + sanitize = bool(sanitize) + def _dumpjson_default(obj): if isinstance(obj, (set, LazyList)): return list(obj) @@ -1116,7 +1128,7 @@ class YoutubeDL(object): return outer_mobj.group(0) key = outer_mobj.group('key') mobj = re.match(INTERNAL_FORMAT_RE, key) - initial_field = mobj.group('fields').split('.')[-1] if mobj else '' + initial_field = mobj.group('fields') if mobj else '' value, replacement, default = None, None, na while mobj: mobj = mobj.groupdict() @@ -1138,7 +1150,7 @@ class YoutubeDL(object): str_fmt = f'{fmt[:-1]}s' if fmt[-1] == 'l': # list delim = '\n' if '#' in flags else ', ' - value, fmt = delim.join(variadic(value)), str_fmt + value, fmt = delim.join(variadic(value, allowed_types=(str, bytes))), str_fmt elif fmt[-1] == 'j': # json value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt elif fmt[-1] == 'q': # quoted @@ -1152,6 +1164,10 @@ class YoutubeDL(object): # "+" = compatibility equivalence, "#" = NFD 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt + elif fmt[-1] == 'D': # decimal suffix + value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's' + elif fmt[-1] == 'S': # filename sanitization + value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt elif fmt[-1] == 'c': if value: value = str(value)[0] @@ -1168,7 +1184,7 @@ class YoutubeDL(object): # So we convert it to repr first value, fmt = repr(value), str_fmt if fmt[-1] in 'csr': - value = sanitize(initial_field, value) + value = sanitizer(initial_field, value) key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value @@ -1182,12 +1198,8 @@ class YoutubeDL(object): def _prepare_filename(self, info_dict, tmpl_type='default'): try: - sanitize = lambda k, v: sanitize_filename( - compat_str(v), - restricted=self.params.get('restrictfilenames'), - is_id=(k == 'id' or k.endswith('_id'))) outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])) - filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize) + filename = self.evaluate_outtmpl(outtmpl, info_dict, True) force_ext = OUTTMPL_TYPES.get(tmpl_type) if filename and force_ext is not None: @@ -1335,31 +1347,33 @@ class YoutubeDL(object): def __handle_extraction_exceptions(func): @functools.wraps(func) def wrapper(self, *args, **kwargs): - try: - return func(self, *args, **kwargs) - except GeoRestrictedError as e: - msg = e.msg - if e.countries: - msg += '\nThis video is available in %s.' % ', '.join( - map(ISO3166Utils.short2full, e.countries)) - msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' - self.report_error(msg) - except ExtractorError as e: # An error we somewhat expected - self.report_error(compat_str(e), e.format_traceback()) - except ReExtractInfo as e: - if e.expected: - self.to_screen(f'{e}; Re-extracting data') - else: - self.to_stderr('\r') - self.report_warning(f'{e}; Re-extracting data') - return wrapper(self, *args, **kwargs) - except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): - raise - except Exception as e: - if self.params.get('ignoreerrors'): - self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) - else: + while True: + try: + return func(self, *args, **kwargs) + except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise + except ReExtractInfo as e: + if e.expected: + self.to_screen(f'{e}; Re-extracting data') + else: + self.to_stderr('\r') + self.report_warning(f'{e}; Re-extracting data') + continue + except GeoRestrictedError as e: + msg = e.msg + if e.countries: + msg += '\nThis video is available in %s.' % ', '.join( + map(ISO3166Utils.short2full, e.countries)) + msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' + self.report_error(msg) + except ExtractorError as e: # An error we somewhat expected + self.report_error(str(e), e.format_traceback()) + except Exception as e: + if self.params.get('ignoreerrors'): + self.report_error(str(e), tb=encode_compat_str(traceback.format_exc())) + else: + raise + break return wrapper def _wait_for_video(self, ie_result): @@ -1482,7 +1496,7 @@ class YoutubeDL(object): self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) ie_result['additional_entries'] = [ self.extract_info( - url, download, extra_info, + url, download, extra_info=extra_info, force_generic_extractor=self.params.get('force_generic_extractor')) for url in additional_urls ] @@ -2461,10 +2475,7 @@ class YoutubeDL(object): info_dict['id'], automatic_captions, 'automatic captions') self.list_subtitles(info_dict['id'], subtitles, 'subtitles') if self.params.get('listformats') or interactive_format_selection: - if not info_dict.get('formats') and not info_dict.get('url'): - self.to_screen('%s has no formats' % info_dict['id']) - else: - self.list_formats(info_dict) + self.list_formats(info_dict) if list_only: # Without this printing, -F --print-json will not work self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True) @@ -3135,9 +3146,8 @@ class YoutubeDL(object): 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', } - empty_values = (None, {}, [], set(), tuple()) reject = lambda k, v: k not in keep_keys and ( - k.startswith('_') or k in remove_keys or v in empty_values) + k.startswith('_') or k in remove_keys or v is None) else: reject = lambda k, v: k in remove_keys @@ -3348,6 +3358,11 @@ class YoutubeDL(object): return headers def list_formats(self, info_dict): + if not info_dict.get('formats') and not info_dict.get('url'): + self.to_screen('%s has no formats' % info_dict['id']) + return + self.to_screen('[info] Available formats for %s:' % info_dict['id']) + formats = info_dict.get('formats', [info_dict]) new_format = self.params.get('listformats_table', True) is not False if new_format: @@ -3362,7 +3377,7 @@ class YoutubeDL(object): delim, format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), format_field(f, 'tbr', '\t%dk'), - shorten_protocol_name(f.get('protocol', '').replace('native', 'n')), + shorten_protocol_name(f.get('protocol', '')), delim, format_field(f, 'vcodec', default='unknown').replace( 'none', @@ -3398,8 +3413,6 @@ class YoutubeDL(object): if f.get('preference') is None or f['preference'] >= -1000] header_line = ['format code', 'extension', 'resolution', 'note'] - self.to_screen( - '[info] Available formats for %s:' % info_dict['id']) self.to_stdout(render_table( header_line, table, extra_gap=(0 if new_format else 1), @@ -3527,11 +3540,11 @@ class YoutubeDL(object): from .downloader.websocket import has_websockets from .postprocessor.embedthumbnail import has_mutagen - from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE + from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE lib_str = join_nonempty( compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], - KEYRING_AVAILABLE and 'keyring', + SECRETSTORAGE_AVAILABLE and 'secretstorage', has_mutagen and 'mutagen', SQLITE_AVAILABLE and 'sqlite', has_websockets and 'websockets', |