diff options
Diffstat (limited to 'youtube_dlc')
-rw-r--r-- | youtube_dlc/YoutubeDL.py | 8 | ||||
-rw-r--r-- | youtube_dlc/__init__.py | 7 | ||||
-rw-r--r-- | youtube_dlc/extractor/common.py | 334 | ||||
-rw-r--r-- | youtube_dlc/extractor/vimeo.py | 10 | ||||
-rw-r--r-- | youtube_dlc/options.py | 19 |
5 files changed, 297 insertions, 81 deletions
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py index ef6fe0a78..2e74802ee 100644 --- a/youtube_dlc/YoutubeDL.py +++ b/youtube_dlc/YoutubeDL.py @@ -162,7 +162,9 @@ class YoutubeDL(object): dump_single_json: Force printing the info_dict of the whole playlist (or video) as a single JSON line. simulate: Do not download the video files. - format: Video format code. See options.py for more information. + format: Video format code. see "FORMAT SELECTION" for more details. + format_sort: How to sort the video formats. see "Sorting Formats" for more details. + format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. outtmpl: Template for output names. restrictfilenames: Do not allow "&" and spaces in file names. trim_file_name: Limit length of filename (extension excluded). @@ -2305,8 +2307,8 @@ class YoutubeDL(object): [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] for f in formats if f.get('preference') is None or f['preference'] >= -1000] - if len(formats) > 1: - table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' + # if len(formats) > 1: + # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' header_line = ['format code', 'extension', 'resolution', 'note'] self.to_screen( diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py index 7d72ab985..40fdd8d74 100644 --- a/youtube_dlc/__init__.py +++ b/youtube_dlc/__init__.py @@ -8,6 +8,7 @@ __license__ = 'Public Domain' import codecs import io import os +import re import random import sys @@ -41,6 +42,7 @@ from .downloader import ( FileDownloader, ) from .extractor import gen_extractors, list_extractors +from .extractor.common import InfoExtractor from .extractor.adobepass import MSO_INFO from .YoutubeDL import YoutubeDL @@ -245,6 +247,9 @@ def _real_main(argv=None): parser.error('Cannot download a video and extract audio into the same' ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' ' template'.format(outtmpl)) + for f in opts.format_sort: + if re.match(InfoExtractor.FormatSort.regex, f) is None: + parser.error('invalid format sort string "%s" specified' % f) any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json any_printing = opts.print_json @@ -347,6 +352,8 @@ def _real_main(argv=None): 'simulate': opts.simulate or any_getting, 'skip_download': opts.skip_download, 'format': opts.format, + 'format_sort': opts.format_sort, + 'format_sort_force': opts.format_sort_force, 'listformats': opts.listformats, 'outtmpl': outtmpl, 'autonumber_size': opts.autonumber_size, diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py index aacdf06fe..2d8d74793 100644 --- a/youtube_dlc/extractor/common.py +++ b/youtube_dlc/extractor/common.py @@ -32,6 +32,7 @@ from ..compat import ( compat_urlparse, compat_xml_parse_error, ) +from ..downloader import FileDownloader from ..downloader.f4m import ( get_base_url, remove_encrypted_media, @@ -1354,81 +1355,270 @@ class InfoExtractor(object): html, '%s form' % form_id, group='form') return self._hidden_inputs(form) - def _sort_formats(self, formats, field_preference=None): - if not formats: - raise ExtractorError('No video formats found') - - for f in formats: - # Automatically determine tbr when missing based on abr and vbr (improves - # formats sorting in some cases) - if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None: - f['tbr'] = f['abr'] + f['vbr'] - - def _formats_key(f): - # TODO remove the following workaround - from ..utils import determine_ext - if not f.get('ext') and 'url' in f: - f['ext'] = determine_ext(f['url']) - - if isinstance(field_preference, (list, tuple)): - return tuple( - f.get(field) - if f.get(field) is not None - else ('' if field == 'format_id' else -1) - for field in field_preference) - - preference = f.get('preference') - if preference is None: - preference = 0 - if f.get('ext') in ['f4f', 'f4m']: # Not yet supported - preference -= 0.5 - - protocol = f.get('protocol') or determine_protocol(f) - proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1) - - if f.get('vcodec') == 'none': # audio only - preference -= 50 - if self._downloader.params.get('prefer_free_formats'): - ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] + class FormatSort: + regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<seperator>[~:])(?P<limit>.*?))?)? *$' + + default = ('hidden', 'has_video', 'has_audio', 'extractor', 'lang', 'quality', + 'tbr', 'filesize', 'vbr', 'height', 'width', 'protocol', 'vext', + 'abr', 'aext', 'fps', 'filesize_approx', 'source_preference', 'format_id') + + settings = { + 'vcodec': {'type': 'ordered', 'regex': True, + 'order': ['av01', 'vp9', '(h265|he?vc?)', '(h264|avc)', 'vp8', '(mp4v|h263)', 'theora', '', None, 'none']}, + 'acodec': {'type': 'ordered', 'regex': True, + 'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']}, + 'protocol': {'type': 'ordered', 'regex': True, + 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', 'm3u8', '.*dash', '', 'mms|rtsp', 'none', 'f4']}, + 'vext': {'type': 'ordered', 'field': 'video_ext', + 'order': ('mp4', 'flv', 'webm', '', 'none'), # Why is flv prefered over webm??? + 'order_free': ('webm', 'mp4', 'flv', '', 'none')}, + 'aext': {'type': 'ordered', 'field': 'audio_ext', + 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'webm', '', 'none'), + 'order_free': ('opus', 'ogg', 'webm', 'm4a', 'mp3', 'aac', '', 'none')}, + 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, + 'extractor_preference': {'priority': True, 'type': 'extractor'}, + 'has_video': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, + 'has_audio': {'priority': True, 'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, + 'language_preference': {'priority': True, 'convert': 'ignore'}, + 'quality': {'priority': True, 'convert': 'float_none'}, + 'filesize': {'convert': 'bytes'}, + 'filesize_approx': {'convert': 'bytes'}, + 'format_id': {'convert': 'string'}, + 'height': {'convert': 'float_none'}, + 'width': {'convert': 'float_none'}, + 'fps': {'convert': 'float_none'}, + 'tbr': {'convert': 'float_none'}, + 'vbr': {'convert': 'float_none'}, + 'abr': {'convert': 'float_none'}, + 'asr': {'convert': 'float_none'}, + 'source_preference': {'convert': 'ignore'}, + 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}, + 'bitrate': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, + 'filesize_estimate': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'filesize_approx')}, + 'extension': {'type': 'combined', 'field': ('vext', 'aext')}, + 'dimension': {'type': 'multiple', 'field': ('height', 'width'), 'function': min}, # not named as 'resolution' because such a field exists + 'res': {'type': 'alias', 'field': 'dimension'}, + 'ext': {'type': 'alias', 'field': 'extension'}, + 'br': {'type': 'alias', 'field': 'bitrate'}, + 'total_bitrate': {'type': 'alias', 'field': 'tbr'}, + 'video_bitrate': {'type': 'alias', 'field': 'vbr'}, + 'audio_bitrate': {'type': 'alias', 'field': 'abr'}, + 'framerate': {'type': 'alias', 'field': 'fps'}, + 'lang': {'type': 'alias', 'field': 'language_preference'}, # not named as 'language' because such a field exists + 'proto': {'type': 'alias', 'field': 'protocol'}, + 'source': {'type': 'alias', 'field': 'source_preference'}, + 'size': {'type': 'alias', 'field': 'filesize_estimate'}, + 'samplerate': {'type': 'alias', 'field': 'asr'}, + 'video_ext': {'type': 'alias', 'field': 'vext'}, + 'audio_ext': {'type': 'alias', 'field': 'aext'}, + 'video_codec': {'type': 'alias', 'field': 'vcodec'}, + 'audio_codec': {'type': 'alias', 'field': 'acodec'}, + 'video': {'type': 'alias', 'field': 'has_video'}, + 'audio': {'type': 'alias', 'field': 'has_audio'}, + 'extractor': {'type': 'alias', 'field': 'extractor_preference'}, + 'preference': {'type': 'alias', 'field': 'extractor_preference'}} + + _order = [] + + def _get_field_setting(self, field, key): + if field not in self.settings: + self.settings[field] = {} + propObj = self.settings[field] + if key not in propObj: + type = propObj.get('type') + if key == 'field': + default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field + elif key == 'convert': + default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore' else: - ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a'] - ext_preference = 0 - try: - audio_ext_preference = ORDER.index(f['ext']) - except ValueError: - audio_ext_preference = -1 + default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,), 'function': max}.get(key, None) + propObj[key] = default + return propObj[key] + + def _resolve_field_value(self, field, value, convertNone=False): + if value is None: + if not convertNone: + return None + else: + value = value.lower() + conversion = self._get_field_setting(field, 'convert') + if conversion == 'ignore': + return None + if conversion == 'string': + return value + elif conversion == 'float_none': + return float_or_none(value) + elif conversion == 'bytes': + return FileDownloader.parse_bytes(value) + elif conversion == 'order': + order_free = self._get_field_setting(field, 'order_free') + order_list = order_free if order_free and self._use_free_order else self._get_field_setting(field, 'order') + use_regex = self._get_field_setting(field, 'regex') + list_length = len(order_list) + empty_pos = order_list.index('') if '' in order_list else list_length + 1 + if use_regex and value is not None: + for (i, regex) in enumerate(order_list): + if regex and re.match(regex, value): + return list_length - i + return list_length - empty_pos # not in list + else: # not regex or value = None + return list_length - (order_list.index(value) if value in order_list else empty_pos) else: - if f.get('acodec') == 'none': # video only - preference -= 40 - if self._downloader.params.get('prefer_free_formats'): - ORDER = ['flv', 'mp4', 'webm'] + if value.isnumeric(): + return float(value) else: - ORDER = ['webm', 'flv', 'mp4'] - try: - ext_preference = ORDER.index(f['ext']) - except ValueError: - ext_preference = -1 - audio_ext_preference = 0 - - return ( - preference, - f.get('language_preference') if f.get('language_preference') is not None else -1, - f.get('quality') if f.get('quality') is not None else -1, - f.get('tbr') if f.get('tbr') is not None else -1, - f.get('filesize') if f.get('filesize') is not None else -1, - f.get('vbr') if f.get('vbr') is not None else -1, - f.get('height') if f.get('height') is not None else -1, - f.get('width') if f.get('width') is not None else -1, - proto_preference, - ext_preference, - f.get('abr') if f.get('abr') is not None else -1, - audio_ext_preference, - f.get('fps') if f.get('fps') is not None else -1, - f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, - f.get('source_preference') if f.get('source_preference') is not None else -1, - f.get('format_id') if f.get('format_id') is not None else '', - ) - formats.sort(key=_formats_key) + self.settings[field]['convert'] = 'string' + return value + + def evaluate_params(self, params, sort_extractor): + self._use_free_order = params.get('prefer_free_formats', False) + self._sort_user = params.get('format_sort', []) + self._sort_extractor = sort_extractor + + def add_item(field, reverse, closest, limit_text): + field = field.lower() + if field in self._order: + return + self._order.append(field) + limit = self._resolve_field_value(field, limit_text) + data = { + 'reverse': reverse, + 'closest': False if limit is None else closest, + 'limit_text': limit_text, + 'limit': limit} + if field in self.settings: + self.settings[field].update(data) + else: + self.settings[field] = data + + sort_list = ( + tuple(field for field in self.default if self._get_field_setting(field, 'forced')) + + (tuple() if params.get('format_sort_force', False) + else tuple(field for field in self.default if self._get_field_setting(field, 'priority'))) + + tuple(self._sort_user) + tuple(sort_extractor) + self.default) + + for item in sort_list: + match = re.match(self.regex, item) + if match is None: + raise ExtractorError('Invalid format sort string "%s" given by extractor' % item) + field = match.group('field') + if field is None: + continue + if self._get_field_setting(field, 'type') == 'alias': + field = self._get_field_setting(field, 'field') + reverse = match.group('reverse') is not None + closest = match.group('seperator') == '~' + limit_text = match.group('limit') + + has_limit = limit_text is not None + has_multiple_fields = self._get_field_setting(field, 'type') == 'combined' + has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit') + + fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,) + limits = limit_text.split(":") if has_multiple_limits else (limit_text,) if has_limit else tuple() + limit_count = len(limits) + for (i, f) in enumerate(fields): + add_item(f, reverse, closest, + limits[i] if i < limit_count + else limits[0] if has_limit and not has_multiple_limits + else None) + + def print_verbose_info(self, to_screen): + to_screen('[debug] Sort order given by user: %s' % ','.join(self._sort_user)) + if self._sort_extractor: + to_screen('[debug] Sort order given by extractor: %s' % ','.join(self._sort_extractor)) + to_screen('[debug] Formats sorted by: %s' % ', '.join(['%s%s%s' % ( + '+' if self._get_field_setting(field, 'reverse') else '', field, + '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':', + self._get_field_setting(field, 'limit_text'), + self._get_field_setting(field, 'limit')) + if self._get_field_setting(field, 'limit_text') is not None else '') + for field in self._order if self._get_field_setting(field, 'visible')])) + + def _calculate_field_preference_from_value(self, format, field, type, value): + reverse = self._get_field_setting(field, 'reverse') + closest = self._get_field_setting(field, 'closest') + limit = self._get_field_setting(field, 'limit') + + if type == 'extractor': + maximum = self._get_field_setting(field, 'max') + if value is None or (maximum is not None and value >= maximum): + value = 0 + elif type == 'boolean': + in_list = self._get_field_setting(field, 'in_list') + not_in_list = self._get_field_setting(field, 'not_in_list') + value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1 + elif type == 'ordered': + value = self._resolve_field_value(field, value, True) + + # try to convert to number + val_num = float_or_none(value) + is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None + if is_num: + value = val_num + + return ((-10, 0) if value is None + else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher + else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest + else (0, value, 0) if not reverse and (limit is None or value <= limit) + else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit + else (-1, value, 0)) + + def _calculate_field_preference(self, format, field): + type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple + get_value = lambda f: format.get(self._get_field_setting(f, 'field')) + if type == 'multiple': + type = 'field' # Only 'field' is allowed in multiple for now + actual_fields = self._get_field_setting(field, 'field') + + def wrapped_function(values): + values = tuple(filter(lambda x: x is not None, values)) + return (self._get_field_setting(field, 'function')(*values) if len(values) > 1 + else values[0] if values + else None) + + value = wrapped_function((get_value(f) for f in actual_fields)) + else: + value = get_value(field) + return self._calculate_field_preference_from_value(format, field, type, value) + + def calculate_preference(self, format): + # Determine missing protocol + if not format.get('protocol'): + format['protocol'] = determine_protocol(format) + + # Determine missing ext + if not format.get('ext') and 'url' in format: + format['ext'] = determine_ext(format['url']) + if format.get('vcodec') == 'none': + format['audio_ext'] = format['ext'] + format['video_ext'] = 'none' + else: + format['video_ext'] = format['ext'] + format['audio_ext'] = 'none' + # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported? + # format['preference'] = -1000 + + # Determine missing bitrates + if format.get('tbr') is None: + if format.get('vbr') is not None and format.get('abr') is not None: + format['tbr'] = format.get('vbr', 0) + format.get('abr', 0) + else: + if format.get('vcodec') != "none" and format.get('vbr') is None: + format['vbr'] = format.get('tbr') - format.get('abr', 0) + if format.get('acodec') != "none" and format.get('abr') is None: + format['abr'] = format.get('tbr') - format.get('vbr', 0) + + return tuple(self._calculate_field_preference(format, field) for field in self._order) + + def _sort_formats(self, formats, field_preference=[]): + if not formats: + raise ExtractorError('No video formats found') + format_sort = self.FormatSort() # params and to_screen are taken from the downloader + format_sort.evaluate_params(self._downloader.params, field_preference) + if self._downloader.params.get('verbose', False): + format_sort.print_verbose_info(self._downloader.to_screen) + formats.sort(key=lambda f: format_sort.calculate_preference(f)) def _check_formats(self, formats, video_id): if formats: diff --git a/youtube_dlc/extractor/vimeo.py b/youtube_dlc/extractor/vimeo.py index 51a0ab2fa..21f0620be 100644 --- a/youtube_dlc/extractor/vimeo.py +++ b/youtube_dlc/extractor/vimeo.py @@ -181,11 +181,11 @@ class VimeoBaseInfoExtractor(InfoExtractor): 'preference': 1, }) - for f in formats: - if f.get('vcodec') == 'none': - f['preference'] = -50 - elif f.get('acodec') == 'none': - f['preference'] = -40 + # for f in formats: + # if f.get('vcodec') == 'none': + # f['preference'] = -50 + # elif f.get('acodec') == 'none': + # f['preference'] = -40 subtitles = {} text_tracks = config['request'].get('text_tracks') diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py index 9ad8a6ddd..bbec33678 100644 --- a/youtube_dlc/options.py +++ b/youtube_dlc/options.py @@ -397,7 +397,24 @@ def parseOpts(overrideArguments=None): video_format.add_option( '-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, - help='Video format code, see the "FORMAT SELECTION" for all the info') + help='Video format code, see "FORMAT SELECTION" for more details') + video_format.add_option( + '-S', '--format-sort', + dest='format_sort', default=[], + action='callback', callback=_comma_separated_values_options_callback, type='str', + help='Sort the formats by the fields given, see "Sorting Formats" for more details') + video_format.add_option( + '--format-sort-force', '--S-force', + action='store_true', dest='format_sort_force', metavar='FORMAT', default=False, + help=( + 'Force user specified sort order to have precedence over all fields, ' + 'see "Sorting Formats" for more details')) + video_format.add_option( + '--no-format-sort-force', + action='store_false', dest='format_sort_force', metavar='FORMAT', default=False, + help=( + 'Some fields have precedence over the user specified sort order (default), ' + 'see "Sorting Formats" for more details')) video_format.add_option( '--all-formats', action='store_const', dest='format', const='all', |