diff options
author | Max Teegen <870074+max-te@users.noreply.github.com> | 2021-06-13 16:25:19 +0200 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-08-05 03:37:20 +0530 |
commit | 77b87f0519719c4264f400f5627da86c12f48bca (patch) | |
tree | 798db56b443b927f806073f1da56b7dd06ac66b6 | |
parent | 678da2f21b9a9ff0329bc64469f5312f4ef9d921 (diff) | |
download | hypervideo-pre-77b87f0519719c4264f400f5627da86c12f48bca.tar.lz hypervideo-pre-77b87f0519719c4264f400f5627da86c12f48bca.tar.xz hypervideo-pre-77b87f0519719c4264f400f5627da86c12f48bca.zip |
Add all format filtering operators also to `--match-filter`
PR: https://github.com/ytdl-org/youtube-dl/pull/27361
Authored by: max-te
-rw-r--r-- | README.md | 34 | ||||
-rw-r--r-- | test/test_utils.py | 12 | ||||
-rw-r--r-- | yt_dlp/options.py | 26 | ||||
-rw-r--r-- | yt_dlp/utils.py | 19 |
4 files changed, 49 insertions, 42 deletions
@@ -338,25 +338,21 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t COUNT views --max-views COUNT Do not download any videos with more than COUNT views - --match-filter FILTER Generic video filter. Specify any key (see - "OUTPUT TEMPLATE" for a list of available - keys) to match if the key is present, !key - to check if the key is not present, - key>NUMBER (like "view_count > 12", also - works with >=, <, <=, !=, =) to compare - against a number, key = 'LITERAL' (like - "uploader = 'Mike Smith'", also works with - !=) to match against a string literal and & - to require multiple matches. Values which - are not known are excluded unless you put a - question mark (?) after the operator. For - example, to only match videos that have - been liked more than 100 times and disliked - less than 50 times (or the dislike - functionality is not available at the given - service), but who also have a description, - use --match-filter "like_count > 100 & - dislike_count <? 50 & description" + --match-filter FILTER Generic video filter. Any field (see + "OUTPUT TEMPLATE") can be compared with a + number or a quoted string using the + operators defined in "Filtering formats". + You can also simply specify a field to + match if the field is present and "!field" + to check if the field is not present. + Multiple filters can be checked using "&". + For example, to only match videos that are + not live, has a like count more than 100, a + dislike count less than 50 (or the dislike + field is not available), and also has a + description that contains "python", use + --match-filter "!is_live & like_count>100 & + dislike_count<?50 & description*='python'" --no-match-filter Do not use generic video filter (default) --no-playlist Download only the video, if the URL refers to a video and a playlist diff --git a/test/test_utils.py b/test/test_utils.py index f73e7b204..5ac5dedc9 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1207,7 +1207,6 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') '9999 51') def test_match_str(self): - self.assertRaises(ValueError, match_str, 'xy>foobar', {}) self.assertFalse(match_str('xy', {'x': 1200})) self.assertTrue(match_str('!xy', {'x': 1200})) self.assertTrue(match_str('x', {'x': 1200})) @@ -1224,6 +1223,17 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) + self.assertTrue(match_str('y^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'})) + self.assertFalse(match_str('y^=bar', {'y': 'foobar42'})) + self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'})) + self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42}) + self.assertTrue(match_str('y*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'})) + self.assertFalse(match_str('y*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'})) + self.assertTrue(match_str('y$=42', {'y': 'foobar42'})) + self.assertFalse(match_str('y$=43', {'y': 'foobar42'})) self.assertFalse(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 90, 'description': 'foo'})) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 85dadc7d9..fba231382 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -375,22 +375,16 @@ def parseOpts(overrideArguments=None): '--match-filter', metavar='FILTER', dest='match_filter', default=None, help=( - 'Generic video filter. ' - 'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to ' - 'match if the key is present, ' - '!key to check if the key is not present, ' - 'key>NUMBER (like "view_count > 12", also works with ' - '>=, <, <=, !=, =) to compare against a number, ' - 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) ' - 'to match against a string literal ' - 'and & to require multiple matches. ' - 'Values which are not known are excluded unless you ' - 'put a question mark (?) after the operator. ' - 'For example, to only match videos that have been liked more than ' - '100 times and disliked less than 50 times (or the dislike ' - 'functionality is not available at the given service), but who ' - 'also have a description, use --match-filter ' - '"like_count > 100 & dislike_count <? 50 & description"')) + 'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a ' + 'number or a string using the operators defined in "Filtering formats". ' + 'You can also simply specify a field to match if the field is present ' + 'and "!field" to check if the field is not present. ' + 'Multiple filters can be checked using "&". ' + 'For example, to only match videos that are not live, ' + 'has a like count more than 100, a dislike count less than 50 ' + '(or the dislike field is not available), and also has a description ' + 'that contains "python", use --match-filter "!is_live & ' + 'like_count>100 & dislike_count<?50 & description*=\'python\'"')) selection.add_option( '--no-match-filter', metavar='FILTER', dest='match_filter', action='store_const', const=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index f24e00b02..d06b18e00 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4663,17 +4663,20 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): def _match_one(filter_part, dct): + # TODO: Generalize code with YoutubeDL._build_format_filter COMPARISON_OPERATORS = { '<': operator.lt, '<=': operator.le, '>': operator.gt, '>=': operator.ge, '=': operator.eq, - '!=': operator.ne, + '*=': operator.contains, + '^=': lambda attr, value: attr.startswith(value), + '$=': lambda attr, value: attr.endswith(value), } operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-z_]+) - \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* + \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)| @@ -4683,7 +4686,11 @@ def _match_one(filter_part, dct): ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) m = operator_rex.search(filter_part) if m: - op = COMPARISON_OPERATORS[m.group('op')] + unnegated_op = COMPARISON_OPERATORS[m.group('op')] + if m.group('negation'): + op = lambda attr, value: not unnegated_op(attr, value) + else: + op = unnegated_op actual_value = dct.get(m.group('key')) if (m.group('quotedstrval') is not None or m.group('strval') is not None @@ -4693,14 +4700,14 @@ def _match_one(filter_part, dct): # https://github.com/ytdl-org/youtube-dl/issues/11082). or actual_value is not None and m.group('intval') is not None and isinstance(actual_value, compat_str)): - if m.group('op') not in ('=', '!='): - raise ValueError( - 'Operator %s does not support string values!' % m.group('op')) comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') quote = m.group('quote') if quote is not None: comparison_value = comparison_value.replace(r'\%s' % quote, quote) else: + if m.group('op') in ('*=', '^=', '$='): + raise ValueError( + 'Operator %s only supports string values!' % m.group('op')) try: comparison_value = int(m.group('intval')) except ValueError: |