about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Max Teegen <870074+max-te@users.noreply.github.com> 2021-06-13 16:25:19 +0200
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2021-08-05 03:37:20 +0530
commit: 77b87f0519719c4264f400f5627da86c12f48bca (patch)
tree: 798db56b443b927f806073f1da56b7dd06ac66b6
parent: 678da2f21b9a9ff0329bc64469f5312f4ef9d921 (diff)
download: hypervideo-pre-77b87f0519719c4264f400f5627da86c12f48bca.tar.lz
hypervideo-pre-77b87f0519719c4264f400f5627da86c12f48bca.tar.xz
hypervideo-pre-77b87f0519719c4264f400f5627da86c12f48bca.zip
Add all format filtering operators also to `--match-filter`
PR: https://github.com/ytdl-org/youtube-dl/pull/27361 Authored by: max-te
-rw-r--r-- README.md 34
-rw-r--r-- test/test_utils.py 12
-rw-r--r-- yt_dlp/options.py 26
-rw-r--r-- yt_dlp/utils.py 19
4 files changed, 49 insertions, 42 deletions
diff --git a/README.md b/README.md
index 5a51bdf5f..1967d216c 100644
--- a/README.md
+++ b/README.md
@@ -338,25 +338,21 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
- --match-filter FILTER Generic video filter. Specify any key (see
- "OUTPUT TEMPLATE" for a list of available
- keys) to match if the key is present, !key
- to check if the key is not present,
- key>NUMBER (like "view_count > 12", also
- works with >=, <, <=, !=, =) to compare
- against a number, key = 'LITERAL' (like
- "uploader = 'Mike Smith'", also works with
- !=) to match against a string literal and &
- to require multiple matches. Values which
- are not known are excluded unless you put a
- question mark (?) after the operator. For
- example, to only match videos that have
- been liked more than 100 times and disliked
- less than 50 times (or the dislike
- functionality is not available at the given
- service), but who also have a description,
- use --match-filter "like_count > 100 &
- dislike_count <? 50 & description"
+ --match-filter FILTER Generic video filter. Any field (see
+ "OUTPUT TEMPLATE") can be compared with a
+ number or a quoted string using the
+ operators defined in "Filtering formats".
+ You can also simply specify a field to
+ match if the field is present and "!field"
+ to check if the field is not present.
+ Multiple filters can be checked using "&".
+ For example, to only match videos that are
+ not live, have a like count more than 100, a
+ dislike count less than 50 (or the dislike
+ field is not available), and also have a
+ description that contains "python", use
+ --match-filter "!is_live & like_count>100 &
+ dislike_count<?50 & description*='python'"
--no-match-filter Do not use generic video filter (default)
--no-playlist Download only the video, if the URL refers
to a video and a playlist
diff --git a/test/test_utils.py b/test/test_utils.py
index f73e7b204..5ac5dedc9 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -1207,7 +1207,6 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
'9999 51')
def test_match_str(self):
- self.assertRaises(ValueError, match_str, 'xy>foobar', {})
self.assertFalse(match_str('xy', {'x': 1200}))
self.assertTrue(match_str('!xy', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 1200}))
@@ -1224,6 +1223,17 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
+ self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
+ self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
+ self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
+ self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 90, 'description': 'foo'}))
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 85dadc7d9..fba231382 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -375,22 +375,16 @@ def parseOpts(overrideArguments=None):
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
- 'Generic video filter. '
- 'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
- 'match if the key is present, '
- '!key to check if the key is not present, '
- 'key>NUMBER (like "view_count > 12", also works with '
- '>=, <, <=, !=, =) to compare against a number, '
- 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
- 'to match against a string literal '
- 'and & to require multiple matches. '
- 'Values which are not known are excluded unless you '
- 'put a question mark (?) after the operator. '
- 'For example, to only match videos that have been liked more than '
- '100 times and disliked less than 50 times (or the dislike '
- 'functionality is not available at the given service), but who '
- 'also have a description, use --match-filter '
- '"like_count > 100 & dislike_count <? 50 & description"'))
+ 'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
+ 'number or a string using the operators defined in "Filtering formats". '
+ 'You can also simply specify a field to match if the field is present '
+ 'and "!field" to check if the field is not present. '
+ 'Multiple filters can be checked using "&". '
+ 'For example, to only match videos that are not live, '
+ 'has a like count more than 100, a dislike count less than 50 '
+ '(or the dislike field is not available), and also has a description '
+ 'that contains "python", use --match-filter "!is_live & '
+ 'like_count>100 & dislike_count<?50 & description*=\'python\'"'))
selection.add_option(
'--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None,
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index f24e00b02..d06b18e00 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4663,17 +4663,20 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
def _match_one(filter_part, dct):
+ # TODO: Generalize code with YoutubeDL._build_format_filter
COMPARISON_OPERATORS = {
'<': operator.lt,
'<=': operator.le,
'>': operator.gt,
'>=': operator.ge,
'=': operator.eq,
- '!=': operator.ne,
+ '*=': operator.contains,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
@@ -4683,7 +4686,11 @@ def _match_one(filter_part, dct):
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
m = operator_rex.search(filter_part)
if m:
- op = COMPARISON_OPERATORS[m.group('op')]
+ unnegated_op = COMPARISON_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not unnegated_op(attr, value)
+ else:
+ op = unnegated_op
actual_value = dct.get(m.group('key'))
if (m.group('quotedstrval') is not None
or m.group('strval') is not None
@@ -4693,14 +4700,14 @@ def _match_one(filter_part, dct):
# https://github.com/ytdl-org/youtube-dl/issues/11082).
or actual_value is not None and m.group('intval') is not None
and isinstance(actual_value, compat_str)):
- if m.group('op') not in ('=', '!='):
- raise ValueError(
- 'Operator %s does not support string values!' % m.group('op'))
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
quote = m.group('quote')
if quote is not None:
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
else:
+ if m.group('op') in ('*=', '^=', '$='):
+ raise ValueError(
+ 'Operator %s only supports string values!' % m.group('op'))
try:
comparison_value = int(m.group('intval'))
except ValueError: