diff options
| -rw-r--r-- | README.md | 4 | ||||
| -rw-r--r-- | test/test_utils.py | 12 | ||||
| -rw-r--r-- | yt_dlp/YoutubeDL.py | 21 | ||||
| -rw-r--r-- | yt_dlp/options.py | 8 | ||||
| -rw-r--r-- | yt_dlp/utils.py | 19 | 
5 files changed, 40 insertions, 24 deletions
| @@ -1439,6 +1439,10 @@ While these options are redundant, they are still expected to be used due to the      -e, --get-title                  --print title      -g, --get-url                    --print urls      -j, --dump-json                  --print "%()j" +    --match-title REGEX              --match-filter "title ~= (?i)REGEX" +    --reject-title REGEX             --match-filter "title !~= (?i)REGEX" +    --min-views COUNT                --match-filter "view_count >=? COUNT" +    --max-views COUNT                --match-filter "view_count <=? COUNT"  #### Not recommended diff --git a/test/test_utils.py b/test/test_utils.py index aef59e491..dedc598f7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1285,9 +1285,15 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')          self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))          # Example from docs -        self.assertTrue( -            r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'', -            {'description': 'Raining Cats & Dogs'}) +        self.assertTrue(match_str( +            r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'", +            {'description': 'Raining Cats & Dogs'})) + +        # Incomplete +        self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True)) +        self.assertTrue(match_str('x', {'id': 'foo'}, True)) +        self.assertTrue(match_str('!x', {'id': 'foo'}, True)) +        self.assertFalse(match_str('x', {'id': 'foo'}, False))      def test_parse_dfxp_time_expr(self):          self.assertEqual(parse_dfxp_time_expr(None), None) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d12131acd..eef3f8b4c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1117,12 +1117,15 @@ class YoutubeDL(object):              if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):                  return 'Skipping "%s" because it is age restricted' % video_title -            if not incomplete: -                match_filter = self.params.get('match_filter') -                if match_filter is not None: -                    ret = match_filter(info_dict) -                    if ret is not None: -                        return ret +            match_filter = self.params.get('match_filter') +            if match_filter is not None: +                try: +                    ret = match_filter(info_dict, incomplete=incomplete) +                except TypeError: +                    # For backward compatibility +                    ret = None if incomplete else match_filter(info_dict) +                if ret is not None: +                    return ret              return None          if self.in_download_archive(info_dict): @@ -2873,13 +2876,13 @@ class YoutubeDL(object):              except UnavailableVideoError:                  self.report_error('unable to download video')              except MaxDownloadsReached: -                self.to_screen('[info] Maximum number of downloaded files reached') +                self.to_screen('[info] Maximum number of downloads reached')                  raise              except ExistingVideoReached: -                self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing') +                self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')                  raise              except RejectedVideoReached: -                self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject') +                self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')                  raise              else:                  if self.params.get('dump_single_json', False): diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 1499991a1..ef821eb11 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -356,11 +356,11 @@ def parseOpts(overrideArguments=None):      selection.add_option(          '--match-title',          dest='matchtitle', metavar='REGEX', -        help='Download only matching titles (regex or caseless sub-string)') +        help=optparse.SUPPRESS_HELP)      selection.add_option(          '--reject-title',          dest='rejecttitle', metavar='REGEX', -        help='Skip download for matching titles (regex or caseless sub-string)') +        help=optparse.SUPPRESS_HELP)      selection.add_option(          '--max-downloads',          dest='max_downloads', metavar='NUMBER', type=int, default=None, @@ -395,11 +395,11 @@ def parseOpts(overrideArguments=None):      selection.add_option(          '--min-views',          metavar='COUNT', dest='min_views', default=None, type=int, -        help='Do not download any videos with less than COUNT views') +        help=optparse.SUPPRESS_HELP)      selection.add_option(          '--max-views',          metavar='COUNT', dest='max_views', default=None, type=int, -        help='Do not download any videos with more than COUNT views') +        help=optparse.SUPPRESS_HELP)      selection.add_option(          '--match-filter',          metavar='FILTER', dest='match_filter', default=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0e8392fdf..6276ac726 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4657,7 +4657,7 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):      return '\n'.join(format_str % tuple(row) for row in table) -def _match_one(filter_part, dct): +def _match_one(filter_part, dct, incomplete):      # TODO: Generalize code with YoutubeDL._build_format_filter      STRING_OPERATORS = {          '*=': operator.contains, @@ -4718,7 +4718,7 @@ def _match_one(filter_part, dct):                          'Invalid integer value %r in filter part %r' % (                              m.group('intval'), filter_part))          if actual_value is None: -            return m.group('none_inclusive') +            return incomplete or m.group('none_inclusive')          return op(actual_value, comparison_value)      UNARY_OPERATORS = { @@ -4733,22 +4733,25 @@ def _match_one(filter_part, dct):      if m:          op = UNARY_OPERATORS[m.group('op')]          actual_value = dct.get(m.group('key')) +        if incomplete and actual_value is None: +            return True          return op(actual_value)      raise ValueError('Invalid filter part %r' % filter_part) -def match_str(filter_str, dct): -    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ - +def match_str(filter_str, dct, incomplete=False): +    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false +        When incomplete, all conditions passes on missing fields +    """      return all( -        _match_one(filter_part.replace(r'\&', '&'), dct) +        _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)          for filter_part in re.split(r'(?<!\\)&', filter_str))  def match_filter_func(filter_str): -    def _match_func(info_dict): -        if match_str(filter_str, info_dict): +    def _match_func(info_dict, *args, **kwargs): +        if match_str(filter_str, info_dict, *args, **kwargs):              return None          else:              video_title = info_dict.get('title', info_dict.get('id', 'video')) | 
