about summary refs log tree commit diff stats
path: root/yt_dlp/postprocessor
diff options
context:
space:
mode:
author pukkandan <pukkandan.ytdlp@gmail.com> 2021-08-10 01:22:55 +0530
committer pukkandan <pukkandan.ytdlp@gmail.com> 2021-08-10 01:22:55 +0530
commit e9f4ccd19eb92621970b518fb5984b8aef52bdc8 (patch)
tree 981232dca595135910e3e7e087c1257e2d46ecb8 /yt_dlp/postprocessor
parent a38bd1defac0fbbac6e7184631234108989bf7d7 (diff)
download hypervideo-pre-e9f4ccd19eb92621970b518fb5984b8aef52bdc8.tar.lz
hypervideo-pre-e9f4ccd19eb92621970b518fb5984b8aef52bdc8.tar.xz
hypervideo-pre-e9f4ccd19eb92621970b518fb5984b8aef52bdc8.zip
Add option `--replace-in-metadata`
Diffstat (limited to 'yt_dlp/postprocessor')
-rw-r--r-- yt_dlp/postprocessor/__init__.py 8
-rw-r--r-- yt_dlp/postprocessor/metadatafromfield.py 74
-rw-r--r-- yt_dlp/postprocessor/metadataparser.py 117
3 files changed, 123 insertions, 76 deletions
diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py
index 98cbe8665..b1a6917d7 100644
--- a/yt_dlp/postprocessor/__init__.py
+++ b/yt_dlp/postprocessor/__init__.py
@@ -20,8 +20,11 @@ from .ffmpeg import (
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
-from .metadatafromfield import MetadataFromFieldPP
-from .metadatafromfield import MetadataFromTitlePP
+from .metadataparser import (
+ MetadataFromFieldPP,
+ MetadataFromTitlePP,
+ MetadataParserPP,
+)
from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .sponskrub import SponSkrubPP
@@ -48,6 +51,7 @@ __all__ = [
'FFmpegThumbnailsConvertorPP',
'FFmpegVideoConvertorPP',
'FFmpegVideoRemuxerPP',
+ 'MetadataParserPP',
'MetadataFromFieldPP',
'MetadataFromTitlePP',
'MoveFilesAfterDownloadPP',
diff --git a/yt_dlp/postprocessor/metadatafromfield.py b/yt_dlp/postprocessor/metadatafromfield.py
deleted file mode 100644
index 002794765..000000000
--- a/yt_dlp/postprocessor/metadatafromfield.py
+++ /dev/null
@@ -1,74 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import PostProcessor
-from ..compat import compat_str
-
-
-class MetadataFromFieldPP(PostProcessor):
- regex = r'(?P<in>.*?)(?<!\\):(?P<out>.+)$'
-
- def __init__(self, downloader, formats):
- PostProcessor.__init__(self, downloader)
- assert isinstance(formats, (list, tuple))
- self._data = []
- for f in formats:
- assert isinstance(f, compat_str)
- match = re.match(self.regex, f)
- assert match is not None
- inp = match.group('in').replace('\\:', ':')
- self._data.append({
- 'in': inp,
- 'out': match.group('out'),
- 'tmpl': self.field_to_template(inp),
- 'regex': self.format_to_regex(match.group('out')),
- })
-
- @staticmethod
- def field_to_template(tmpl):
- if re.match(r'[a-zA-Z_]+$', tmpl):
- return '%%(%s)s' % tmpl
- return tmpl
-
- @staticmethod
- def format_to_regex(fmt):
- r"""
- Converts a string like
- '%(title)s - %(artist)s'
- to a regex like
- '(?P<title>.+)\ \-\ (?P<artist>.+)'
- """
- if not re.search(r'%\(\w+\)s', fmt):
- return fmt
- lastpos = 0
- regex = ''
- # replace %(..)s with regex group and escape other string parts
- for match in re.finditer(r'%\((\w+)\)s', fmt):
- regex += re.escape(fmt[lastpos:match.start()])
- regex += r'(?P<%s>.+)' % match.group(1)
- lastpos = match.end()
- if lastpos < len(fmt):
- regex += re.escape(fmt[lastpos:])
- return regex
-
- def run(self, info):
- for dictn in self._data:
- tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
- data_to_parse = self._downloader.escape_outtmpl(tmpl) % tmpl_dict
- self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
- match = re.search(dictn['regex'], data_to_parse)
- if match is None:
- self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
- continue
- for attribute, value in match.groupdict().items():
- info[attribute] = value
- self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['tmpl'], value if value is not None else 'NA'))
- return [], info
-
-
-class MetadataFromTitlePP(MetadataFromFieldPP): # for backward compatibility
- def __init__(self, downloader, titleformat):
- super(MetadataFromTitlePP, self).__init__(downloader, ['%%(title)s:%s' % titleformat])
- self._titleformat = titleformat
- self._titleregex = self._data[0]['regex']
diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py
new file mode 100644
index 000000000..4d3c0e0ed
--- /dev/null
+++ b/yt_dlp/postprocessor/metadataparser.py
@@ -0,0 +1,117 @@
+import re
+
+from enum import Enum
+
+from .common import PostProcessor
+
+
+class MetadataParserPP(PostProcessor):  # applies a list of interpret/replace actions to the info dict
+    class Actions(Enum):  # each value is the name of the method that builds that action's callable
+        INTERPRET = 'interpretter'
+        REPLACE = 'replacer'
+
+    def __init__(self, downloader, actions):
+        PostProcessor.__init__(self, downloader)
+        self._actions = []  # callables taking the info dict; run() calls them in order
+        for f in actions:
+            action = f[0]
+            assert isinstance(action, self.Actions)
+            self._actions.append(getattr(self, action._value_)(*f[1:]))  # f[1:] are the builder's arguments
+
+    @classmethod
+    def validate_action(cls, action, *data):
+        ''' Each action can be:
+                (Actions.INTERPRET, from, to) OR
+                (Actions.REPLACE, field, search, replace)
+        '''
+        if not isinstance(action, cls.Actions):
+            raise ValueError(f'{action!r} is not a valid action')
+        getattr(cls, action._value_)(cls, *data)  # build and discard the action; an invalid regex raises here
+
+    @staticmethod
+    def field_to_template(tmpl):
+        if re.match(r'[a-zA-Z_]+$', tmpl):  # a bare field name is wrapped as '%(name)s'
+            return f'%({tmpl})s'
+        return tmpl  # anything else is used as the template unchanged
+
+    @staticmethod
+    def format_to_regex(fmt):
+        r"""
+        Converts a string like
+           '%(title)s - %(artist)s'
+        to a regex like
+           '(?P<title>.+)\ \-\ (?P<artist>.+)'
+        """
+        if not re.search(r'%\(\w+\)s', fmt):  # no %(..)s fields: fmt is returned as-is and used as the regex
+            return fmt
+        lastpos = 0
+        regex = ''
+        # replace %(..)s with regex group and escape other string parts
+        for match in re.finditer(r'%\((\w+)\)s', fmt):
+            regex += re.escape(fmt[lastpos:match.start()])
+            regex += rf'(?P<{match.group(1)}>.+)'
+            lastpos = match.end()
+        if lastpos < len(fmt):
+            regex += re.escape(fmt[lastpos:])
+        return regex
+
+    def run(self, info):
+        for f in self._actions:
+            f(info)  # each action mutates info in place
+        return [], info
+
+    def interpretter(self, inp, out):  # returns f(info), which copies the named groups of `out` into info
+        def f(info):
+            outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(template, info)
+            data_to_parse = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict
+            self.write_debug(f'Searching for r{out_re.pattern!r} in {template!r}')
+            match = out_re.search(data_to_parse)
+            if match is None:
+                self.report_warning(f'Could not interpret {inp!r} as {out!r}')
+                return
+            for attribute, value in match.groupdict().items():
+                info[attribute] = value
+                self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA'))
+
+        template = self.field_to_template(inp)
+        out_re = re.compile(self.format_to_regex(out))  # compiled once, when the action is built
+        return f
+
+    def replacer(self, field, search, replace):  # returns f(info), which regex-substitutes inside info[field]
+        def f(info):
+            val = info.get(field)
+            if val is None:
+                self.report_warning(f'Video does not have a {field}')
+                return
+            elif not isinstance(val, str):
+                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
+                return
+            self.write_debug(f'Replacing all r{search!r} in {field} with {replace!r}')
+            info[field], n = search_re.subn(replace, val)  # n is the number of substitutions made
+            if n:
+                self.to_screen(f'Changed {field} to: {info[field]}')
+            else:
+                self.to_screen(f'Did not find r{search!r} in {field}')
+
+        search_re = re.compile(search)  # compiled once, when the action is built
+        return f
+
+
+class MetadataFromFieldPP(MetadataParserPP):  # takes each format as a 'FROM:TO' string
+    @classmethod
+    def to_action(cls, f):
+        # split at the first ':' that is not preceded by a backslash
+        mobj = re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
+        if not mobj:
+            raise ValueError(f'it should be FROM:TO, not {f!r}')
+        source, target = mobj.group('in', 'out')
+        return cls.Actions.INTERPRET, source.replace('\\:', ':'), target
+
+    def __init__(self, downloader, formats):
+        parsed = list(map(self.to_action, formats))
+        MetadataParserPP.__init__(self, downloader, parsed)
+
+
+class MetadataFromTitlePP(MetadataParserPP):  # for backward compatibility: a single INTERPRET action on 'title'
+    def __init__(self, downloader, titleformat):
+        super().__init__(downloader, actions=[(self.Actions.INTERPRET, 'title', titleformat)])