diff options
Diffstat (limited to 'yt_dlp/postprocessor/metadatafromfield.py')
-rw-r--r-- | yt_dlp/postprocessor/metadatafromfield.py | 71 |
1 files changed, 71 insertions, 0 deletions
# Recovered from the diff of yt_dlp/postprocessor/metadatafromfield.py
# (new file mode 100644, index 000000000..716911b21, 71 added lines).
from __future__ import unicode_literals

import re

from .common import PostProcessor
from ..compat import compat_str
from ..utils import str_or_none


class MetadataFromFieldPP(PostProcessor):
    """Post-processor that derives info-dict entries by parsing other fields.

    Every entry of ``formats`` is a string of the form ``FIELD:FORMAT``.
    ``FIELD`` names the info-dict key to read; ``FORMAT`` is either an
    output-template-like string (``'%(title)s - %(artist)s'``) or, when it
    contains no ``%(name)s`` placeholder, a regular expression that is used
    verbatim. Named groups that match are written back into the info dict.
    """

    # Splits 'FIELD:FORMAT' into its two named parts.
    regex = r'(?P<field>\w+):(?P<format>.+)$'

    def __init__(self, downloader, formats):
        """Parse every ``FIELD:FORMAT`` specification in ``formats``.

        ``formats`` must be a list or tuple of text strings, each matching
        ``self.regex``; these preconditions are enforced with assertions.
        """
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple))
        self._data = []
        for f in formats:
            assert isinstance(f, compat_str)
            match = re.match(self.regex, f)
            assert match is not None
            self._data.append({
                'field': match.group('field'),
                'format': match.group('format'),
                'regex': self.format_to_regex(match.group('format'))})

    def format_to_regex(self, fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'

        A string without any '%(name)s' placeholder is assumed to already be
        a regex and is returned unchanged.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        parts = []
        lastpos = 0
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            parts.append(re.escape(fmt[lastpos:match.start()]))
            parts.append(r'(?P<' + match.group(1) + r'>[^\r\n]+)')
            lastpos = match.end()
        # Trailing literal text (possibly empty) after the last placeholder.
        parts.append(re.escape(fmt[lastpos:]))
        return ''.join(parts)

    def run(self, info):
        """Apply every configured pattern to ``info`` and store the matches.

        For each specification the source field is converted with
        ``str_or_none`` and searched with the stored regex. A missing field,
        an unconvertible value or a failed match only produces a warning and
        the specification is skipped. Every named group of a successful
        match is assigned to ``info`` under the group's name.

        Returns the tuple ``([], info)``.
        """
        for dictn in self._data:
            field, regex = dictn['field'], dictn['regex']
            if field not in info:
                self.report_warning('Video does not have a %s' % field)
                continue
            data_to_parse = str_or_none(info[field])
            if data_to_parse is None:
                self.report_warning('Field %s cannot be parsed' % field)
                continue
            self.write_debug('Searching for r"%s" in %s' % (regex, field))
            match = re.search(regex, data_to_parse)
            if match is None:
                self.report_warning('Could not interpret video %s as "%s"' % (field, dictn['format']))
                continue
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                # Groups that did not take part in the match are shown as 'NA'.
                self.to_screen('parsed %s from %s: %s' % (attribute, field, value if value is not None else 'NA'))
        return [], info


class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Variant of MetadataFromFieldPP that always parses the ``title`` field."""

    def __init__(self, downloader, titleformat):
        """Configure a single ``title:<titleformat>`` specification."""
        super(MetadataFromTitlePP, self).__init__(downloader, ['title:%s' % titleformat])
        # Kept as attributes alongside self._data for older callers.
        self._titleformat = titleformat
        self._titleregex = self._data[0]['regex']