aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/postprocessor/metadataparser.py
blob: 01ee6c1fb924485ca2a16a33e979a24b15ad6037 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import re
from enum import Enum

from .common import PostProcessor


class MetadataParserPP(PostProcessor):
    """Post-processor that rewrites fields of the info dict.

    The instance is configured with a sequence of actions. Each action is
    turned once, in ``__init__``, into a callable that takes the info dict;
    ``run`` then applies those callables in the order they were given.
    """

    class Actions(Enum):
        # Each value is the name of the MetadataParserPP method that builds the
        # callable for that action (it is looked up with getattr).
        INTERPRET = 'interpretter'
        REPLACE = 'replacer'

    def __init__(self, downloader, actions):
        """
        @param downloader  Forwarded unchanged to PostProcessor.__init__
        @param actions     Iterable of indexable items ``(Actions member, *args)``;
                           see validate_action for the accepted shapes
        """
        PostProcessor.__init__(self, downloader)
        self._actions = []
        for f in actions:
            action = f[0]
            # Sanity check only (asserts are stripped under -O);
            # validate_action raises ValueError for the same condition.
            assert isinstance(action, self.Actions)
            # Build the per-action callable now so that bad arguments
            # (invalid template/regex) fail at construction time.
            self._actions.append(getattr(self, action.value)(*f[1:]))

    @classmethod
    def validate_action(cls, action, *data):
        ''' Each action can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)

            Raises ValueError if action is not a member of Actions. Any error
            raised while building the action (e.g. an invalid template or
            regex, or a wrong number of arguments) propagates to the caller.
        '''
        if not isinstance(action, cls.Actions):
            raise ValueError(f'{action!r} is not a valid action')
        # The class itself stands in for "self": the builders only touch
        # instance state inside the closure they return, which is never
        # invoked here.
        getattr(cls, action.value)(cls, *data)  # So this can raise error to validate

    @staticmethod
    def field_to_template(tmpl):
        """Return an output template for tmpl.

        A string made only of ASCII letters and underscores is treated as a
        field name and wrapped as '%(name)s'. Anything else is checked with
        YoutubeDL.validate_outtmpl; if that returns an error it is raised,
        otherwise tmpl is returned unchanged.
        """
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return f'%({tmpl})s'

        # Imported locally rather than at module level
        # (presumably to avoid a circular import - TODO confirm)
        from ..YoutubeDL import YoutubeDL
        err = YoutubeDL.validate_outtmpl(tmpl)
        if err:
            raise err
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string that contains no '%(name)s' placeholder is returned as-is,
        so the caller ends up compiling it as a regex directly.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        lastpos = 0
        regex = ''
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += rf'(?P<{match.group(1)}>.+)'
            lastpos = match.end()
        # literal text after the last placeholder
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:])
        return regex

    def run(self, info):
        """Apply every configured action to info (in place) and return ([], info)."""
        for f in self._actions:
            f(info)
        return [], info

    def interpretter(self, inp, out):
        """Build the callable for Actions.INTERPRET.

        @param inp  Field name or output template to evaluate (see field_to_template)
        @param out  Format string or regex with named groups (see format_to_regex)
        @returns    Function taking the info dict; it evaluates the template,
                    searches the result with the regex and stores every
                    captured named group in the info dict
        """
        # Both are computed eagerly (template first) so that invalid input
        # raises here; validate_action relies on this.
        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))

        def f(info):
            data_to_parse = self._downloader.evaluate_outtmpl(template, info)
            self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
            match = out_re.search(data_to_parse)
            if match is None:
                self.to_screen(f'Could not interpret {inp!r} as {out!r}')
                return
            for attribute, value in match.groupdict().items():
                if value is None:
                    # The group exists in the pattern but did not take part in
                    # the match (optional group / other alternative). Leave any
                    # existing value in place instead of overwriting it with None.
                    continue
                info[attribute] = value
                self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')

        return f

    def replacer(self, field, search, replace):
        """Build the callable for Actions.REPLACE.

        @param field    Key of the info dict whose value is modified
        @param search   Regex pattern; compiled once here
        @param replace  Replacement passed to the compiled pattern's subn()
        @returns        Function taking the info dict; it replaces all matches
                        in info[field] when that value is a string, and only
                        reports to the user otherwise
        """
        # Compiled eagerly so that an invalid pattern raises here
        search_re = re.compile(search)

        def f(info):
            val = info.get(field)
            if val is None:
                self.to_screen(f'Video does not have a {field}')
                return
            elif not isinstance(val, str):
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
            # subn returns (new_string, number_of_substitutions)
            info[field], n = search_re.subn(replace, val)
            if n:
                self.to_screen(f'Changed {field} to: {info[field]}')
            else:
                self.to_screen(f'Did not find {search!r} in {field}')

        return f


class MetadataFromFieldPP(MetadataParserPP):
    """MetadataParserPP configured from 'FROM:TO' strings.

    Every string in ``formats`` becomes one Actions.INTERPRET action.
    """

    def __init__(self, downloader, formats):
        interpret_actions = [self.to_action(spec) for spec in formats]
        super().__init__(downloader, interpret_actions)

    @classmethod
    def to_action(cls, f):
        """Convert a 'FROM:TO' string into an (Actions.INTERPRET, from, to) tuple.

        The string is split at the first colon that is not preceded by a
        backslash. Escaped colons ('\\:') in the FROM part are turned back into
        plain colons; the TO part is passed on untouched.

        Raises ValueError when no such separator followed by a non-empty TO
        part is found.
        """
        parsed = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if parsed is None:
            raise ValueError(f'it should be FROM:TO, not {f!r}')
        source, target = parsed.group('in', 'out')
        return cls.Actions.INTERPRET, source.replace('\\:', ':'), target


# Deprecated
class MetadataFromTitlePP(MetadataParserPP):
    """Deprecated: interprets the 'title' field using ``titleformat``.

    Sets up a single Actions.INTERPRET action on 'title' and then emits a
    deprecation warning pointing to MetadataFromFieldPP.
    """

    def __init__(self, downloader, titleformat):
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        super().__init__(downloader, [title_action])
        # Warn only after the parent has been initialised, as before
        self.deprecation_warning(
            'hypervideo_dl.postprocessor.MetadataFromTitlePP is deprecated '
            'and may be removed in a future version. Use hypervideo_dl.postprocessor.MetadataFromFieldPP instead')