diff options
Diffstat (limited to 'yt_dlp/webvtt.py')
-rw-r--r-- | yt_dlp/webvtt.py | 23 |
1 files changed, 18 insertions, 5 deletions
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index cc2353436..23d67a897 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -161,6 +161,12 @@ class Magic(HeaderBlock): _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)') _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*') + # This was removed from the spec in the 2017 revision; + # the last spec draft to describe this syntax element is + # <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>. + # Nevertheless, YouTube keeps serving those + _REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])') + @classmethod def __parse_tsmap(cls, parser): parser = parser.child() @@ -200,13 +206,18 @@ class Magic(HeaderBlock): raise ParseError(parser) extra = m.group(1) - local, mpegts = None, None - if parser.consume(cls._REGEX_TSMAP): - local, mpegts = cls.__parse_tsmap(parser) - if not parser.consume(_REGEX_NL): + local, mpegts, meta = None, None, '' + while not parser.consume(_REGEX_NL): + if parser.consume(cls._REGEX_TSMAP): + local, mpegts = cls.__parse_tsmap(parser) + continue + m = parser.consume(cls._REGEX_META) + if m: + meta += m.group(0) + continue raise ParseError(parser) parser.commit() - return cls(extra=extra, mpegts=mpegts, local=local) + return cls(extra=extra, mpegts=mpegts, local=local, meta=meta) def write_into(self, stream): stream.write('WEBVTT') @@ -219,6 +230,8 @@ class Magic(HeaderBlock): stream.write(',MPEGTS:') stream.write(str(self.mpegts if self.mpegts is not None else 0)) stream.write('\n') + if self.meta: + stream.write(self.meta) stream.write('\n') |