diff options
author | Jesús <heckyel@hyperbola.info> | 2022-12-01 23:33:30 +0800 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2022-12-01 23:33:30 +0800 |
commit | ef1a420d6de7876b7b6732abc8ab78351c5a2bfc (patch) | |
tree | 9ba7d8409aa5baa696f5fb10db5d395c2f050276 /yt_dlp/webvtt.py | |
parent | 16e8548f6a720a78679e417a20a300db2036bf6c (diff) | |
parent | ddf1e22d48530819d60220d0bdc36e20f5b8483b (diff) | |
download | hypervideo-pre-ef1a420d6de7876b7b6732abc8ab78351c5a2bfc.tar.lz hypervideo-pre-ef1a420d6de7876b7b6732abc8ab78351c5a2bfc.tar.xz hypervideo-pre-ef1a420d6de7876b7b6732abc8ab78351c5a2bfc.zip |
update from upstream 2022-12-01 UTC+8
Diffstat (limited to 'yt_dlp/webvtt.py')
-rw-r--r-- | yt_dlp/webvtt.py | 28 |
1 file changed, 20 insertions, 8 deletions
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index b8974f883..dd7298277 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -9,8 +9,8 @@ in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>. """ import io +import re -from .compat import re from .utils import int_or_none, timetuple_from_msec @@ -93,7 +93,7 @@ _REGEX_TS = re.compile(r'''(?x) ([0-9]{3})? ''') _REGEX_EOF = re.compile(r'\Z') -_REGEX_NL = re.compile(r'(?:\r\n|[\r\n])') +_REGEX_NL = re.compile(r'(?:\r\n|[\r\n]|$)') _REGEX_BLANK = re.compile(r'(?:\r\n|[\r\n])+') @@ -140,7 +140,6 @@ class HeaderBlock(Block): A WebVTT block that may only appear in the header part of the file, i.e. before any cue blocks. """ - pass @@ -161,6 +160,12 @@ class Magic(HeaderBlock): _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)') _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*') + # This was removed from the spec in the 2017 revision; + # the last spec draft to describe this syntax element is + # <https://www.w3.org/TR/2015/WD-webvtt1-20151208/#webvtt-metadata-header>. 
+ # Nevertheless, YouTube keeps serving those + _REGEX_META = re.compile(r'(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])') + @classmethod def __parse_tsmap(cls, parser): parser = parser.child() @@ -200,13 +205,18 @@ class Magic(HeaderBlock): raise ParseError(parser) extra = m.group(1) - local, mpegts = None, None - if parser.consume(cls._REGEX_TSMAP): - local, mpegts = cls.__parse_tsmap(parser) - if not parser.consume(_REGEX_NL): + local, mpegts, meta = None, None, '' + while not parser.consume(_REGEX_NL): + if parser.consume(cls._REGEX_TSMAP): + local, mpegts = cls.__parse_tsmap(parser) + continue + m = parser.consume(cls._REGEX_META) + if m: + meta += m.group(0) + continue raise ParseError(parser) parser.commit() - return cls(extra=extra, mpegts=mpegts, local=local) + return cls(extra=extra, mpegts=mpegts, local=local, meta=meta) def write_into(self, stream): stream.write('WEBVTT') @@ -219,6 +229,8 @@ class Magic(HeaderBlock): stream.write(',MPEGTS:') stream.write(str(self.mpegts if self.mpegts is not None else 0)) stream.write('\n') + if self.meta: + stream.write(self.meta) stream.write('\n') |