diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2021-09-08 16:10:10 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-08 16:10:10 +0530 |
commit | 81a136b80f3d29c73884bb116f869df44bfd6fa1 (patch) | |
tree | a0498bebd83e3cf157434c77155208a25c67c096 | |
parent | eab3f867e246b064ff8cd38460f93623b03b4540 (diff) | |
download | hypervideo-pre-81a136b80f3d29c73884bb116f869df44bfd6fa1.tar.lz hypervideo-pre-81a136b80f3d29c73884bb116f869df44bfd6fa1.tar.xz hypervideo-pre-81a136b80f3d29c73884bb116f869df44bfd6fa1.zip |
[WebVTT] Adjust parser to accommodate PBS subtitles (#922)
Closes #921
-rw-r--r-- | yt_dlp/webvtt.py | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index eee2a4a2d..cd936e7e5 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -89,8 +89,12 @@ class ParseError(Exception): )) +# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp> +# prescribes that hours must be *2 or more* digits, timestamps with a single +# digit for the hour part have been seen in the wild. +# See https://github.com/yt-dlp/yt-dlp/issues/921 _REGEX_TS = re.compile(r'''(?x) - (?:([0-9]{2,}):)? + (?:([0-9]{1,}):)? ([0-9]{2}): ([0-9]{2})\. ([0-9]{3})? @@ -172,6 +176,7 @@ class Magic(HeaderBlock): _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=') _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:') _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)') + _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*') @classmethod def __parse_tsmap(cls, parser): @@ -194,7 +199,7 @@ class Magic(HeaderBlock): raise ParseError(parser) else: raise ParseError(parser) - if parser.consume(','): + if parser.consume(cls._REGEX_TSMAP_SEP): continue if parser.consume(_REGEX_NL): break |