aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpukkandan <pukkandan.ytdlp@gmail.com>2021-09-08 16:10:10 +0530
committerGitHub <noreply@github.com>2021-09-08 16:10:10 +0530
commit81a136b80f3d29c73884bb116f869df44bfd6fa1 (patch)
treea0498bebd83e3cf157434c77155208a25c67c096
parenteab3f867e246b064ff8cd38460f93623b03b4540 (diff)
downloadhypervideo-pre-81a136b80f3d29c73884bb116f869df44bfd6fa1.tar.lz
hypervideo-pre-81a136b80f3d29c73884bb116f869df44bfd6fa1.tar.xz
hypervideo-pre-81a136b80f3d29c73884bb116f869df44bfd6fa1.zip
[WebVTT] Adjust parser to accommodate PBS subtitles (#922)
Closes #921
-rw-r--r--yt_dlp/webvtt.py9
1 files changed, 7 insertions, 2 deletions
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
index eee2a4a2d..cd936e7e5 100644
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@@ -89,8 +89,12 @@ class ParseError(Exception):
))
+# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp>
+# prescribes that hours must be *2 or more* digits, timestamps with a single
+# digit for the hour part have been seen in the wild.
+# See https://github.com/yt-dlp/yt-dlp/issues/921
_REGEX_TS = re.compile(r'''(?x)
- (?:([0-9]{2,}):)?
+ (?:([0-9]{1,}):)?
([0-9]{2}):
([0-9]{2})\.
([0-9]{3})?
@@ -172,6 +176,7 @@ class Magic(HeaderBlock):
_REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=')
_REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:')
_REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)')
+ _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*')
@classmethod
def __parse_tsmap(cls, parser):
@@ -194,7 +199,7 @@ class Magic(HeaderBlock):
raise ParseError(parser)
else:
raise ParseError(parser)
- if parser.consume(','):
+ if parser.consume(cls._REGEX_TSMAP_SEP):
continue
if parser.consume(_REGEX_NL):
break