diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-11-05 15:34:53 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-11-05 15:34:53 +0530 |
commit | 2e30b46fe4a04e82d1ec1a21f8d387e5f96405be (patch) | |
tree | 1b4c9902ea15492c19e752f2ecdefd4fe7bd4b26 | |
parent | 68a9a450d432f67dc8c2531f053a5fd41b5f341a (diff) | |
download | hypervideo-pre-2e30b46fe4a04e82d1ec1a21f8d387e5f96405be.tar.lz hypervideo-pre-2e30b46fe4a04e82d1ec1a21f8d387e5f96405be.tar.xz hypervideo-pre-2e30b46fe4a04e82d1ec1a21f8d387e5f96405be.zip |
[extractor/youtube] Improve chapter parsing from description
Closes #5448
-rw-r--r-- | yt_dlp/extractor/youtube.py | 7 |
1 file changed, 6 insertions, 1 deletion
```diff
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 77a8b93f3..555c94f97 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3027,9 +3027,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 for contents in content_list)), [])
 
     def _extract_chapters_from_description(self, description, duration):
+        duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
+        sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
         return self._extract_chapters(
-            re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''),
+            re.findall(sep_re % (duration_re, r'.+?'), description or ''),
             chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1],
+            duration=duration, strict=False) or self._extract_chapters(
+            re.findall(sep_re % (r'.+?', duration_re), description or ''),
+            chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0],
             duration=duration, strict=False)
 
     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True):
```