diff options
Diffstat (limited to 'yt_dlp/extractor/youtube.py')
-rw-r--r-- | yt_dlp/extractor/youtube.py | 42 |
1 file changed, 5 insertions, 37 deletions
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index b02e0153a..b8bb980f3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3205,11 +3205,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters' ), expected_type=list) - return self._extract_chapters( + return self._extract_chapters_helper( chapter_list, - chapter_time=lambda chapter: float_or_none( + start_function=lambda chapter: float_or_none( traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000), - chapter_title=lambda chapter: traverse_obj( + title_function=lambda chapter: traverse_obj( chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str), duration=duration) @@ -3222,42 +3222,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): chapter_title = lambda chapter: self._get_text(chapter, 'title') return next(filter(None, ( - self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')), - chapter_time, chapter_title, duration) + self._extract_chapters_helper(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')), + chapter_time, chapter_title, duration) for contents in content_list)), []) - def _extract_chapters_from_description(self, description, duration): - duration_re = r'(?:\d+:)?\d{1,2}:\d{2}' - sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$' - return self._extract_chapters( - re.findall(sep_re % (duration_re, r'.+?'), description or ''), - chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1], - duration=duration, strict=False) or self._extract_chapters( - re.findall(sep_re % (r'.+?', duration_re), description or ''), - chapter_time=lambda x: parse_duration(x[1]), chapter_title=lambda x: x[0], - duration=duration, strict=False) - - def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True): - if not duration: - return - chapter_list = [{ - 'start_time': 
chapter_time(chapter), - 'title': chapter_title(chapter), - } for chapter in chapter_list or []] - if not strict: - chapter_list.sort(key=lambda c: c['start_time'] or 0) - - chapters = [{'start_time': 0}] - for idx, chapter in enumerate(chapter_list): - if chapter['start_time'] is None: - self.report_warning(f'Incomplete chapter {idx}') - elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: - chapters.append(chapter) - elif chapter not in chapters: - self.report_warning( - f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"') - return chapters[1:] - def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') if not comment_id: |