diff options
author | mushbite <mushbite@users.noreply.github.com> | 2023-03-04 15:33:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-04 19:03:17 +0530 |
commit | 22ccd5420b3eb0782776071f12cccd1fedaa1fd0 (patch) | |
tree | 927b86293d9aabb64bf38606bb23410716d718a4 /yt_dlp/extractor/common.py | |
parent | 08ff6d59f97b5f5f0128f6bf6fbef56fd836cc52 (diff) | |
download | hypervideo-pre-22ccd5420b3eb0782776071f12cccd1fedaa1fd0.tar.lz hypervideo-pre-22ccd5420b3eb0782776071f12cccd1fedaa1fd0.tar.xz hypervideo-pre-22ccd5420b3eb0782776071f12cccd1fedaa1fd0.zip |
[extractor/rutube] Extract chapters from description (#6345)
Authored by: mushbite
Diffstat (limited to 'yt_dlp/extractor/common.py')
-rw-r--r-- | yt_dlp/extractor/common.py | 32 |
1 files changed, 32 insertions, 0 deletions
# Source: yt_dlp/extractor/common.py, hunk "@@ -3649,6 +3649,38 @@ class InfoExtractor:"
# (index 8ad63b411..2091df7fa). The two functions below are the methods this
# commit adds to InfoExtractor, re-flowed from the single-line diff. The hunk's
# unchanged context lines belong to neighbouring, only partially visible
# definitions and are not reproduced here.


def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
    """Build a validated list of chapter dicts from raw chapter data.

    @param chapter_list    Iterable of raw chapter objects; None is treated as empty
    @param start_function  Callable mapping a raw chapter to its start time
                           (None marks the chapter as incomplete)
    @param title_function  Callable mapping a raw chapter to its title
    @param duration        Total duration; chapters starting after it are rejected
    @param strict          When False, chapters are sorted by start time first
                           instead of being required to arrive in order
    @returns               None if duration is falsy, otherwise a list of
                           {'start_time': ..., 'title': ...} dicts

    Rejected chapters are reported through self.report_warning and skipped.
    """
    if not duration:
        return
    chapter_list = [{
        'start_time': start_function(chapter),
        'title': title_function(chapter),
    } for chapter in chapter_list or []]
    if not strict:
        # Chapters without a start time sort to the front and are skipped below
        chapter_list.sort(key=lambda c: c['start_time'] or 0)

    # Sentinel entry so the first real chapter has a predecessor to compare with
    chapters = [{'start_time': 0}]
    for idx, chapter in enumerate(chapter_list):
        start_time = chapter['start_time']
        if start_time is None:
            self.report_warning(f'Incomplete chapter {idx}')
        elif chapters[-1]['start_time'] <= start_time <= duration:
            chapters.append(chapter)
        elif chapter not in chapters:
            # Name the bound that was actually violated: either the chapter
            # starts after the end of the media, or before its predecessor
            issue = (f'{start_time} > {duration}' if start_time > duration
                     else f'{start_time} < {chapters[-1]["start_time"]}')
            self.report_warning(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
    return chapters[1:]


def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from timestamped lines of a description.

    Each line is matched first as "<timestamp> <title>"; if that produces no
    chapters, the description is re-scanned as "<title> <timestamp>".
    Timestamps have the form [H:]M:SS / [H:]MM:SS and are converted with
    parse_duration.

    @param description  Description text; None is treated as an empty string
    @param duration     Total duration, forwarded to _extract_chapters_helper
    @returns            Whatever _extract_chapters_helper returns for the first
                        layout that yields chapters, else its result for the
                        second layout (None when duration is falsy)
    """
    duration_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    sep_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'
    description = description or ''
    return self._extract_chapters_helper(
        re.findall(sep_re % (duration_re, r'.+?'), description),
        start_function=lambda x: parse_duration(x[0]),
        title_function=lambda x: x[1],
        duration=duration, strict=False,
    ) or self._extract_chapters_helper(
        re.findall(sep_re % (r'.+?', duration_re), description),
        start_function=lambda x: parse_duration(x[1]),
        title_function=lambda x: x[0],
        duration=duration, strict=False)