aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/common.py
diff options
context:
space:
mode:
author: mushbite <mushbite@users.noreply.github.com> 2023-03-04 15:33:17 +0200
committer: GitHub <noreply@github.com> 2023-03-04 19:03:17 +0530
commit: 22ccd5420b3eb0782776071f12cccd1fedaa1fd0 (patch)
tree: 927b86293d9aabb64bf38606bb23410716d718a4 /yt_dlp/extractor/common.py
parent: 08ff6d59f97b5f5f0128f6bf6fbef56fd836cc52 (diff)
downloadhypervideo-pre-22ccd5420b3eb0782776071f12cccd1fedaa1fd0.tar.lz
hypervideo-pre-22ccd5420b3eb0782776071f12cccd1fedaa1fd0.tar.xz
hypervideo-pre-22ccd5420b3eb0782776071f12cccd1fedaa1fd0.zip
[extractor/rutube] Extract chapters from description (#6345)
Authored by: mushbite
Diffstat (limited to 'yt_dlp/extractor/common.py')
-rw-r--r--yt_dlp/extractor/common.py32
1 files changed, 32 insertions, 0 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 8ad63b411..2091df7fa 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3649,6 +3649,38 @@ class InfoExtractor:
or urllib.parse.unquote(os.path.splitext(url_basename(url))[0])
or default)
+ def _extract_chapters_helper(self, chapter_list, start_function, title_function, duration, strict=True):
+ if not duration:
+ return
+ chapter_list = [{
+ 'start_time': start_function(chapter),
+ 'title': title_function(chapter),
+ } for chapter in chapter_list or []]
+ if not strict:
+ chapter_list.sort(key=lambda c: c['start_time'] or 0)
+
+ chapters = [{'start_time': 0}]
+ for idx, chapter in enumerate(chapter_list):
+ if chapter['start_time'] is None:
+ self.report_warning(f'Incomplete chapter {idx}')
+ elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
+ chapters.append(chapter)
+ elif chapter not in chapters:
+ self.report_warning(
+ f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
+ return chapters[1:]
+
def _extract_chapters_from_description(self, description, duration):
    """Extract chapters from timestamped lines of a description.

    Two line layouts are tried in order: the timestamp before the title,
    then the title before the timestamp. The second layout is only tried
    when the first one yields no chapters.

    @param description  Text to scan; None is treated as an empty string
    @param duration     Passed through to _extract_chapters_helper
    @returns            Whatever _extract_chapters_helper returns for the
                        first layout if that is truthy, otherwise its
                        result for the second layout
    """
    text = description or ''
    timestamp_re = r'(?:\d+:)?\d{1,2}:\d{2}'
    line_re = r'(?m)^\s*(%s)\b\W*\s(%s)\s*$'

    # Layout 1: the timestamp is the first group, the title the second
    timestamp_first = self._extract_chapters_helper(
        re.findall(line_re % (timestamp_re, r'.+?'), text),
        start_function=lambda groups: parse_duration(groups[0]),
        title_function=lambda groups: groups[1],
        duration=duration, strict=False)
    if timestamp_first:
        return timestamp_first

    # Layout 2: the title is the first group, the timestamp the second
    return self._extract_chapters_helper(
        re.findall(line_re % (r'.+?', timestamp_re), text),
        start_function=lambda groups: parse_duration(groups[1]),
        title_function=lambda groups: groups[0],
        duration=duration, strict=False)
+
@staticmethod
def _availability(is_private=None, needs_premium=None, needs_subscription=None, needs_auth=None, is_unlisted=None):
all_known = all(map(