about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp
diff options
context:
space:
mode:
author: chris <6024426+iw0nderhow@users.noreply.github.com> 2022-01-01 22:07:00 +0100
committer: GitHub <noreply@github.com> 2022-01-02 02:37:00 +0530
commit: f5225737877a78f63b9a6f1de675c95c650f65d6 (patch)
tree: 6481494f9e2fc6063646be74df7cec4199f327d4 /yt_dlp
parent: 7592749cbe377675688dfcad5b7c1d46bbb684e1 (diff)
download: hypervideo-pre-f5225737877a78f63b9a6f1de675c95c650f65d6.tar.lz
hypervideo-pre-f5225737877a78f63b9a6f1de675c95c650f65d6.tar.xz
hypervideo-pre-f5225737877a78f63b9a6f1de675c95c650f65d6.zip
[extractor] Extract chapters from JSON-LD (#2031)
Authored by: iw0nderhow, pukkandan
Diffstat (limited to 'yt_dlp')
-rw-r--r-- yt_dlp/extractor/common.py 18
1 files changed, 18 insertions, 0 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1d694293e..79f53c9c2 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1429,6 +1429,23 @@ class InfoExtractor(object):
continue
info[count_key] = interaction_count
def extract_chapter_information(e):
    """Fill ``info['chapters']`` from the ``Clip`` entries under ``e['hasPart']``.

    Each clip becomes a dict with ``title`` (from ``name``), ``start_time``
    (from ``startOffset``) and ``end_time`` (from ``endOffset``). Missing or
    falsy offsets are filled in from the neighbouring chapters: a start time
    falls back to the previous chapter's end time (0 for the first chapter)
    and an end time falls back to the next chapter's start time (or
    ``info.get('duration')`` for the last chapter).

    If any chapter still lacks a title or a start time, or any chapter other
    than the last still lacks an end time, a warning is reported through
    ``self.report_warning`` and ``info`` is left untouched.

    ``self`` and ``info`` are taken from the enclosing scope; nothing is
    returned.
    """
    parts = e.get('hasPart') or []
    if isinstance(parts, dict):
        # A lone object instead of a list: treat it as a one-element list
        # rather than iterating over its keys.
        parts = [parts]

    chapters = [{
        'title': part.get('name'),
        'start_time': part.get('startOffset'),
        'end_time': part.get('endOffset'),
    } for part in parts if isinstance(part, dict) and part.get('@type') == 'Clip']
    if not chapters:
        return

    # Sentinels on both ends let the first and the last chapter go through
    # the same backfilling and validation as every chapter in between.
    previous = [{'end_time': 0}] + chapters
    following = chapters[1:] + [{'start_time': info.get('duration')}]
    final_idx = len(chapters) - 1

    for idx, (last_c, current_c, next_c) in enumerate(zip(previous, chapters, following)):
        current_c['end_time'] = current_c['end_time'] or next_c['start_time']
        current_c['start_time'] = current_c['start_time'] or last_c['end_time']

        # The very last end time may stay unknown when no duration is
        # available; everything else is required.
        required = ('title', 'start_time') if idx == final_idx else ('title', 'start_time', 'end_time')
        if any(current_c[key] is None for key in required):
            self.report_warning(f'Chapter {idx} contains broken data. Not extracting chapters')
            return

    info['chapters'] = chapters
+
def extract_video_object(e):
assert e['@type'] == 'VideoObject'
author = e.get('author')
@@ -1452,6 +1469,7 @@ class InfoExtractor(object):
'view_count': int_or_none(e.get('interactionCount')),
})
extract_interaction_statistic(e)
+ extract_chapter_information(e)
def traverse_json_ld(json_ld, at_top_level=True):
for e in json_ld: