From d5c3254889208a75d57c74868a7e7ce62be6b636 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Thu, 16 Dec 2021 22:46:30 +0200 Subject: [extractor] Support default implicit graph in JSON-LD (#1983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original PR: https://github.com/ytdl-org/youtube-dl/pull/30229 Per W3C JSON-LD v1.1 §4.9 (non-normative ref): When a JSON-LD document's top-level structure is a map that contains no other keys than @graph and optionally @context (properties that are not mapped to an IRI or a keyword are ignored), @graph is considered to express the otherwise implicit default graph. Authored by: zmousm --- yt_dlp/extractor/common.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'yt_dlp/extractor/common.py') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ebf2e3cea..52099b4b4 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1451,8 +1451,13 @@ class InfoExtractor(object): }) extract_interaction_statistic(e) - for e in json_ld: - if '@context' in e: + def traverse_json_ld(json_ld, at_top_level=True): + for e in json_ld: + if at_top_level and '@context' not in e: + continue + if at_top_level and set(e.keys()) == {'@context', '@graph'}: + traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False) + break item_type = e.get('@type') if expected_type is not None and expected_type != item_type: continue @@ -1488,7 +1493,7 @@ class InfoExtractor(object): info.update({ 'timestamp': parse_iso8601(e.get('datePublished')), 'title': unescapeHTML(e.get('headline')), - 'description': unescapeHTML(e.get('articleBody')), + 'description': unescapeHTML(e.get('articleBody') or e.get('description')), }) elif item_type == 'VideoObject': extract_video_object(e) @@ -1503,6 +1508,8 @@ class InfoExtractor(object): continue else: break + traverse_json_ld(json_ld) + return dict((k, v) for k, v in info.items() if 
v is not None) def _search_nextjs_data(self, webpage, video_id, **kw): -- cgit v1.2.3 From adbc4ec4bbfbe57842049cf9194384480f534859 Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Mon, 20 Dec 2021 15:06:46 +0900 Subject: [dash,youtube] Download live from start to end (#888) * Add option `--live-from-start` to enable downloading live videos from start * Add key `is_from_start` in formats to identify formats (of live videos) that downloads from start * [dash] Create protocol `http_dash_segments_generator` that allows a function to be passed instead of fragments * [fragment] Allow multiple live dash formats to download simultaneously * [youtube] Implement fragment re-fetching for the live dash formats * [youtube] Re-extract dash manifest every 5 hours (manifest expires in 6hrs) * [postprocessor/ffmpeg] Add `FFmpegFixupDuplicateMoovPP` to fixup duplicated moov atoms Known issue: Ctrl+C doesn't work on Windows when downloading multiple formats Closes #1521 Authored by: nao20010128nao, pukkandan --- yt_dlp/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'yt_dlp/extractor/common.py') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 52099b4b4..9abbaf04f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -163,9 +163,8 @@ class InfoExtractor(object): * filesize_approx An estimate for the number of bytes * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual - download, lower-case. - "http", "https", "rtsp", "rtmp", "rtmp_ffmpeg", "rtmpe", - "m3u8", "m3u8_native" or "http_dash_segments". + download, lower-case. One of "http", "https" or + one of the protocols defined in downloader.PROTOCOL_MAP * fragment_base_url Base URL for fragments. 
Each fragment's path value (if present) will be relative to @@ -181,6 +180,8 @@ class InfoExtractor(object): fragment_base_url * "duration" (optional, int or float) * "filesize" (optional, int) + * is_from_start Is a live format that can be downloaded + from the start. Boolean * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. -- cgit v1.2.3