aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/common.py
diff options
context:
space:
mode:
author: Jesús <heckyel@hyperbola.info> 2021-12-20 11:53:26 -0500
committer: Jesús <heckyel@hyperbola.info> 2021-12-20 11:53:26 -0500
commit: 65ee789d3062a5321faf0092fc2cf8ec498fc4a1 (patch)
tree: 72ef3e3de40dac0e03517d28eb0a2cea17f86ba4 /yt_dlp/extractor/common.py
parent: 1abb4c89c62a9fec2e8a65c3d76d4ac0e8dd4c43 (diff)
parent: 0fcba15d57430e8ebfd2091463e12dfa9882cbef (diff)
download: hypervideo-pre-65ee789d3062a5321faf0092fc2cf8ec498fc4a1.tar.lz
hypervideo-pre-65ee789d3062a5321faf0092fc2cf8ec498fc4a1.tar.xz
hypervideo-pre-65ee789d3062a5321faf0092fc2cf8ec498fc4a1.zip
updated from upstream | 20/12/2021 at 11:53
Diffstat (limited to 'yt_dlp/extractor/common.py')
-rw-r--r-- yt_dlp/extractor/common.py | 20
1 files changed, 14 insertions, 6 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index ebf2e3cea..9abbaf04f 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -163,9 +163,8 @@ class InfoExtractor(object):
* filesize_approx An estimate for the number of bytes
* player_url SWF Player URL (used for rtmpdump).
* protocol The protocol that will be used for the actual
- download, lower-case.
- "http", "https", "rtsp", "rtmp", "rtmp_ffmpeg", "rtmpe",
- "m3u8", "m3u8_native" or "http_dash_segments".
+ download, lower-case. One of "http", "https" or
+ one of the protocols defined in downloader.PROTOCOL_MAP
* fragment_base_url
Base URL for fragments. Each fragment's path
value (if present) will be relative to
@@ -181,6 +180,8 @@ class InfoExtractor(object):
fragment_base_url
* "duration" (optional, int or float)
* "filesize" (optional, int)
+ * is_from_start Is a live format that can be downloaded
+ from the start. Boolean
* preference Order number of this format. If this field is
present and not None, the formats get sorted
by this field, regardless of all other values.
@@ -1451,8 +1452,13 @@ class InfoExtractor(object):
})
extract_interaction_statistic(e)
- for e in json_ld:
- if '@context' in e:
+ def traverse_json_ld(json_ld, at_top_level=True):
+ for e in json_ld:
+ if at_top_level and '@context' not in e:
+ continue
+ if at_top_level and set(e.keys()) == {'@context', '@graph'}:
+ traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False)
+ break
item_type = e.get('@type')
if expected_type is not None and expected_type != item_type:
continue
@@ -1488,7 +1494,7 @@ class InfoExtractor(object):
info.update({
'timestamp': parse_iso8601(e.get('datePublished')),
'title': unescapeHTML(e.get('headline')),
- 'description': unescapeHTML(e.get('articleBody')),
+ 'description': unescapeHTML(e.get('articleBody') or e.get('description')),
})
elif item_type == 'VideoObject':
extract_video_object(e)
@@ -1503,6 +1509,8 @@ class InfoExtractor(object):
continue
else:
break
+ traverse_json_ld(json_ld)
+
return dict((k, v) for k, v in info.items() if v is not None)
def _search_nextjs_data(self, webpage, video_id, **kw):