From d5c3254889208a75d57c74868a7e7ce62be6b636 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Thu, 16 Dec 2021 22:46:30 +0200 Subject: [extractor] Support default implicit graph in JSON-LD (#1983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original PR: https://github.com/ytdl-org/youtube-dl/pull/30229 Per W3C JSON-LD v1.1 §4.9 (non-normative ref): When a JSON-LD document's top-level structure is a map that contains no other keys than @graph and optionally @context (properties that are not mapped to an IRI or a keyword are ignored), @graph is considered to express the otherwise implicit default graph. Authored by: zmousm --- test/test_InfoExtractor.py | 99 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 15 deletions(-) (limited to 'test/test_InfoExtractor.py') diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index cbca22c91..cf06dbde4 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -99,10 +99,10 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) def test_search_json_ld_realworld(self): - # https://github.com/ytdl-org/youtube-dl/issues/23306 - expect_dict( - self, - self.ie._search_json_ld(r'''''', None), - { - 'title': '1 On 1 With Kleio', - 'description': 'Kleio Valentien', - 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', - 'timestamp': 1449347075, - 'duration': 743.0, - 'view_count': 1120958, - 'width': 1920, - 'height': 1080, - }) + ''', + { + 'title': '1 On 1 With Kleio', + 'description': 'Kleio Valentien', + 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', + 'timestamp': 1449347075, + 'duration': 743.0, + 'view_count': 1120958, + 'width': 1920, + 'height': 1080, + }, + {}, + ), + ( + r'''''', + { + 'timestamp': 1636523400, + 'title': 'md5:91fe569e952e4d146485740ae927662b', + }, + {'expected_type': 'NewsArticle'}, + ), + ] + for html, expected_dict, search_json_ld_kwargs in _TESTS: + expect_dict( + self, + self.ie._search_json_ld(html, None, **search_json_ld_kwargs), + expected_dict + ) def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') -- cgit v1.2.3