From d5c3254889208a75d57c74868a7e7ce62be6b636 Mon Sep 17 00:00:00 2001 From: Zenon Mousmoulas Date: Thu, 16 Dec 2021 22:46:30 +0200 Subject: [extractor] Support default implicit graph in JSON-LD (#1983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original PR: https://github.com/ytdl-org/youtube-dl/pull/30229 Per W3C JSON-LD v1.1 §4.9 (non-normative ref): When a JSON-LD document's top-level structure is a map that contains no other keys than @graph and optionally @context (properties that are not mapped to an IRI or a keyword are ignored), @graph is considered to express the otherwise implicit default graph. Authored by: zmousm --- test/test_InfoExtractor.py | 99 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 84 insertions(+), 15 deletions(-) (limited to 'test') diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index cbca22c91..cf06dbde4 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -99,10 +99,10 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) def test_search_json_ld_realworld(self): - # https://github.com/ytdl-org/youtube-dl/issues/23306 - expect_dict( - self, - self.ie._search_json_ld(r'''''', None), - { - 'title': '1 On 1 With Kleio', - 'description': 'Kleio Valentien', - 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', - 'timestamp': 1449347075, - 'duration': 743.0, - 'view_count': 1120958, - 'width': 1920, - 'height': 1080, - }) + ''', + { + 'title': '1 On 1 With Kleio', + 'description': 'Kleio Valentien', + 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', + 'timestamp': 1449347075, + 'duration': 743.0, + 'view_count': 1120958, + 'width': 1920, + 'height': 1080, + }, + {}, + ), + ( + r'''''', + { + 'timestamp': 1636523400, + 'title': 'md5:91fe569e952e4d146485740ae927662b', + }, + {'expected_type': 'NewsArticle'}, + ), + ] + for html, expected_dict, search_json_ld_kwargs in _TESTS: + expect_dict( + self, + self.ie._search_json_ld(html, None, **search_json_ld_kwargs), + expected_dict + ) def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') -- cgit v1.2.3 From e978789f0f0bfe1963f4a295f6094dafa7524933 Mon Sep 17 00:00:00 2001 From: PilzAdam Date: Fri, 17 Dec 2021 21:35:48 +0100 Subject: [outtmpl] Add operator `&` for replacement text (#2012) Authored by: PilzAdam --- test/test_YoutubeDL.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'test') diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 63ef50e1a..6c2530046 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -836,6 +836,11 @@ class TestYoutubeDL(unittest.TestCase): test('%(title3)s', ('foo/bar\\test', 'foo_bar_test')) test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep)) + # Replacement + test('%(id&foo)s.bar', 'foo.bar') + test('%(title&foo)s.bar', 'NA.bar') + test('%(title&foo|baz)s.bar', 'baz.bar') + def test_format_note(self): ydl = YoutubeDL() self.assertEqual(ydl._format_note({}), '') -- cgit v1.2.3 From 75ad33572bd347c4a30fddbcf28b69d4d990da3f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 19 Dec 2021 09:05:40 +0530 Subject: [test/download] Split `sanitize_got_info_dict` into a separate function so that it can be used by third party scripts --- test/helper.py | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) (limited to 'test') diff --git a/test/helper.py b/test/helper.py index 9fb4f2120..b63a5c897 100644 --- a/test/helper.py +++ b/test/helper.py @@ -194,20 +194,8 @@ def expect_dict(self, got_dict, expected_dict): expect_value(self, got, expected, info_field) -def expect_info_dict(self, got_dict, expected_dict): - expect_dict(self, got_dict, expected_dict) - # Check for the presence of mandatory fields - if got_dict.get('_type') not in ('playlist', 'multi_video'): - mandatory_fields = ['id', 'title'] - if expected_dict.get('ext'): - mandatory_fields.extend(('url', 'ext')) - for key in mandatory_fields: - self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) - # Check for mandatory fields that are automatically set by YoutubeDL - for key in ['webpage_url', 'extractor', 'extractor_key']: - self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) - - ignored_fields = ( +def sanitize_got_info_dict(got_dict): + IGNORED_FIELDS = ( # Format keys 'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize', @@ -222,14 +210,14 @@ def expect_info_dict(self, got_dict, expected_dict): 'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries', # Auto-generated - 'playlist', 'format_index', 'webpage_url', 'video_ext', 'audio_ext', 'duration_string', 'epoch', 'fulltitle', - 'extractor', 'extractor_key', 'original_url', 'webpage_url_basename', 'webpage_url_domain', 'filepath', 'infojson_filename', + 'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch', + 'fulltitle', 'extractor', 'extractor_key', 'filepath', 'infojson_filename', 'original_url', # Only live_status needs to be checked 'is_live', 'was_live', ) - ignored_prefixes = ('', 'playlist', 'requested') + IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage') def sanitize(key, value): if isinstance(value, str) and len(value) > 100: @@ -240,14 +228,32 @@ def expect_info_dict(self, got_dict, expected_dict): test_info_dict = { key: sanitize(key, value) for key, value in got_dict.items() - if value is not None and key not in ignored_fields and not any( - key.startswith(f'{prefix}_') for prefix in ignored_prefixes) + if value is not None and key not in IGNORED_FIELDS and not any( + key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES) } # display_id may be generated from id if test_info_dict.get('display_id') == test_info_dict['id']: test_info_dict.pop('display_id') + return test_info_dict + + +def expect_info_dict(self, got_dict, expected_dict): + expect_dict(self, got_dict, expected_dict) + # Check for the presence of mandatory fields + if got_dict.get('_type') not in ('playlist', 'multi_video'): + mandatory_fields = ['id', 'title'] + if expected_dict.get('ext'): + mandatory_fields.extend(('url', 'ext')) + for key in mandatory_fields: + self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor', 'extractor_key']: + self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) + + test_info_dict = sanitize_got_info_dict(got_dict) + missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) if missing_keys: def _repr(v): -- cgit v1.2.3