[extractor] Extract thumbnails from JSON-LD (#2195)

Authored by: nixxo
author: nixxo <nixxo@protonmail.com> 2022-01-01 20:50:27 +0100
committer: GitHub <noreply@github.com> 2022-01-02 01:20:27 +0530
commit: 7592749cbe377675688dfcad5b7c1d46bbb684e1 (patch)
tree: e35110d845c027a884216c4dabfa93c4dfc3ce7f
parent: 767f999b53705cdeda1b5bcca360aa1fc9c88135 (diff)
download: hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.tar.lz hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.tar.xz hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.zip
2 files changed, 28 insertions, 1 deletion
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index cf06dbde4..25bc00e0b 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -208,6 +208,32 @@ class TestInfoExtractor(unittest.TestCase):
                 },
                 {'expected_type': 'NewsArticle'},
             ),
+            (
+                # test multiple thumbnails in a list
+                r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
+</script>''',
+                {
+                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                },
+                {},
+            ),
+            (
+                # test single thumbnail
+                r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
+</script>''',
+                {
+                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                },
+                {},
+            )
         ]
         for html, expected_dict, search_json_ld_kwargs in _TESTS:
             expect_dict(
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 3821b7183..1d694293e 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1436,7 +1436,8 @@ class InfoExtractor(object):
                 'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
                 'description': unescapeHTML(e.get('description')),
-                'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
+                'thumbnails': [{'url': url_or_none(url)}
+                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
                 # author can be an instance of 'Organization' or 'Person' types.
author	nixxo <nixxo@protonmail.com>	2022-01-01 20:50:27 +0100
committer	GitHub <noreply@github.com>	2022-01-02 01:20:27 +0530
commit	7592749cbe377675688dfcad5b7c1d46bbb684e1 (patch)
tree	e35110d845c027a884216c4dabfa93c4dfc3ce7f
parent	767f999b53705cdeda1b5bcca360aa1fc9c88135 (diff)
download	hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.tar.lz hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.tar.xz hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.zip