diff options
author | nixxo <nixxo@protonmail.com> | 2022-01-01 20:50:27 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-02 01:20:27 +0530 |
commit | 7592749cbe377675688dfcad5b7c1d46bbb684e1 (patch) | |
tree | e35110d845c027a884216c4dabfa93c4dfc3ce7f | |
parent | 767f999b53705cdeda1b5bcca360aa1fc9c88135 (diff) | |
download | hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.tar.lz hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.tar.xz hypervideo-pre-7592749cbe377675688dfcad5b7c1d46bbb684e1.zip |
[extractor] Extract thumbnails from JSON-LD (#2195)
Authored by: nixxo
-rw-r--r-- | test/test_InfoExtractor.py | 26 | ||||
-rw-r--r-- | yt_dlp/extractor/common.py | 3 |
2 files changed, 28 insertions, 1 deletion
```diff
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index cf06dbde4..25bc00e0b 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -208,6 +208,32 @@ class TestInfoExtractor(unittest.TestCase):
                 },
                 {'expected_type': 'NewsArticle'},
             ),
+            (
+                # test multiple thumbnails in a list
+                r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
+</script>''',
+                {
+                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                },
+                {},
+            ),
+            (
+                # test single thumbnail
+                r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
+</script>''',
+                {
+                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                },
+                {},
+            )
         ]
         for html, expected_dict, search_json_ld_kwargs in _TESTS:
             expect_dict(
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 3821b7183..1d694293e 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1436,7 +1436,8 @@ class InfoExtractor(object):
                 'url': url_or_none(e.get('contentUrl')),
                 'title': unescapeHTML(e.get('name')),
                 'description': unescapeHTML(e.get('description')),
-                'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
+                'thumbnails': [{'url': url_or_none(url)}
+                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
                 'duration': parse_duration(e.get('duration')),
                 'timestamp': unified_timestamp(e.get('uploadDate')),
                 # author can be an instance of 'Organization' or 'Person' types.
```