aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLesmiscore <nao20010128@gmail.com>2022-05-29 22:48:04 +0900
committerGitHub <noreply@github.com>2022-05-29 22:48:04 +0900
commit222a230871fe4fe63f35c49590379c9a77116819 (patch)
tree0067375281c6e356b8302db3f99d9e75fcad65b7
parentee27297f82ccbd702ccd4721d1d3c9d67bbe187e (diff)
downloadhypervideo-pre-222a230871fe4fe63f35c49590379c9a77116819.tar.lz
hypervideo-pre-222a230871fe4fe63f35c49590379c9a77116819.tar.xz
hypervideo-pre-222a230871fe4fe63f35c49590379c9a77116819.zip
[extractor/common] Recognize `src` attribute from HTML5 media elements (#3899)
Authored by: Lesmiscore
-rw-r--r--test/test_InfoExtractor.py18
-rw-r--r--yt_dlp/extractor/common.py7
2 files changed, 23 insertions, 2 deletions
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 257ea7dd3..928246668 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -502,6 +502,24 @@ class TestInfoExtractor(unittest.TestCase):
}],
})
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+ <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+ controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 5767662ed..a589fb7fa 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3197,7 +3197,8 @@ class InfoExtractor:
return f
return {}
- def _media_formats(src, cur_media_type, type_info={}):
+ def _media_formats(src, cur_media_type, type_info=None):
+ type_info = type_info or {}
full_url = absolute_url(src)
ext = type_info.get('ext') or determine_ext(full_url)
if ext == 'm3u8':
@@ -3215,6 +3216,7 @@ class InfoExtractor:
formats = [{
'url': full_url,
'vcodec': 'none' if cur_media_type == 'audio' else None,
+ 'ext': ext,
}]
return is_plain_url, formats
@@ -3241,7 +3243,8 @@ class InfoExtractor:
media_attributes = extract_attributes(media_tag)
src = strip_or_none(media_attributes.get('src'))
if src:
- _, formats = _media_formats(src, media_type)
+ f = parse_content_type(media_attributes.get('type'))
+ _, formats = _media_formats(src, media_type, f)
media_info['formats'].extend(formats)
media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
if media_content: