about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: coletdjnz <coletdjnz@protonmail.com>, 2022-10-04 08:37:48 +1300
committer: GitHub <noreply@github.com>, 2022-10-03 19:37:48 +0000
commit: d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e (patch)
tree: 22e9aded6d3f0ace3c9f8a5db41e4f016343a1ad
parent: 8671f995cc5296f1bc9f68afc886353b5a9e40aa (diff)
download: hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.lz
hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.xz
hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.zip
[extractor/JWPlatform] Fix extractor (#5112)
Fix bitrate and filesize extraction and support embeds with unquoted urls.
Related: #5106
Authored by: coletdjnz
-rw-r--r-- yt_dlp/extractor/common.py | 3
-rw-r--r-- yt_dlp/extractor/generic.py | 12
-rw-r--r-- yt_dlp/extractor/jwplatform.py | 31
3 files changed, 32 insertions, 14 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index caec0ccf6..0700b4767 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -3587,7 +3587,8 @@ class InfoExtractor:
'url': source_url,
'width': int_or_none(source.get('width')),
'height': height,
- 'tbr': int_or_none(source.get('bitrate')),
+ 'tbr': int_or_none(source.get('bitrate'), scale=1000),
+ 'filesize': int_or_none(source.get('filesize')),
'ext': ext,
}
if source_url.startswith('rtmp'):
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 73aefc782..73422f937 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1072,18 +1072,6 @@ class GenericIE(InfoExtractor):
}
},
{
- # JWPlatform iframe
- 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
- 'info_dict': {
- 'id': 'AG26UQXM',
- 'ext': 'mp4',
- 'upload_date': '20160719',
- 'timestamp': 468923808,
- 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
- },
- 'add_ie': ['JWPlatform'],
- },
- {
# Video.js embed, multiple formats
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
'info_dict': {
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
index d6b8420a8..c94968943 100644
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
'only_matching': True,
}]
+ _WEBPAGE_TESTS = [{
+ # JWPlatform iframe
+ 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
+ 'info_dict': {
+ 'id': 'AG26UQXM',
+ 'ext': 'mp4',
+ 'upload_date': '20160719',
+ 'timestamp': 1468923808,
+ 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
+ 'description': '',
+ 'duration': 294.0,
+ },
+ }, {
+ # Player url not surrounded by quotes
+ 'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
+ 'info_dict': {
+ 'id': 'R10NQdhY',
+ 'title': 'Playgirl',
+ 'ext': 'mp4',
+ 'upload_date': '20220624',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
+ 'timestamp': 1656064800,
+ 'description': 'BRD 1966, Will Tremper',
+ 'duration': 5146.0,
+ },
+ 'params': {'allowed_extractors': ['generic', 'jwplatform']},
+ }]
+
@classmethod
def _extract_embed_urls(cls, url, webpage):
for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
# <input value=URL> is used by hyland.com
# if we find <iframe>, dont look for <input>
ret = re.findall(
- r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+ r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
webpage)
if ret:
return ret