aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/jwplatform.py
diff options
context:
space:
mode:
author coletdjnz <coletdjnz@protonmail.com> 2022-10-04 08:37:48 +1300
committer GitHub <noreply@github.com> 2022-10-03 19:37:48 +0000
commit d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e (patch)
tree 22e9aded6d3f0ace3c9f8a5db41e4f016343a1ad /yt_dlp/extractor/jwplatform.py
parent 8671f995cc5296f1bc9f68afc886353b5a9e40aa (diff)
download hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.lz
hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.xz
hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.zip
[extractor/JWPlatform] Fix extractor (#5112)
Fix bitrate and filesize extraction, and support embeds with unquoted URLs. Related: #5106. Authored by: coletdjnz
Diffstat (limited to 'yt_dlp/extractor/jwplatform.py')
-rw-r--r-- yt_dlp/extractor/jwplatform.py 31
1 files changed, 30 insertions, 1 deletions
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
index d6b8420a8..c94968943 100644
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
'only_matching': True,
}]
+ _WEBPAGE_TESTS = [{
+ # JWPlatform iframe
+ 'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
+ 'info_dict': {
+ 'id': 'AG26UQXM',
+ 'ext': 'mp4',
+ 'upload_date': '20160719',
+ 'timestamp': 1468923808,
+ 'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
+ 'description': '',
+ 'duration': 294.0,
+ },
+ }, {
+ # Player url not surrounded by quotes
+ 'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
+ 'info_dict': {
+ 'id': 'R10NQdhY',
+ 'title': 'Playgirl',
+ 'ext': 'mp4',
+ 'upload_date': '20220624',
+ 'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
+ 'timestamp': 1656064800,
+ 'description': 'BRD 1966, Will Tremper',
+ 'duration': 5146.0,
+ },
+ 'params': {'allowed_extractors': ['generic', 'jwplatform']},
+ }]
+
@classmethod
def _extract_embed_urls(cls, url, webpage):
for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
# <input value=URL> is used by hyland.com
# if we find <iframe>, dont look for <input>
ret = re.findall(
- r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+ r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
webpage)
if ret:
return ret