[extractor/JWPlatform] Fix extractor (#5112)

Fix bitrate and filesize extraction and support embeds with unquoted URLs.

Related: #5106
Authored by: coletdjnz
author: coletdjnz <coletdjnz@protonmail.com> 2022-10-04 08:37:48 +1300
committer: GitHub <noreply@github.com> 2022-10-03 19:37:48 +0000
commit: d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e (patch)
tree: 22e9aded6d3f0ace3c9f8a5db41e4f016343a1ad /yt_dlp/extractor/jwplatform.py
parent: 8671f995cc5296f1bc9f68afc886353b5a9e40aa (diff)
download: hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.lz hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.xz hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.zip
1 files changed, 30 insertions, 1 deletions
diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py
index d6b8420a8..c94968943 100644
--- a/yt_dlp/extractor/jwplatform.py
+++ b/yt_dlp/extractor/jwplatform.py
@@ -22,13 +22,42 @@ class JWPlatformIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    _WEBPAGE_TESTS = [{
+        # JWPlatform iframe
+        'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
+        'info_dict': {
+            'id': 'AG26UQXM',
+            'ext': 'mp4',
+            'upload_date': '20160719',
+            'timestamp': 1468923808,
+            'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/AG26UQXM/poster.jpg?width=720',
+            'description': '',
+            'duration': 294.0,
+        },
+    }, {
+        # Player url not surrounded by quotes
+        'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
+        'info_dict': {
+            'id': 'R10NQdhY',
+            'title': 'Playgirl',
+            'ext': 'mp4',
+            'upload_date': '20220624',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
+            'timestamp': 1656064800,
+            'description': 'BRD 1966, Will Tremper',
+            'duration': 5146.0,
+        },
+        'params': {'allowed_extractors': ['generic', 'jwplatform']},
+    }]
+
     @classmethod
     def _extract_embed_urls(cls, url, webpage):
         for tag, key in ((r'(?:script|iframe)', 'src'), ('input', 'value')):
             # <input value=URL> is used by hyland.com
             # if we find <iframe>, dont look for <input>
             ret = re.findall(
-                r'<%s[^>]+?%s=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
+                r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
                 webpage)
             if ret:
                 return ret
author	coletdjnz <coletdjnz@protonmail.com>	2022-10-04 08:37:48 +1300
committer	GitHub <noreply@github.com>	2022-10-03 19:37:48 +0000
commit	d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e (patch)
tree	22e9aded6d3f0ace3c9f8a5db41e4f016343a1ad /yt_dlp/extractor/jwplatform.py
parent	8671f995cc5296f1bc9f68afc886353b5a9e40aa (diff)
download	hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.lz hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.tar.xz hypervideo-pre-d3a3d7f0cc27ca78aeb807b27c7ebee88ff3161e.zip