aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/wistia.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/wistia.py')
-rw-r--r--yt_dlp/extractor/wistia.py33
1 files changed, 20 insertions, 13 deletions
diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py
index 3cbcb4aa0..438828624 100644
--- a/yt_dlp/extractor/wistia.py
+++ b/yt_dlp/extractor/wistia.py
@@ -5,8 +5,8 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
+ try_call,
try_get,
- unescapeHTML,
)
@@ -117,7 +117,7 @@ class WistiaBaseIE(InfoExtractor):
class WistiaIE(WistiaBaseIE):
_VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
-
+ _EMBED_REGEX = [r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})']
_TESTS = [{
# with hls video
'url': 'wistia:807fafadvk',
@@ -146,17 +146,10 @@ class WistiaIE(WistiaBaseIE):
}]
# https://wistia.com/support/embed-and-share/video-on-your-website
- @staticmethod
- def _extract_url(webpage):
- urls = WistiaIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- for match in re.finditer(
- r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
- urls.append(unescapeHTML(match.group('url')))
+ @classmethod
+ def _extract_embed_urls(cls, url, webpage):
+ urls = list(super()._extract_embed_urls(url, webpage))
+
for match in re.finditer(
r'''(?sx)
<div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
@@ -166,6 +159,20 @@ class WistiaIE(WistiaBaseIE):
urls.append('wistia:%s' % match.group('id'))
return urls
+ @classmethod
+ def _extract_from_webpage(cls, url, webpage):
+ from .teachable import TeachableIE
+
+ if list(TeachableIE._extract_embed_urls(url, webpage)):
+ return
+
+ for entry in super()._extract_from_webpage(url, webpage):
+ yield {
+ **entry,
+ '_type': 'url_transparent',
+ 'uploader': try_call(lambda: re.match(r'(?:https?://)?([^/]+)/', url).group(1)),
+ }
+
def _real_extract(self, url):
video_id = self._match_id(url)
embed_config = self._download_embed_config('media', video_id, url)