1 files changed, 32 insertions, 67 deletions
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
index 77febd2eb..0fbc888ec 100644
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -13,7 +13,8 @@ from ..utils import (
 
 class WistiaIE(InfoExtractor):
     _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
-    _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
+    _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
+    _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
 
     _TESTS = [{
         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
@@ -45,32 +46,31 @@ class WistiaIE(InfoExtractor):
     # https://wistia.com/support/embed-and-share/video-on-your-website
     @staticmethod
     def _extract_url(webpage):
-        urls = WistiaIE._extract_urls(webpage)
-        return urls[0] if urls else None
+        match = re.search(
+            r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
+        if match:
+            return unescapeHTML(match.group('url'))
 
-    @staticmethod
-    def _extract_urls(webpage):
-        urls = []
-        for match in re.finditer(
-                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
-            urls.append(unescapeHTML(match.group('url')))
-        for match in re.finditer(
-                r'''(?sx)
-                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
-                ''', webpage):
-            urls.append('wistia:%s' % match.group('id'))
-        for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
-            urls.append('wistia:%s' % match.group('id'))
-        return urls
+        match = re.search(
+            r'''(?sx)
+                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
+                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
+            ''', webpage)
+        if match:
+            return 'wistia:%s' % match.group('id')
+
+        match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
+        if match:
+            return 'wistia:%s' % match.group('id')
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         data_json = self._download_json(
-            self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
+            self._API_URL % video_id, video_id,
             # Some videos require this.
             headers={
-                'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
+                'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id,
             })
 
         if data_json.get('error'):
@@ -95,61 +95,27 @@ class WistiaIE(InfoExtractor):
                     'url': aurl,
                     'width': int_or_none(a.get('width')),
                     'height': int_or_none(a.get('height')),
-                    'filesize': int_or_none(a.get('size')),
                 })
             else:
                 aext = a.get('ext')
-                display_name = a.get('display_name')
-                format_id = atype
-                if atype and atype.endswith('_video') and display_name:
-                    format_id = '%s-%s' % (atype[:-6], display_name)
-                f = {
-                    'format_id': format_id,
+                is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8'
+                formats.append({
+                    'format_id': atype,
                     'url': aurl,
-                    'tbr': int_or_none(a.get('bitrate')) or None,
+                    'tbr': int_or_none(a.get('bitrate')),
+                    'vbr': int_or_none(a.get('opt_vbitrate')),
+                    'width': int_or_none(a.get('width')),
+                    'height': int_or_none(a.get('height')),
+                    'filesize': int_or_none(a.get('size')),
+                    'vcodec': a.get('codec'),
+                    'container': a.get('container'),
+                    'ext': 'mp4' if is_m3u8 else aext,
+                    'protocol': 'm3u8' if is_m3u8 else None,
                     'preference': 1 if atype == 'original' else None,
-                }
-                if display_name == 'Audio':
-                    f.update({
-                        'vcodec': 'none',
-                    })
-                else:
-                    f.update({
-                        'width': int_or_none(a.get('width')),
-                        'height': int_or_none(a.get('height')),
-                        'vcodec': a.get('codec'),
-                    })
-                if a.get('container') == 'm3u8' or aext == 'm3u8':
-                    ts_f = f.copy()
-                    ts_f.update({
-                        'ext': 'ts',
-                        'format_id': f['format_id'].replace('hls-', 'ts-'),
-                        'url': f['url'].replace('.bin', '.ts'),
-                    })
-                    formats.append(ts_f)
-                    f.update({
-                        'ext': 'mp4',
-                        'protocol': 'm3u8_native',
-                    })
-                else:
-                    f.update({
-                        'container': a.get('container'),
-                        'ext': aext,
-                        'filesize': int_or_none(a.get('size')),
-                    })
-                formats.append(f)
+                })
 
         self._sort_formats(formats)
 
-        subtitles = {}
-        for caption in data.get('captions', []):
-            language = caption.get('language')
-            if not language:
-                continue
-            subtitles[language] = [{
-                'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
-            }]
-
         return {
             'id': video_id,
             'title': title,
@@ -158,5 +124,4 @@ class WistiaIE(InfoExtractor):
             'thumbnails': thumbnails,
             'duration': float_or_none(data.get('duration')),
             'timestamp': int_or_none(data.get('createdAt')),
-            'subtitles': subtitles,
         }