diff options
| author | remitamine <remitamine@gmail.com> | 2015-12-04 08:23:21 +0100 | 
|---|---|---|
| committer | remitamine <remitamine@gmail.com> | 2015-12-04 08:23:21 +0100 | 
| commit | c3d3110f6a4b769e1ddb5532ac61f3da419ebd07 (patch) | |
| tree | 908e2dfec70acd00abed79e5663c6f323d78c255 | |
| parent | 79ec00276cc0b3ce49aed5f451b99abf68a7fd93 (diff) | |
| parent | cce9d15d0115e8b4cd1f6e2a327b5e9dbdf0ee54 (diff) | |
| download | hypervideo-pre-c3d3110f6a4b769e1ddb5532ac61f3da419ebd07.tar.lz hypervideo-pre-c3d3110f6a4b769e1ddb5532ac61f3da419ebd07.tar.xz hypervideo-pre-c3d3110f6a4b769e1ddb5532ac61f3da419ebd07.zip | |
Merge pull request #7185 from remitamine/ooyala
[ooyala] extract more formats and metadata
| -rw-r--r-- | youtube_dl/extractor/byutv.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/groupon.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/howcast.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/ooyala.py | 166 | ||||
| -rw-r--r-- | youtube_dl/extractor/teachingchannel.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/vice.py | 1 | 
7 files changed, 78 insertions, 107 deletions
| diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 3b2de517e..ce25816f0 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -14,9 +14,10 @@ class BYUtvIE(InfoExtractor):          'info_dict': {              'id': 'studio-c-season-5-episode-5',              'ext': 'mp4', -            'description': 'md5:5438d33774b6bdc662f9485a340401cc', +            'description': 'md5:e07269172baff037f8e8bf9956bc9747',              'title': 'Season 5 Episode 5', -            'thumbnail': 're:^https?://.*\.jpg$' +            'thumbnail': 're:^https?://.*\.jpg$', +            'duration': 1486486,          },          'params': {              'skip_download': True, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5075d131e..b60684f98 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -339,6 +339,7 @@ class GenericIE(InfoExtractor):                  'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',                  'ext': 'mp4',                  'title': '2cc213299525360.mov',  # that's what we get +                'duration': 238231,              },              'add_ie': ['Ooyala'],          }, @@ -350,6 +351,7 @@ class GenericIE(InfoExtractor):                  'ext': 'mp4',                  'title': '"Steve Jobs: Man in the Machine" trailer',                  'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', +                'duration': 135427,              },              'params': {                  'skip_download': True, @@ -960,8 +962,9 @@ class GenericIE(InfoExtractor):              'info_dict': {                  'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs',                  'ext': 'mp4', -                'description': 'VIDEO: Index/Match versus VLOOKUP.', +                'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.',                  'title': 'This is what separates the Excel masters from the wannabes', +                'duration': 191933,              },              'params': {                  # m3u8 downloads @@ -1501,7 +1504,7 @@ class GenericIE(InfoExtractor):                  re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or                  re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))          if mobj is not None: -            return OoyalaIE._build_url_result(mobj.group('ec')) +            return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url}))          # Look for multiple Ooyala embeds on SBN network websites          mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) @@ -1509,7 +1512,7 @@ class GenericIE(InfoExtractor):              embeds = self._parse_json(mobj.group(1), video_id, fatal=False)              if embeds:                  return _playlist_from_matches( -                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') +                    embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')          # Look for Aparat videos          mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage) diff --git a/youtube_dl/extractor/groupon.py b/youtube_dl/extractor/groupon.py index 8b9e0e2f8..22ff7182f 100644 --- a/youtube_dl/extractor/groupon.py +++ b/youtube_dl/extractor/groupon.py @@ -18,6 +18,8 @@ class GrouponIE(InfoExtractor):                  'id': 'tubGNycTo_9Uxg82uESj4i61EYX8nyuf',                  'ext': 'mp4',                  'title': 'Bikram Yoga Huntington Beach | Orange County', +                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', +                'duration': 44961,              },          }],          'params': { diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py index 16677f179..165b9f39e 100644 --- a/youtube_dl/extractor/howcast.py +++ b/youtube_dl/extractor/howcast.py @@ -16,6 +16,7 @@ class HowcastIE(InfoExtractor):              'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',              'timestamp': 1276081287,              'upload_date': '20100609', +            'duration': 56823,          },          'params': {              # m3u8 download diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index a262a9f6d..3b692e903 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -1,108 +1,69 @@  from __future__ import unicode_literals  import re -import json  import base64  from .common import InfoExtractor  from ..utils import ( -    unescapeHTML, -    ExtractorError, -    determine_ext,      int_or_none, +    float_or_none, +    ExtractorError, +    unsmuggle_url,  ) +from ..compat import compat_urllib_parse  class OoyalaBaseIE(InfoExtractor): -    def _extract_result(self, info, more_info): -        embedCode = info['embedCode'] -        video_url = info.get('ipad_url') or info['url'] - -        if determine_ext(video_url) == 'm3u8': -            formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4') -        else: -            formats = [{ -                'url': video_url, -                'ext': 'mp4', -            }] - -        return { -            'id': embedCode, -            'title': unescapeHTML(info['title']), -            'formats': formats, -            'description': unescapeHTML(more_info['description']), -            'thumbnail': more_info['promo'], +    def _extract(self, content_tree_url, video_id, domain='example.org'): +        content_tree = self._download_json(content_tree_url, video_id)['content_tree'] +        metadata = content_tree[list(content_tree)[0]] +        embed_code = metadata['embed_code'] +        pcode = metadata.get('asset_pcode') or embed_code +        video_info = { +            'id': embed_code, +            'title': metadata['title'], +            'description': metadata.get('description'), +            'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), +            'duration': int_or_none(metadata.get('duration')),          } -    def _extract(self, player_url, video_id): -        player = self._download_webpage(player_url, video_id) -        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', -                                        player, 'mobile player url') -        # Looks like some videos are only available for particular devices -        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 -        # is only available for ipad) -        # Working around with fetching URLs for all the devices found starting with 'unknown' -        # until we succeed or eventually fail for each device. -        devices = re.findall(r'device\s*=\s*"([^"]+)";', player) -        devices.remove('unknown') -        devices.insert(0, 'unknown') -        for device in devices: -            mobile_player = self._download_webpage( -                '%s&device=%s' % (mobile_url, device), video_id, -                'Downloading mobile player JS for %s device' % device) -            videos_info = self._search_regex( -                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', -                mobile_player, 'info', fatal=False, default=None) -            if videos_info: -                break - -        if not videos_info: -            formats = [] +        formats = [] +        for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'):              auth_data = self._download_json( -                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (video_id, video_id), -                video_id) - -            cur_auth_data = auth_data['authorization_data'][video_id] - -            for stream in cur_auth_data['streams']: -                formats.append({ -                    'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'), -                    'ext': stream.get('delivery_type'), -                    'format': stream.get('video_codec'), -                    'format_id': stream.get('profile'), -                    'width': int_or_none(stream.get('width')), -                    'height': int_or_none(stream.get('height')), -                    'abr': int_or_none(stream.get('audio_bitrate')), -                    'vbr': int_or_none(stream.get('video_bitrate')), -                }) -            if formats: -                return { -                    'id': video_id, -                    'formats': formats, -                    'title': 'Ooyala video', -                } - -            if not cur_auth_data['authorized']: -                raise ExtractorError(cur_auth_data['message'], expected=True) - -        if not videos_info: -            raise ExtractorError('Unable to extract info') -        videos_info = videos_info.replace('\\"', '"') -        videos_more_info = self._search_regex( -            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') -        videos_info = json.loads(videos_info) -        videos_more_info = json.loads(videos_more_info) - -        if videos_more_info.get('lineup'): -            videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])] -            return { -                '_type': 'playlist', -                'id': video_id, -                'title': unescapeHTML(videos_more_info['title']), -                'entries': videos, -            } -        else: -            return self._extract_result(videos_info[0], videos_more_info) +                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?' % (pcode, embed_code) + compat_urllib_parse.urlencode({'domain': domain, 'supportedFormats': supported_format}), +                video_id, 'Downloading %s JSON' % supported_format) + +            cur_auth_data = auth_data['authorization_data'][embed_code] + +            if cur_auth_data['authorized']: +                for stream in cur_auth_data['streams']: +                    url = base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8') +                    delivery_type = stream['delivery_type'] +                    if delivery_type == 'remote_asset': +                        video_info['url'] = url +                        return video_info +                    if delivery_type == 'hls': +                        formats.extend(self._extract_m3u8_formats(url, embed_code, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) +                    elif delivery_type == 'hds': +                        formats.extend(self._extract_f4m_formats(url, embed_code, -1, 'hds', fatal=False)) +                    else: +                        formats.append({ +                            'url': url, +                            'ext': stream.get('delivery_type'), +                            'vcodec': stream.get('video_codec'), +                            'format_id': '%s-%s-%sp' % (stream.get('profile'), delivery_type, stream.get('height')), +                            'width': int_or_none(stream.get('width')), +                            'height': int_or_none(stream.get('height')), +                            'abr': int_or_none(stream.get('audio_bitrate')), +                            'vbr': int_or_none(stream.get('video_bitrate')), +                            'fps': float_or_none(stream.get('framerate')), +                        }) +            else: +                raise ExtractorError('%s said: %s' % (self.IE_NAME, cur_auth_data['message']), expected=True) +        self._sort_formats(formats) + +        video_info['formats'] = formats +        return video_info  class OoyalaIE(OoyalaBaseIE): @@ -117,6 +78,7 @@ class OoyalaIE(OoyalaBaseIE):                  'ext': 'mp4',                  'title': 'Explaining Data Recovery from Hard Drives and SSDs',                  'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', +                'duration': 853386,              },          }, {              # Only available for ipad @@ -125,7 +87,7 @@ class OoyalaIE(OoyalaBaseIE):                  'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',                  'ext': 'mp4',                  'title': 'Simulation Overview - Levels of Simulation', -                'description': '', +                'duration': 194948,              },          },          { @@ -136,7 +98,8 @@ class OoyalaIE(OoyalaBaseIE):              'info_dict': {                  'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',                  'ext': 'mp4', -                'title': 'Ooyala video', +                'title': 'Divide Tool Path.mp4', +                'duration': 204405,              }          }      ] @@ -151,9 +114,11 @@ class OoyalaIE(OoyalaBaseIE):                                ie=cls.ie_key())      def _real_extract(self, url): +        url, smuggled_data = unsmuggle_url(url, {})          embed_code = self._match_id(url) -        player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code -        return self._extract(player_url, embed_code) +        domain = smuggled_data.get('domain') +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/embed_code/%s/%s' % (embed_code, embed_code) +        return self._extract(content_tree_url, embed_code, domain)  class OoyalaExternalIE(OoyalaBaseIE): @@ -170,7 +135,7 @@ class OoyalaExternalIE(OoyalaBaseIE):                          .*?&pcode=                      )                      (?P<pcode>.+?) -                    (&|$) +                    (?:&|$)                      '''      _TEST = { @@ -179,7 +144,7 @@ class OoyalaExternalIE(OoyalaBaseIE):              'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',              'ext': 'mp4',              'title': 'dm_140128_30for30Shorts___JudgingJewellv2', -            'description': '', +            'duration': 1302000,          },          'params': {              # m3u8 download @@ -188,9 +153,6 @@ class OoyalaExternalIE(OoyalaBaseIE):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        partner_id = mobj.group('partner_id') -        video_id = mobj.group('id') -        pcode = mobj.group('pcode') -        player_url = 'http://player.ooyala.com/player.js?externalId=%s:%s&pcode=%s' % (partner_id, video_id, pcode) -        return self._extract(player_url, video_id) +        partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups() +        content_tree_url = 'http://player.ooyala.com/player_api/v1/content_tree/external_id/%s/%s:%s' % (pcode, partner_id, video_id) +        return self._extract(content_tree_url, video_id) diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index 117afa9bf..36a6fc679 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -16,6 +16,7 @@ class TeachingChannelIE(InfoExtractor):              'ext': 'mp4',              'title': 'A History of Teaming',              'description': 'md5:2a9033db8da81f2edffa4c99888140b3', +            'duration': 422255,          },          'params': {              # m3u8 download diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 01af7a995..7df87c31c 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -15,6 +15,7 @@ class ViceIE(InfoExtractor):                  'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',                  'ext': 'mp4',                  'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', +                'duration': 725983,              },              'params': {                  # Requires ffmpeg (m3u8 manifest) | 
