diff options
Diffstat (limited to 'youtube_dl/extractor/imdb.py')
-rw-r--r-- | youtube_dl/extractor/imdb.py | 58 |
1 file changed, 16 insertions, 42 deletions
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index a31301985..436759da5 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -1,7 +1,5 @@ from __future__ import unicode_literals -import base64 -import json import re from .common import InfoExtractor @@ -10,7 +8,6 @@ from ..utils import ( mimetype2ext, parse_duration, qualities, - try_get, url_or_none, ) @@ -18,16 +15,15 @@ from ..utils import ( class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'info_dict': { 'id': '2524815897', 'ext': 'mp4', - 'title': 'No. 2', + 'title': 'No. 2 from Ice Age: Continental Drift (2012)', 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7', - 'duration': 152, } }, { 'url': 'http://www.imdb.com/video/_/vi2524815897', @@ -51,23 +47,21 @@ class ImdbIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - - data = self._download_json( - 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id, - query={ - 'key': base64.b64encode(json.dumps({ - 'type': 'VIDEO_PLAYER', - 'subType': 'FORCE_LEGACY', - 'id': 'vi%s' % video_id, - }).encode()).decode(), - })[0] + webpage = self._download_webpage( + 'https://www.imdb.com/videoplayer/vi' + video_id, video_id) + video_metadata = self._parse_json(self._search_regex( + r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage, + 'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id] + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage) or self._html_search_regex( + r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title'] quality = qualities(('SD', '480p', '720p', '1080p')) formats = [] - for encoding in 
data['videoLegacyEncodings']: + for encoding in video_metadata.get('encodings', []): if not encoding or not isinstance(encoding, dict): continue - video_url = url_or_none(encoding.get('url')) + video_url = url_or_none(encoding.get('videoUrl')) if not video_url: continue ext = mimetype2ext(encoding.get( @@ -75,7 +69,7 @@ class ImdbIE(InfoExtractor): if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( video_url, video_id, 'mp4', entry_protocol='m3u8_native', - preference=1, m3u8_id='hls', fatal=False)) + m3u8_id='hls', fatal=False)) continue format_id = encoding.get('definition') formats.append({ @@ -86,33 +80,13 @@ class ImdbIE(InfoExtractor): }) self._sort_formats(formats) - webpage = self._download_webpage( - 'https://www.imdb.com/video/vi' + video_id, video_id) - video_metadata = self._parse_json(self._search_regex( - r'args\.push\(\s*({.+?})\s*\)\s*;', webpage, - 'video metadata'), video_id) - - video_info = video_metadata.get('VIDEO_INFO') - if video_info and isinstance(video_info, dict): - info = try_get( - video_info, lambda x: x[list(video_info.keys())[0]][0], dict) - else: - info = {} - - title = self._html_search_meta( - ['og:title', 'twitter:title'], webpage) or self._html_search_regex( - r'<title>(.+?)</title>', webpage, 'title', - default=None) or info['videoTitle'] - return { 'id': video_id, 'title': title, - 'alt_title': info.get('videoSubTitle'), 'formats': formats, - 'description': info.get('videoDescription'), - 'thumbnail': url_or_none(try_get( - video_metadata, lambda x: x['videoSlate']['source'])), - 'duration': parse_duration(info.get('videoRuntime')), + 'description': video_metadata.get('description'), + 'thumbnail': video_metadata.get('slate', {}).get('url'), + 'duration': parse_duration(video_metadata.get('duration')), } |