diff options
Diffstat (limited to 'youtube_dlc/extractor/twitch.py')
-rw-r--r-- | youtube_dlc/extractor/twitch.py | 229 |
1 files changed, 101 insertions, 128 deletions
diff --git a/youtube_dlc/extractor/twitch.py b/youtube_dlc/extractor/twitch.py index ab131a07d..503d019de 100644 --- a/youtube_dlc/extractor/twitch.py +++ b/youtube_dlc/extractor/twitch.py @@ -9,7 +9,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_parse_qs, compat_str, compat_urlparse, @@ -42,30 +41,16 @@ class TwitchBaseIE(InfoExtractor): _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko' _NETRC_MACHINE = 'twitch' - def _handle_error(self, response): - if not isinstance(response, dict): - return - error = response.get('error') - if error: - raise ExtractorError( - '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')), - expected=True) - - def _call_api(self, path, item_id, *args, **kwargs): - headers = kwargs.get('headers', {}).copy() - headers.update({ - 'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8', - 'Client-ID': self._CLIENT_ID, - }) - kwargs.update({ - 'headers': headers, - 'expected_status': (400, 410), - }) - response = self._download_json( - '%s/%s' % (self._API_BASE, path), item_id, - *args, **compat_kwargs(kwargs)) - self._handle_error(response) - return response + _OPERATION_HASHES = { + 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14', + 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb', + 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777', + 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', + 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', + 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', + 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', + 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', + } def _real_initialize(self): self._login() @@ -151,13 +136,46 @@ class TwitchBaseIE(InfoExtractor): }) self._sort_formats(formats) - def _download_access_token(self, channel_name): - return self._call_api( - 'api/channels/%s/access_token' % channel_name, channel_name, - 'Downloading access token JSON') + def _download_base_gql(self, video_id, ops, note, fatal=True): + return self._download_json( + 'https://gql.twitch.tv/gql', video_id, note, + data=json.dumps(ops).encode(), + headers={ + 'Content-Type': 'text/plain;charset=UTF-8', + 'Client-ID': self._CLIENT_ID, + }, fatal=fatal) - def _extract_channel_id(self, token, channel_name): - return compat_str(self._parse_json(token, channel_name)['channel_id']) + def _download_gql(self, video_id, ops, note, fatal=True): + for op in ops: + op['extensions'] = { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': self._OPERATION_HASHES[op['operationName']], + } + } + return self._download_base_gql(video_id, ops, note) + + def _download_access_token(self, video_id, token_kind, param_name): + method = '%sPlaybackAccessToken' % token_kind + ops = { + 'query': '''{ + %s( + %s: "%s", + params: { + platform: "web", + playerBackend: "mediaplayer", + playerType: "site" + } + ) + { + value + signature + } + }''' % (method, param_name, video_id), + } + return self._download_base_gql( + video_id, ops, + 'Downloading %s access token GraphQL' % token_kind)['data'][method] class TwitchVodIE(TwitchBaseIE): @@ -170,8 +188,6 @@ class TwitchVodIE(TwitchBaseIE): ) (?P<id>\d+) ''' - _ITEM_TYPE = 'vod' - _ITEM_SHORTCUT = 'v' _TESTS = [{ 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s', @@ -181,7 +197,7 @@ class TwitchVodIE(TwitchBaseIE): 'title': 'LCK Summer Split - Week 6 Day 1', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 17208, - 'timestamp': 1435131709, + 'timestamp': 1435131734, 'upload_date': '20150624', 'uploader': 'Riot Games', 'uploader_id': 'riotgames', @@ -230,10 +246,20 @@ class TwitchVodIE(TwitchBaseIE): }] def _download_info(self, item_id): - return self._extract_info( - self._call_api( - 'kraken/videos/%s' % item_id, item_id, - 'Downloading video info JSON')) + data = self._download_gql( + item_id, [{ + 'operationName': 'VideoMetadata', + 'variables': { + 'channelLogin': '', + 'videoID': item_id, + }, + }], + 'Downloading stream metadata GraphQL')[0]['data'] + video = data.get('video') + if video is None: + raise ExtractorError( + 'Video %s does not exist' % item_id, expected=True) + return self._extract_info_gql(video, item_id) @staticmethod def _extract_info(info): @@ -272,13 +298,33 @@ class TwitchVodIE(TwitchBaseIE): 'is_live': is_live, } + @staticmethod + def _extract_info_gql(info, item_id): + vod_id = info.get('id') or item_id + # id backward compatibility for download archives + if vod_id[0] != 'v': + vod_id = 'v%s' % vod_id + thumbnail = url_or_none(info.get('previewThumbnailURL')) + if thumbnail: + for p in ('width', 'height'): + thumbnail = thumbnail.replace('{%s}' % p, '0') + return { + 'id': vod_id, + 'title': info.get('title') or 'Untitled Broadcast', + 'description': info.get('description'), + 'duration': int_or_none(info.get('lengthSeconds')), + 'thumbnail': thumbnail, + 'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str), + 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str), + 'timestamp': unified_timestamp(info.get('publishedAt')), + 'view_count': int_or_none(info.get('viewCount')), + } + def _real_extract(self, url): vod_id = self._match_id(url) info = self._download_info(vod_id) - access_token = self._call_api( - 'api/vods/%s/access_token' % vod_id, vod_id, - 'Downloading %s access token' % self._ITEM_TYPE) + access_token = self._download_access_token(vod_id, 'video', 'id') formats = self._extract_m3u8_formats( '%s/vod/%s.m3u8?%s' % ( @@ -289,8 +335,8 @@ class TwitchVodIE(TwitchBaseIE): 'allow_spectre': 'true', 'player': 'twitchweb', 'playlist_include_framerate': 'true', - 'nauth': access_token['token'], - 'nauthsig': access_token['sig'], + 'nauth': access_token['value'], + 'nauthsig': access_token['signature'], })), vod_id, 'mp4', entry_protocol='m3u8_native') @@ -333,37 +379,7 @@ def _make_video_result(node): } -class TwitchGraphQLBaseIE(TwitchBaseIE): - _PAGE_LIMIT = 100 - - _OPERATION_HASHES = { - 'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14', - 'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb', - 'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777', - 'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84', - 'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e', - 'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01', - 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', - } - - def _download_gql(self, video_id, ops, note, fatal=True): - for op in ops: - op['extensions'] = { - 'persistedQuery': { - 'version': 1, - 'sha256Hash': self._OPERATION_HASHES[op['operationName']], - } - } - return self._download_json( - 'https://gql.twitch.tv/gql', video_id, note, - data=json.dumps(ops).encode(), - headers={ - 'Content-Type': 'text/plain;charset=UTF-8', - 'Client-ID': self._CLIENT_ID, - }, fatal=fatal) - - -class TwitchCollectionIE(TwitchGraphQLBaseIE): +class TwitchCollectionIE(TwitchBaseIE): _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)' _TESTS = [{ @@ -400,7 +416,9 @@ class TwitchCollectionIE(TwitchGraphQLBaseIE): entries, playlist_id=collection_id, playlist_title=title) -class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE): +class TwitchPlaylistBaseIE(TwitchBaseIE): + _PAGE_LIMIT = 100 + def _entries(self, channel_name, *args): cursor = None variables_common = self._make_variables(channel_name, *args) @@ -440,49 +458,6 @@ class TwitchPlaylistBaseIE(TwitchGraphQLBaseIE): if not cursor or not isinstance(cursor, compat_str): break - # Deprecated kraken v5 API - def _entries_kraken(self, channel_name, broadcast_type, sort): - access_token = self._download_access_token(channel_name) - channel_id = self._extract_channel_id(access_token['token'], channel_name) - offset = 0 - counter_override = None - for counter in itertools.count(1): - response = self._call_api( - 'kraken/channels/%s/videos/' % channel_id, - channel_id, - 'Downloading video JSON page %s' % (counter_override or counter), - query={ - 'offset': offset, - 'limit': self._PAGE_LIMIT, - 'broadcast_type': broadcast_type, - 'sort': sort, - }) - videos = response.get('videos') - if not isinstance(videos, list): - break - for video in videos: - if not isinstance(video, dict): - continue - video_url = url_or_none(video.get('url')) - if not video_url: - continue - yield { - '_type': 'url_transparent', - 'ie_key': TwitchVodIE.ie_key(), - 'id': video.get('_id'), - 'url': video_url, - 'title': video.get('title'), - 'description': video.get('description'), - 'timestamp': unified_timestamp(video.get('published_at')), - 'duration': float_or_none(video.get('length')), - 'view_count': int_or_none(video.get('views')), - 'language': video.get('language'), - } - offset += self._PAGE_LIMIT - total = int_or_none(response.get('_total')) - if total and offset >= total: - break - class TwitchVideosIE(TwitchPlaylistBaseIE): _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)' @@ -724,7 +699,7 @@ class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE): playlist_title='%s - Collections' % channel_name) -class TwitchStreamIE(TwitchGraphQLBaseIE): +class TwitchStreamIE(TwitchBaseIE): IE_NAME = 'twitch:stream' _VALID_URL = r'''(?x) https?:// @@ -814,8 +789,9 @@ class TwitchStreamIE(TwitchGraphQLBaseIE): if not stream: raise ExtractorError('%s is offline' % channel_name, expected=True) - access_token = self._download_access_token(channel_name) - token = access_token['token'] + access_token = self._download_access_token( + channel_name, 'stream', 'channelName') + token = access_token['value'] stream_id = stream.get('id') or channel_name query = { @@ -826,7 +802,7 @@ class TwitchStreamIE(TwitchGraphQLBaseIE): 'player': 'twitchweb', 'playlist_include_framerate': 'true', 'segment_preference': '4', - 'sig': access_token['sig'].encode('utf-8'), + 'sig': access_token['signature'].encode('utf-8'), 'token': token.encode('utf-8'), } formats = self._extract_m3u8_formats( @@ -912,8 +888,8 @@ class TwitchClipsIE(TwitchBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - clip = self._download_json( - 'https://gql.twitch.tv/gql', video_id, data=json.dumps({ + clip = self._download_base_gql( + video_id, { 'query': '''{ clip(slug: "%s") { broadcaster { @@ -937,10 +913,7 @@ class TwitchClipsIE(TwitchBaseIE): } viewCount } -}''' % video_id, - }).encode(), headers={ - 'Client-ID': self._CLIENT_ID, - })['data']['clip'] +}''' % video_id}, 'Downloading clip GraphQL')['data']['clip'] if not clip: raise ExtractorError( |