diff options
Diffstat (limited to 'hypervideo_dl/extractor/trovo.py')
-rw-r--r-- | hypervideo_dl/extractor/trovo.py | 308 |
1 files changed, 192 insertions, 116 deletions
diff --git a/hypervideo_dl/extractor/trovo.py b/hypervideo_dl/extractor/trovo.py index 65ea13d..545a672 100644 --- a/hypervideo_dl/extractor/trovo.py +++ b/hypervideo_dl/extractor/trovo.py @@ -1,8 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json +import random +import string from .common import InfoExtractor from ..utils import ( @@ -10,6 +9,7 @@ from ..utils import ( format_field, int_or_none, str_or_none, + traverse_obj, try_get, ) @@ -18,10 +18,20 @@ class TrovoBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/' _HEADERS = {'Origin': 'https://trovo.live'} - def _call_api(self, video_id, query=None, data=None): - return self._download_json( - 'https://gql.trovo.live/', video_id, query=query, data=data, - headers={'Accept': 'application/json'}) + def _call_api(self, video_id, data): + if 'persistedQuery' in data.get('extensions', {}): + url = 'https://gql.trovo.live' + else: + url = 'https://api-web.trovo.live/graphql' + + resp = self._download_json( + url, video_id, data=json.dumps([data]).encode(), headers={'Accept': 'application/json'}, + query={ + 'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)), + })[0] + if 'errors' in resp: + raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}') + return resp['data'][data['operationName']] def _extract_streamer_info(self, data): streamer_info = data.get('streamerInfo') or {} @@ -29,36 +39,43 @@ class TrovoBaseIE(InfoExtractor): return { 'uploader': streamer_info.get('nickName'), 'uploader_id': str_or_none(streamer_info.get('uid')), - 'uploader_url': format_field(username, template='https://trovo.live/%s'), + 'uploader_url': format_field(username, None, 'https://trovo.live/%s'), } class TrovoIE(TrovoBaseIE): - _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)' + _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:s/)?(?!(?:clip|video)/)(?P<id>(?!s/)[^/?&#]+(?![^#]+[?&]vid=))' + _TESTS = 
[{ + 'url': 'https://trovo.live/Exsl', + 'only_matching': True, + }, { + 'url': 'https://trovo.live/s/SkenonSLive/549759191497', + 'only_matching': True, + }, { + 'url': 'https://trovo.live/s/zijo987/208251706', + 'info_dict': { + 'id': '104125853_104125853_1656439572', + 'ext': 'flv', + 'uploader_url': 'https://trovo.live/zijo987', + 'uploader_id': '104125853', + 'thumbnail': 'https://livecover.trovo.live/screenshot/73846_104125853_104125853-2022-06-29-04-00-22-852x480.jpg', + 'uploader': 'zijo987', + 'title': '💥IGRAMO IGRICE UPADAJTE💥2500/5000 2022-06-28 22:01', + 'live_status': 'is_live', + }, + 'skip': 'May not be live' + }] def _real_extract(self, url): username = self._match_id(url) - live_info = self._call_api(username, query={ - 'query': '''{ - getLiveInfo(params: {userName: "%s"}) { - isLive - programInfo { - coverUrl - id - streamInfo { - desc - playUrl - } - title - } - streamerInfo { - nickName - uid - userName - } - } -}''' % username, - })['data']['getLiveInfo'] + live_info = self._call_api(username, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': username, + }, + }, + }) if live_info.get('isLive') == 0: raise ExtractorError('%s is offline' % username, expected=True) program_info = live_info['programInfo'] @@ -75,9 +92,9 @@ class TrovoIE(TrovoBaseIE): 'format_id': format_id, 'height': int_or_none(format_id[:-1]) if format_id else None, 'url': play_url, + 'tbr': stream_info.get('bitrate'), 'http_headers': self._HEADERS, }) - self._sort_formats(formats) info = { 'id': program_id, @@ -91,57 +108,100 @@ class TrovoIE(TrovoBaseIE): class TrovoVodIE(TrovoBaseIE): - _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)' + _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video|s)/(?:[^/]+/\d+[^#]*[?&]vid=)?(?P<id>(?<!/s/)[^/?&#]+)' _TESTS = [{ - 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', + 'url': 
'https://trovo.live/clip/lc-5285890818705062210?ltab=videos', + 'params': {'getcomments': True}, 'info_dict': { - 'id': 'ltv-100095501_100095501_1609596043', + 'id': 'lc-5285890818705062210', 'ext': 'mp4', - 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!', - 'uploader': 'Exsl', - 'timestamp': 1609640305, - 'upload_date': '20210103', - 'uploader_id': '100095501', - 'duration': 43977, + 'title': 'fatal moaning for a super good🤣🤣', + 'uploader': 'OneTappedYou', + 'timestamp': 1621628019, + 'upload_date': '20210521', + 'uploader_id': '100719456', + 'duration': 31, 'view_count': int, 'like_count': int, 'comment_count': int, - 'comments': 'mincount:8', - 'categories': ['Grand Theft Auto V'], + 'comments': 'mincount:1', + 'categories': ['Call of Duty: Mobile'], + 'uploader_url': 'https://trovo.live/OneTappedYou', + 'thumbnail': r're:^https?://.*\.jpg', }, - 'skip': '404' }, { - 'url': 'https://trovo.live/clip/lc-5285890810184026005', + 'url': 'https://trovo.live/s/SkenonSLive/549759191497?vid=ltv-100829718_100829718_387702301737980280', + 'info_dict': { + 'id': 'ltv-100829718_100829718_387702301737980280', + 'ext': 'mp4', + 'timestamp': 1654909624, + 'thumbnail': 'http://vod.trovo.live/1f09baf0vodtransger1301120758/ef9ea3f0387702301737980280/coverBySnapshot/coverBySnapshot_10_0.jpg', + 'uploader_id': '100829718', + 'uploader': 'SkenonSLive', + 'title': 'Trovo u secanju, uz par modova i muzike :)', + 'uploader_url': 'https://trovo.live/SkenonSLive', + 'duration': 10830, + 'view_count': int, + 'like_count': int, + 'upload_date': '20220611', + 'comment_count': int, + 'categories': ['Minecraft'], + }, + 'skip': 'Not available', + }, { + 'url': 'https://trovo.live/s/Trovo/549756886599?vid=ltv-100264059_100264059_387702304241698583', + 'info_dict': { + 'id': 'ltv-100264059_100264059_387702304241698583', + 'ext': 'mp4', + 'timestamp': 1661479563, + 'thumbnail': 
'http://vod.trovo.live/be5ae591vodtransusw1301120758/cccb9915387702304241698583/coverBySnapshot/coverBySnapshot_10_0.jpg', + 'uploader_id': '100264059', + 'uploader': 'Trovo', + 'title': 'Dev Corner 8/25', + 'uploader_url': 'https://trovo.live/Trovo', + 'duration': 3753, + 'view_count': int, + 'like_count': int, + 'upload_date': '20220826', + 'comment_count': int, + 'categories': ['Talk Shows'], + }, + }, { + 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', + 'only_matching': True, + }, { + 'url': 'https://trovo.live/s/SkenonSLive/549759191497?foo=bar&vid=ltv-100829718_100829718_387702301737980280', 'only_matching': True, }] def _real_extract(self, url): vid = self._match_id(url) - resp = self._call_api(vid, data=json.dumps([{ - 'query': '''{ - batchGetVodDetailInfo(params: {vids: ["%s"]}) { - VodDetailInfos - } -}''' % vid, - }, { - 'query': '''{ - getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) { - commentList { - author { - nickName - uid - } - commentID - content - createdAt - parentID - } - } -}''' % vid, - }]).encode()) - vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid] - vod_info = vod_detail_info['vodInfo'] - title = vod_info['title'] + + # NOTE: It is also possible to extract this info from the Nuxt data on the website, + # however that seems unreliable - sometimes it randomly doesn't return the data, + # at least when using a non-residential IP. 
+ resp = self._call_api(vid, data={ + 'operationName': 'vod_VodReaderService_BatchGetVodDetailInfo', + 'variables': { + 'params': { + 'vids': [vid], + }, + }, + 'extensions': {}, + }) + + vod_detail_info = traverse_obj(resp, ('VodDetailInfos', vid), expected_type=dict) + if not vod_detail_info: + raise ExtractorError('This video not found or not available anymore', expected=True) + vod_info = vod_detail_info.get('vodInfo') + title = vod_info.get('title') + + if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'): + playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting'] + if playback_rights_setting == 'SubscriberOnly': + raise ExtractorError('This video is only available for subscribers', expected=True) + else: + raise ExtractorError(f'This video is not available ({playback_rights_setting})', expected=True) language = vod_info.get('languageName') formats = [] @@ -161,28 +221,10 @@ class TrovoVodIE(TrovoBaseIE): 'url': play_url, 'http_headers': self._HEADERS, }) - self._sort_formats(formats) category = vod_info.get('categoryName') get_count = lambda x: int_or_none(vod_info.get(x + 'Num')) - comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or [] - comments = [] - for comment in comment_list: - content = comment.get('content') - if not content: - continue - author = comment.get('author') or {} - parent = comment.get('parentID') - comments.append({ - 'author': author.get('nickName'), - 'author_id': str_or_none(author.get('uid')), - 'id': str_or_none(comment.get('commentID')), - 'text': content, - 'timestamp': int_or_none(comment.get('createdAt')), - 'parent': 'root' if parent == 0 else str_or_none(parent), - }) - info = { 'id': vid, 'title': title, @@ -193,35 +235,81 @@ class TrovoVodIE(TrovoBaseIE): 'view_count': get_count('watch'), 'like_count': get_count('like'), 'comment_count': get_count('comment'), - 'comments': comments, 'categories': [category] if category else None, + 
'__post_extractor': self.extract_comments(vid), } info.update(self._extract_streamer_info(vod_detail_info)) return info + def _get_comments(self, vid): + for page in itertools.count(1): + comments_json = self._call_api(vid, data={ + 'operationName': 'public_CommentProxyService_GetCommentList', + 'variables': { + 'params': { + 'appInfo': { + 'postID': vid, + }, + 'preview': {}, + 'pageSize': 99, + 'page': page, + }, + }, + 'extensions': { + 'singleReq': 'true', + }, + }) + for comment in comments_json['commentList']: + content = comment.get('content') + if not content: + continue + author = comment.get('author') or {} + parent = comment.get('parentID') + yield { + 'author': author.get('nickName'), + 'author_id': str_or_none(author.get('uid')), + 'id': str_or_none(comment.get('commentID')), + 'text': content, + 'timestamp': int_or_none(comment.get('createdAt')), + 'parent': 'root' if parent == 0 else str_or_none(parent), + } + + if comments_json['lastPage']: + break -class TrovoChannelBaseIE(TrovoBaseIE): - def _get_vod_json(self, page, uid): - raise NotImplementedError('This method must be implemented by subclasses') - def _entries(self, uid): +class TrovoChannelBaseIE(TrovoBaseIE): + def _entries(self, spacename): for page in itertools.count(1): - vod_json = self._get_vod_json(page, uid) + vod_json = self._call_api(spacename, data={ + 'operationName': self._OPERATION, + 'variables': { + 'params': { + 'terminalSpaceID': { + 'spaceName': spacename, + }, + 'currPage': page, + 'pageSize': 99, + }, + }, + 'extensions': { + 'singleReq': 'true', + }, + }) vods = vod_json.get('vodInfos', []) for vod in vods: + vid = vod.get('vid') + room = traverse_obj(vod, ('spaceInfo', 'roomID')) yield self.url_result( - 'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')), + f'https://trovo.live/s/{spacename}/{room}?vid={vid}', ie=TrovoVodIE.ie_key()) - has_more = vod_json['hasMore'] + has_more = vod_json.get('hasMore') if not has_more: break def _real_extract(self, url): - id = 
self._match_id(url) - uid = str(self._call_api(id, query={ - 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id - })['data']['getLiveInfo']['streamerInfo']['uid']) - return self.playlist_result(self._entries(uid), playlist_id=uid) + spacename = self._match_id(url) + return self.playlist_result(self._entries(spacename), playlist_id=spacename) class TrovoChannelVodIE(TrovoChannelBaseIE): @@ -232,17 +320,11 @@ class TrovoChannelVodIE(TrovoChannelBaseIE): 'url': 'trovovod:OneTappedYou', 'playlist_mincount': 24, 'info_dict': { - 'id': '100719456', + 'id': 'OneTappedYou', }, }] - _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}' - _TYPE = 'video' - - def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelLtvVideoInfos'] + _OPERATION = 'vod_VodReaderService_GetChannelLtvVideoInfos' class TrovoChannelClipIE(TrovoChannelBaseIE): @@ -253,14 +335,8 @@ class TrovoChannelClipIE(TrovoChannelBaseIE): 'url': 'trovoclip:OneTappedYou', 'playlist_mincount': 29, 'info_dict': { - 'id': '100719456', + 'id': 'OneTappedYou', }, }] - _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}' - _TYPE = 'clip' - - def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelClipVideoInfos'] + _OPERATION = 'vod_VodReaderService_GetChannelClipVideoInfos' |