diff options
Diffstat (limited to 'hypervideo_dl/extractor/livestream.py')
-rw-r--r-- | hypervideo_dl/extractor/livestream.py | 97 |
1 files changed, 61 insertions, 36 deletions
diff --git a/hypervideo_dl/extractor/livestream.py b/hypervideo_dl/extractor/livestream.py index d883eaf..a05a0fa 100644 --- a/hypervideo_dl/extractor/livestream.py +++ b/hypervideo_dl/extractor/livestream.py @@ -1,33 +1,36 @@ -import re import itertools +import re from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) +from ..compat import compat_str, compat_urlparse from ..utils import ( + determine_ext, find_xpath_attr, - xpath_attr, - xpath_with_ns, - xpath_text, - orderedSet, - update_url_query, - int_or_none, float_or_none, + int_or_none, + orderedSet, parse_iso8601, - determine_ext, + traverse_obj, + update_url_query, + xpath_attr, + xpath_text, + xpath_with_ns, ) class LivestreamIE(InfoExtractor): IE_NAME = 'livestream' - _VALID_URL = r'https?://(?:new\.)?livestream\.com/(?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+))/(?:events/(?P<event_id>\d+)|(?P<event_name>[^/]+))(?:/videos/(?P<id>\d+))?' + _VALID_URL = r'''(?x) + https?://(?:new\.)?livestream\.com/ + (?:accounts/(?P<account_id>\d+)|(?P<account_name>[^/]+)) + (?:/events/(?P<event_id>\d+)|/(?P<event_name>[^/]+))? + (?:/videos/(?P<id>\d+))? + ''' _EMBED_REGEX = [r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"'] _TESTS = [{ 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', - 'md5': '53274c76ba7754fb0e8d072716f2292b', + 'md5': '7876c5f5dc3e711b6b73acce4aac1527', 'info_dict': { 'id': '4719370', 'ext': 'mp4', @@ -37,22 +40,37 @@ class LivestreamIE(InfoExtractor): 'duration': 5968.0, 'like_count': int, 'view_count': int, + 'comment_count': int, 'thumbnail': r're:^http://.*\.jpg$' } }, { - 'url': 'http://new.livestream.com/tedx/cityenglish', + 'url': 'https://livestream.com/coheedandcambria/websterhall', 'info_dict': { - 'title': 'TEDCity2.0 (English)', - 'id': '2245590', + 'id': '1585861', + 'title': 'Live From Webster Hall' }, - 'playlist_mincount': 4, + 'playlist_mincount': 1, }, { - 'url': 'http://new.livestream.com/chess24/tatasteelchess', + 'url': 'https://livestream.com/dayananda/events/7954027', 'info_dict': { - 'title': 'Tata Steel Chess', - 'id': '3705884', + 'title': 'Live from Mevo', + 'id': '7954027', }, - 'playlist_mincount': 60, + 'playlist_mincount': 4, + }, { + 'url': 'https://livestream.com/accounts/82', + 'info_dict': { + 'id': '253978', + 'view_count': int, + 'title': 'trsr', + 'comment_count': int, + 'like_count': int, + 'upload_date': '20120306', + 'timestamp': 1331042383, + 'thumbnail': 'http://img.new.livestream.com/videos/0000000000000372/cacbeed6-fb68-4b5e-ad9c-e148124e68a9_640x427.jpg', + 'duration': 15.332, + 'ext': 'mp4' + } }, { 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640', 'only_matching': True, @@ -62,7 +80,8 @@ class LivestreamIE(InfoExtractor): }] _API_URL_TEMPLATE = 'http://livestream.com/api/accounts/%s/events/%s' - def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): + def _parse_smil_formats_and_subtitles( + self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None): base_ele = find_xpath_attr( smil, self._xpath_ns('.//meta', namespace), 'name', 'httpBase') base = base_ele.get('content') if base_ele is not None else 'http://livestreamvod-f.akamaihd.net/' @@ -86,7 +105,7 @@ class LivestreamIE(InfoExtractor): 'tbr': tbr, 'preference': -1000, # Strictly inferior than all other formats? }) - return formats + return formats, {} def _extract_video_info(self, video_data): video_id = compat_str(video_data['id']) @@ -179,7 +198,7 @@ class LivestreamIE(InfoExtractor): 'is_live': is_live, } - def _extract_event(self, event_data): + def _generate_event_playlist(self, event_data): event_id = compat_str(event_data['id']) account_id = compat_str(event_data['owner_account_id']) feed_root_url = self._API_URL_TEMPLATE % (account_id, event_id) + '/feed.json' @@ -189,7 +208,6 @@ class LivestreamIE(InfoExtractor): return self._extract_stream_info(stream_info) last_video = None - entries = [] for i in itertools.count(1): if last_video is None: info_url = feed_root_url @@ -197,31 +215,38 @@ class LivestreamIE(InfoExtractor): info_url = '{root}?&id={id}&newer=-1&type=video'.format( root=feed_root_url, id=last_video) videos_info = self._download_json( - info_url, event_id, 'Downloading page {0}'.format(i))['data'] + info_url, event_id, f'Downloading page {i}')['data'] videos_info = [v['data'] for v in videos_info if v['type'] == 'video'] if not videos_info: break for v in videos_info: v_id = compat_str(v['id']) - entries.append(self.url_result( - 'http://livestream.com/accounts/%s/events/%s/videos/%s' % (account_id, event_id, v_id), - 'Livestream', v_id, v.get('caption'))) + yield self.url_result( + f'http://livestream.com/accounts/{account_id}/events/{event_id}/videos/{v_id}', + LivestreamIE, v_id, v.get('caption')) last_video = videos_info[-1]['id'] - return self.playlist_result(entries, event_id, event_data['full_name']) def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') event = mobj.group('event_id') or mobj.group('event_name') account = mobj.group('account_id') or mobj.group('account_name') - api_url = self._API_URL_TEMPLATE % (account, event) + api_url = f'http://livestream.com/api/accounts/{account}' + if video_id: video_data = self._download_json( - api_url + '/videos/%s' % video_id, video_id) + f'{api_url}/events/{event}/videos/{video_id}', video_id) return self._extract_video_info(video_data) - else: - event_data = self._download_json(api_url, video_id) - return self._extract_event(event_data) + elif event: + event_data = self._download_json(f'{api_url}/events/{event}', None) + return self.playlist_result( + self._generate_event_playlist(event_data), str(event_data['id']), event_data['full_name']) + + account_data = self._download_json(api_url, None) + items = traverse_obj(account_data, (('upcoming_events', 'past_events'), 'data', ...)) + return self.playlist_result( + itertools.chain.from_iterable(map(self._generate_event_playlist, items)), + account_data.get('id'), account_data.get('full_name')) # The original version of Livestream uses a different system |