aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/extractor/nebula.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/nebula.py')
-rw-r--r-- hypervideo_dl/extractor/nebula.py 368
1 files changed, 209 insertions, 159 deletions
diff --git a/hypervideo_dl/extractor/nebula.py b/hypervideo_dl/extractor/nebula.py
index 9698a35..77f2535 100644
--- a/hypervideo_dl/extractor/nebula.py
+++ b/hypervideo_dl/extractor/nebula.py
@@ -1,22 +1,161 @@
# coding: utf-8
from __future__ import unicode_literals
+import itertools
import json
import time
+import urllib
-from urllib.error import HTTPError
-from .common import InfoExtractor
-from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote
from ..utils import (
ExtractorError,
parse_iso8601,
try_get,
- urljoin,
)
+from .common import InfoExtractor
+
+
+# Shared base for the Nebula extractors in this file: holds the login flow,
+# the authenticated API helper and the code that turns an episode object
+# into an info dict.
+class NebulaBaseIE(InfoExtractor):
+ # Machine name for .netrc credential lookup (presumably consumed by
+ # _get_login_info -- confirm against InfoExtractor).
+ _NETRC_MACHINE = 'watchnebula'
+
+ # All three tokens are filled in by _perform_login.
+ # API token: sent as "Authorization: Token ..." by _call_nebula_api.
+ _nebula_api_token = None
+ # Bearer token: sent as "Authorization: Bearer ..." by _call_nebula_api.
+ _nebula_bearer_token = None
+ # Zype access token: embedded in the player URL by _build_video_info.
+ _zype_access_token = None
+
+    def _perform_nebula_auth(self):
+        """
+        Log in to Nebula with the configured credentials and return the API token.
+
+        The token is also stored in the cookie jar as the 'nebula-auth' cookie
+        (URL-quoted JSON), so later runs can pick it up via
+        _retrieve_nebula_api_token. self.raise_login_required() is called when
+        credentials are missing or the login response carries no 'key'.
+        """
+        username, password = self._get_login_info()
+        if not username or not password:
+            self.raise_login_required()
+
+        login_payload = json.dumps({'email': username, 'password': password}).encode('utf8')
+        login_headers = {
+            'content-type': 'application/json',
+            # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
+            'cookie': ''
+        }
+        response = self._download_json(
+            'https://api.watchnebula.com/api/v1/auth/login/',
+            video_id=None,
+            data=login_payload,
+            headers=login_headers,
+            fatal=False,
+            note='Logging in to Nebula with supplied credentials',
+            errnote='Authentication failed or rejected')
+        if not (response and response.get('key')):
+            self.raise_login_required()
+        api_token = response['key']
+
+        # Persist the token as a cookie, valid for one year from now
+        cookie_payload = json.dumps({
+            "apiToken": api_token,
+            "isLoggingIn": False,
+            "isLoggingOut": False,
+        }, separators=(",", ":"))
+        self._set_cookie(
+            'nebula.app', 'nebula-auth',
+            urllib.parse.quote(cookie_payload),
+            expire_time=int(time.time()) + 86400 * 365,
+        )
+
+        return api_token
+
+    def _retrieve_nebula_api_token(self):
+        """
+        Return a Nebula API token, preferring one stored in the cookie jar.
+
+        The 'nebula-auth' cookie for https://nebula.app is URL-unquoted and parsed
+        as JSON; its 'apiToken' entry is returned when present and non-empty.
+        Otherwise a credential login is performed via _perform_nebula_auth.
+        """
+        auth_cookie = self._get_cookies('https://nebula.app').get('nebula-auth')
+        if not auth_cookie:
+            return self._perform_nebula_auth()
+
+        self.to_screen('Authenticating to Nebula with token from cookie jar')
+        cookie_payload = self._parse_json(urllib.parse.unquote(auth_cookie.value), None)
+        # Fall back to a credential login when the cookie carries no usable token
+        return cookie_payload.get('apiToken') or self._perform_nebula_auth()
+
+    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
+        """
+        Download JSON from a Nebula endpoint with an Authorization header.
+
+        @param url        Full URL of the endpoint
+        @param video_id   Passed through to _download_json
+        @param method     'GET' or 'POST'; a POST is sent with an empty body
+        @param auth_type  'api' sends "Token <api token>", 'bearer' sends "Bearer <bearer token>"
+        @param note       Progress note passed through to _download_json
+
+        If the request fails with HTTP 401 or 403, _perform_login is run once and
+        the request is retried; any other ExtractorError is re-raised unchanged.
+        """
+        # A bare `import urllib` does not load the `urllib.error` submodule by
+        # itself, so import it explicitly instead of relying on other modules.
+        import urllib.error
+
+        assert method in ('GET', 'POST',)
+        assert auth_type in ('api', 'bearer',)
+
+        def inner_call():
+            # Tokens are read at call time so that the retry uses refreshed values
+            if auth_type == 'api':
+                authorization = f'Token {self._nebula_api_token}'
+            else:
+                authorization = f'Bearer {self._nebula_bearer_token}'
+            return self._download_json(
+                url, video_id, note=note, headers={'Authorization': authorization},
+                data=b'' if method == 'POST' else None)
+
+        try:
+            return inner_call()
+        except ExtractorError as exc:
+            # isinstance() is already False for a missing (None) cause
+            if not (isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403)):
+                raise
+            # if 401 or 403, attempt credential re-auth and retry
+            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
+            # NOTE(review): _perform_login itself goes through _call_nebula_api, so a
+            # persistently rejected token may re-enter this handler -- confirm whether
+            # a retry limit is needed.
+            self._perform_login()
+            return inner_call()
+
+    def _fetch_nebula_bearer_token(self):
+        """
+        Request a Bearer token from the Nebula authorization endpoint and return it.
+
+        The request is a POST authenticated with the API token (the default
+        auth_type of _call_nebula_api). Bearer-authenticated calls are used in this
+        file to fetch video and channel metadata.
+        """
+        authorization = self._call_nebula_api(
+            'https://api.watchnebula.com/api/v1/authorization/',
+            method='POST',
+            note='Authorizing to Nebula')
+        return authorization['token']
+
+    def _fetch_zype_access_token(self):
+        """
+        Return the Zype access token of the logged-in user.
+
+        The token is read from ['zype_auth_info']['access_token'] of the Nebula
+        user object; _build_video_info embeds it into the Zype player URL.
+        Raises ExtractorError when the token is missing. For accounts flagged as
+        subscribed the error is marked as expected and tells the user how to
+        generate a token.
+        """
+        account = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
+
+        zype_token = try_get(account, lambda x: x['zype_auth_info']['access_token'], str)
+        if zype_token:
+            return zype_token
+
+        if try_get(account, lambda x: x['is_subscribed'], bool):
+            # TODO: Reimplement the same Zype token polling the Nebula frontend implements
+            # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
+            raise ExtractorError(
+                'Unable to extract Zype access token from Nebula API authentication endpoint. '
+                'Open an arbitrary video in a browser with this account to generate a token',
+                expected=True)
+        raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
+
+    def _build_video_info(self, episode):
+        """
+        Convert a Nebula episode object into a url_transparent info dict.
+
+        The result points at the Zype embed player (ie_key 'Zype') with the stored
+        Zype access token in the URL; the remaining metadata is copied from
+        `episode`. Every accessed field is required: a missing key raises KeyError.
+        """
+        zype_id = episode['zype_id']
+        embed_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
+        channel_slug = episode['channel_slug']
+        channel_page = f'https://nebula.app/{channel_slug}'
+
+        info = {
+            'id': zype_id,
+            'display_id': episode['slug'],
+            '_type': 'url_transparent',
+            'ie_key': 'Zype',
+            'url': embed_url,
+            'title': episode['title'],
+            'description': episode['description'],
+            'timestamp': parse_iso8601(episode['published_at']),
+            # No thumbnail 'id' is set: the 'name' field appears to be null.
+            # The mapping key is passed on as the thumbnail height.
+            'thumbnails': [
+                {'url': thumbnail['original'], 'height': size}
+                for size, thumbnail in episode['assets']['thumbnail'].items()
+            ],
+            'duration': episode['duration'],
+        }
+
+        # Channel, uploader, series and creator all carry the channel's title/slug
+        channel_title = episode['channel_title']
+        info.update({
+            'channel': channel_title,
+            'channel_id': channel_slug,
+            'channel_url': channel_page,
+            'uploader': channel_title,
+            'uploader_id': channel_slug,
+            'uploader_url': channel_page,
+            'series': channel_title,
+            'creator': channel_title,
+        })
+        return info
+
+    def _perform_login(self, username=None, password=None):
+        """
+        Obtain and store the API token, the Bearer token and the Zype access token.
+
+        `username` and `password` are currently ignored; credentials are looked
+        up again inside _perform_nebula_auth.
+        """
+        # FIXME: username should be passed from here to inner functions
+
+        # Order matters: the two fetches below send the API token obtained first
+        self._nebula_api_token = self._retrieve_nebula_api_token()
+        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
+        self._zype_access_token = self._fetch_zype_access_token()
+
+
-class NebulaIE(InfoExtractor):
+class NebulaIE(NebulaBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
_TESTS = [
{
@@ -30,12 +169,13 @@ class NebulaIE(InfoExtractor):
'upload_date': '20180731',
'timestamp': 1533009600,
'channel': 'Lindsay Ellis',
+ 'channel_id': 'lindsayellis',
'uploader': 'Lindsay Ellis',
+ 'uploader_id': 'lindsayellis',
},
'params': {
'usenetrc': True,
},
- 'skip': 'All Nebula content requires authentication',
},
{
'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
@@ -47,13 +187,14 @@ class NebulaIE(InfoExtractor):
'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
'upload_date': '20200327',
'timestamp': 1585348140,
- 'channel': 'The Logistics of D-Day',
- 'uploader': 'The Logistics of D-Day',
+ 'channel': 'Real Engineering',
+ 'channel_id': 'realengineering',
+ 'uploader': 'Real Engineering',
+ 'uploader_id': 'realengineering',
},
'params': {
'usenetrc': True,
},
- 'skip': 'All Nebula content requires authentication',
},
{
'url': 'https://nebula.app/videos/money-episode-1-the-draw',
@@ -66,173 +207,82 @@ class NebulaIE(InfoExtractor):
'upload_date': '20200323',
'timestamp': 1584980400,
'channel': 'Tom Scott Presents: Money',
+ 'channel_id': 'tom-scott-presents-money',
'uploader': 'Tom Scott Presents: Money',
+ 'uploader_id': 'tom-scott-presents-money',
},
'params': {
'usenetrc': True,
},
- 'skip': 'All Nebula content requires authentication',
},
{
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
'only_matching': True,
},
]
- _NETRC_MACHINE = 'watchnebula'
- _nebula_token = None
+    def _fetch_video_metadata(self, slug):
+        """Return the content API's metadata object for the video identified by `slug` (Bearer-authenticated)."""
+        metadata_url = f'https://content.watchnebula.com/video/{slug}/'
+        return self._call_nebula_api(
+            metadata_url, video_id=slug, auth_type='bearer',
+            note='Fetching video meta data')
- def _retrieve_nebula_auth(self):
- """
- Log in to Nebula, and returns a Nebula API token
- """
+    def _real_extract(self, url):
+        """Extract a single Nebula video: resolve the slug from `url`, fetch its metadata and build the info dict."""
+        video_slug = self._match_id(url)
+        episode = self._fetch_video_metadata(video_slug)
+        return self._build_video_info(episode)
- username, password = self._get_login_info()
- if not (username and password):
- self.raise_login_required()
- self.report_login()
- data = json.dumps({'email': username, 'password': password}).encode('utf8')
- response = self._download_json(
- 'https://api.watchnebula.com/api/v1/auth/login/',
- data=data, fatal=False, video_id=None,
- headers={
- 'content-type': 'application/json',
- # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
- 'cookie': ''
+class NebulaCollectionIE(NebulaBaseIE):
+ IE_NAME = 'nebula:collection'
+ _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
+ _TESTS = [
+ {
+ 'url': 'https://nebula.app/tom-scott-presents-money',
+ 'info_dict': {
+ 'id': 'tom-scott-presents-money',
+ 'title': 'Tom Scott Presents: Money',
+ 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
},
- note='Authenticating to Nebula with supplied credentials',
- errnote='Authentication failed or rejected')
- if not response or not response.get('key'):
- self.raise_login_required()
-
- # save nebula token as cookie
- self._set_cookie(
- 'nebula.app', 'nebula-auth',
- compat_urllib_parse_quote(
- json.dumps({
- "apiToken": response["key"],
- "isLoggingIn": False,
- "isLoggingOut": False,
- }, separators=(",", ":"))),
- expire_time=int(time.time()) + 86400 * 365,
- )
-
- return response['key']
-
- def _retrieve_zype_api_key(self, page_url, display_id):
- """
- Retrieves the Zype API key
- """
-
- # Find the js that has the API key from the webpage and download it
- webpage = self._download_webpage(page_url, video_id=display_id)
- main_script_relpath = self._search_regex(
- r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage,
- group='script_relpath', name='script relative path', fatal=True)
- main_script_abspath = urljoin(page_url, main_script_relpath)
- main_script = self._download_webpage(main_script_abspath, video_id=display_id,
- note='Retrieving Zype API key')
-
- api_key = self._search_regex(
- r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
- group='api_key', name='API key', fatal=True)
-
- return api_key
-
- def _call_zype_api(self, path, params, video_id, api_key, note):
- """
- A helper for making calls to the Zype API.
- """
- query = {'api_key': api_key, 'per_page': 1}
- query.update(params)
- return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)
-
- def _call_nebula_api(self, path, video_id, access_token, note):
- """
- A helper for making calls to the Nebula API.
- """
- return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
- 'Authorization': 'Token {access_token}'.format(access_token=access_token)
- }, note=note)
-
- def _fetch_zype_access_token(self, video_id):
- try:
- user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
- except ExtractorError as exc:
- # if 401, attempt credential auth and retry
- if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401:
- self._nebula_token = self._retrieve_nebula_auth()
- user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
- else:
- raise
-
- access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
- if not access_token:
- if try_get(user_object, lambda x: x['is_subscribed'], bool):
- # TODO: Reimplement the same Zype token polling the Nebula frontend implements
- # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
- raise ExtractorError(
- 'Unable to extract Zype access token from Nebula API authentication endpoint. '
- 'Open an arbitrary video in a browser with this account to generate a token',
- expected=True)
- raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
- return access_token
-
- def _extract_channel_title(self, video_meta):
- # TODO: Implement the API calls giving us the channel list,
- # so that we can do the title lookup and then figure out the channel URL
- categories = video_meta.get('categories', []) if video_meta else []
- # the channel name is the value of the first category
- for category in categories:
- if category.get('value'):
- return category['value'][0]
-
- def _real_initialize(self):
- # check cookie jar for valid token
- nebula_cookies = self._get_cookies('https://nebula.app')
- nebula_cookie = nebula_cookies.get('nebula-auth')
- if nebula_cookie:
- self.to_screen('Authenticating to Nebula with token from cookie jar')
- nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value)
- self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
+ 'playlist_count': 5,
+ 'params': {
+ 'usenetrc': True,
+ },
+ }, {
+ 'url': 'https://nebula.app/lindsayellis',
+ 'info_dict': {
+ 'id': 'lindsayellis',
+ 'title': 'Lindsay Ellis',
+ 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
+ },
+ 'playlist_mincount': 100,
+ 'params': {
+ 'usenetrc': True,
+ },
+ },
+ ]
- # try to authenticate using credentials if no valid token has been found
- if not self._nebula_token:
- self._nebula_token = self._retrieve_nebula_auth()
+    def _generate_playlist_entries(self, collection_id, channel):
+        """
+        Lazily yield an info dict for every episode of a channel, following pagination.
+
+        `channel` is the already downloaded first page. Further pages are fetched
+        from the 'next' URL of each page until it is empty; page numbering in the
+        progress notes therefore starts at 2.
+        """
+        next_page_number = 2
+        while True:
+            episode_listing = channel['episodes']
+            for episode in episode_listing['results']:
+                yield self._build_video_info(episode)
+
+            next_url = episode_listing['next']
+            if not next_url:
+                return
+            channel = self._call_nebula_api(
+                next_url, collection_id, auth_type='bearer',
+                note=f'Retrieving channel page {next_page_number}')
+            next_page_number += 1
def _real_extract(self, url):
- display_id = self._match_id(url)
- api_key = self._retrieve_zype_api_key(url, display_id)
-
- response = self._call_zype_api('/videos', {'friendly_title': display_id},
- display_id, api_key, note='Retrieving metadata from Zype')
- if len(response.get('response') or []) != 1:
- raise ExtractorError('Unable to find video on Zype API')
- video_meta = response['response'][0]
-
- video_id = video_meta['_id']
- zype_access_token = self._fetch_zype_access_token(display_id)
+ collection_id = self._match_id(url)
+ channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
+ channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
+ channel_details = channel['details']
- channel_title = self._extract_channel_title(video_meta)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- '_type': 'url_transparent',
- 'ie_key': 'Zype',
- 'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token),
- 'title': video_meta.get('title'),
- 'description': video_meta.get('description'),
- 'timestamp': parse_iso8601(video_meta.get('published_at')),
- 'thumbnails': [{
- 'id': tn.get('name'), # this appears to be null
- 'url': tn['url'],
- 'width': tn.get('width'),
- 'height': tn.get('height'),
- } for tn in video_meta.get('thumbnails', [])],
- 'duration': video_meta.get('duration'),
- 'channel': channel_title,
- 'uploader': channel_title, # we chose uploader = channel name
- # TODO: uploader_url, channel_id, channel_url
- }
+ return self.playlist_result(
+ entries=self._generate_playlist_entries(collection_id, channel),
+ playlist_id=collection_id,
+ playlist_title=channel_details['title'],
+ playlist_description=channel_details['description']
+ )