aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/crunchyroll.py
diff options
context:
space:
mode:
authorJeff Huffman <tejing@tejing.com>2022-01-28 16:33:51 -0800
committerGitHub <noreply@github.com>2022-01-29 06:03:51 +0530
commit706dfe441b3cf01c0e2b294afc7d293211a74e94 (patch)
tree6cf0c6da0d573d76ac1cb1a448e05f0930e7f404 /yt_dlp/extractor/crunchyroll.py
parentc4da5ff971f22bf3c93ac521d6805f7fb561284b (diff)
downloadhypervideo-pre-706dfe441b3cf01c0e2b294afc7d293211a74e94.tar.lz
hypervideo-pre-706dfe441b3cf01c0e2b294afc7d293211a74e94.tar.xz
hypervideo-pre-706dfe441b3cf01c0e2b294afc7d293211a74e94.zip
[crunchyroll:beta] Add cookies support (#2506)
* Extract directly from the beta API when cookies are passed. If the login cookie is absent, the extraction is delegated to `CrunchyrollIE`. This causes different metadata to be extracted (including formats and video id) and therefore results in a different archive entry. For now, this issue is unavoidable since the browser also redirects to the old site when not logged in.
* Adds extractor-args `format` and `hardsub` to control the source and subtitles of the extracted formats

Closes #1911
Authored by: tejing1
Diffstat (limited to 'yt_dlp/extractor/crunchyroll.py')
-rw-r--r--yt_dlp/extractor/crunchyroll.py123
1 files changed, 116 insertions, 7 deletions
diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py
index cd35728e5..5253e7e4b 100644
--- a/yt_dlp/extractor/crunchyroll.py
+++ b/yt_dlp/extractor/crunchyroll.py
@@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals
+import base64
import re
import json
import zlib
@@ -23,13 +24,16 @@ from ..utils import (
bytes_to_intlist,
extract_attributes,
float_or_none,
+ format_field,
intlist_to_bytes,
int_or_none,
+ join_nonempty,
lowercase_escape,
merge_dicts,
qualities,
remove_end,
sanitized_Request,
+ traverse_obj,
try_get,
urlencode_postdata,
xpath_text,
@@ -733,13 +737,118 @@ class CrunchyrollBetaIE(CrunchyrollBaseIE):
def _real_extract(self, url):
    """Extract an episode from the Crunchyroll beta site.

    When the ``etp_rt`` cookie is not available for ``url``, the episode is
    handed over to ``CrunchyrollIE`` through a URL built from the page's
    embedded state. Otherwise the beta API is queried directly using a
    token obtained with that cookie.

    The extractor-args ``format`` (stream types to use, default
    ``adaptive_hls``) and ``hardsub`` (hardsub locales to accept, default
    ``none``) select which streams are turned into formats.

    Returns an info dict for the episode, or a ``url_result`` delegating to
    ``CrunchyrollIE``.

    Raises ``ExtractorError`` when the episode has no playback URL (with
    ``expected=True`` if the episode is flagged as premium-only).
    """
    lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
    webpage = self._download_webpage(url, display_id)
    initial_state = self._parse_json(
        self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'initial state'),
        display_id)
    episode_data = initial_state['content']['byId'][internal_id]
    if not self._get_cookies(url).get('etp_rt'):
        # No login cookie: delegate to the extractor for the old site.
        video_id = episode_data['external_id'].split('.')[1]
        series_id = episode_data['episode_metadata']['series_slug_title']
        return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
                               CrunchyrollIE.ie_key(), video_id)

    app_config = self._parse_json(
        self._search_regex(r'__APP_CONFIG__\s*=\s*({.+?})\s*;', webpage, 'app config'),
        display_id)
    client_id = app_config['cxApiParams']['accountAuthClientId']
    api_domain = app_config['cxApiParams']['apiDomain']
    # HTTP Basic credentials made of the client id and an empty password.
    basic_token = base64.b64encode(('%s:' % client_id).encode('ascii')).decode('ascii')
    auth_response = self._download_json(
        f'{api_domain}/auth/v1/token', display_id,
        note='Authenticating with cookie',
        headers={
            'Authorization': 'Basic ' + basic_token
        }, data='grant_type=etp_rt_cookie'.encode('ascii'))
    policy_response = self._download_json(
        f'{api_domain}/index/v2', display_id,
        note='Retrieving signed policy',
        headers={
            'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']
        })
    cms = policy_response['cms']
    bucket = cms['bucket']
    # The signed policy values are sent as query parameters of the CMS request.
    params = {
        'Policy': cms['policy'],
        'Signature': cms['signature'],
        'Key-Pair-Id': cms['key_pair_id']
    }
    locale = traverse_obj(initial_state, ('localization', 'locale'))
    if locale:
        params['locale'] = locale
    episode_response = self._download_json(
        f'{api_domain}/cms/v2{bucket}/episodes/{internal_id}', display_id,
        note='Retrieving episode metadata',
        query=params)

    playback_url = episode_response.get('playback')
    if not playback_url:
        if episode_response.get('is_premium_only'):
            raise ExtractorError('This video is for premium members only.', expected=True)
        # Fail with a readable message instead of a bare KeyError.
        raise ExtractorError('Unable to find the playback URL for this episode')
    stream_response = self._download_json(
        playback_url, display_id,
        note='Retrieving stream info')

    # 'images' -> 'thumbnail' is iterated as a list of lists of thumbnail
    # dicts; it may be missing entirely, hence the `or []` fallbacks.
    thumbnails = []
    for thumbnails_data in traverse_obj(episode_response, ('images', 'thumbnail')) or []:
        for thumbnail_data in thumbnails_data or []:
            if not thumbnail_data.get('source'):
                continue  # a thumbnail without a URL is unusable
            thumbnails.append({
                'url': thumbnail_data.get('source'),
                'width': thumbnail_data.get('width'),
                'height': thumbnail_data.get('height'),
            })

    # A distinct loop name keeps the URL-derived `lang` intact.
    subtitles = {}
    for sub_lang, subtitle_data in (stream_response.get('subtitles') or {}).items():
        if not subtitle_data.get('url'):
            continue
        subtitles[sub_lang] = [{
            'url': subtitle_data.get('url'),
            'ext': subtitle_data.get('format')
        }]

    # 'none' requests streams without hardsubs, which carry an empty locale.
    requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
    # Reversed so that hardsubs listed earlier by the user rank higher.
    hardsub_preference = qualities(requested_hardsubs[::-1])
    requested_formats = self._configuration_arg('format') or ['adaptive_hls']

    formats = []
    for stream_type, streams in (stream_response.get('streams') or {}).items():
        if stream_type not in requested_formats:
            continue
        protocol = stream_type.split('_')[-1]
        for stream in streams.values():
            hardsub_lang = stream.get('hardsub_locale') or ''
            if hardsub_lang.lower() not in requested_hardsubs:
                continue
            if not stream.get('url'):
                continue
            format_id = join_nonempty(
                stream_type,
                format_field(stream, 'hardsub_locale', 'hardsub-%s'))
            if protocol == 'hls':
                adaptive_formats = self._extract_m3u8_formats(
                    stream['url'], display_id, 'mp4', m3u8_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            elif protocol == 'dash':
                adaptive_formats = self._extract_mpd_formats(
                    stream['url'], display_id, mpd_id=format_id,
                    note='Downloading %s information' % format_id,
                    fatal=False)
            else:
                # Only HLS and DASH manifests can be parsed here. Without
                # this branch `adaptive_formats` would be unbound or would
                # still hold (and duplicate) the previous stream's formats.
                continue
            for f in adaptive_formats:
                if f.get('acodec') != 'none':
                    f['language'] = stream_response.get('audio_locale')
                f['quality'] = hardsub_preference(hardsub_lang.lower())
            formats.extend(adaptive_formats)
    self._sort_formats(formats)

    description = episode_response.get('description')
    if description is not None:
        # Replaces the literal four-character sequence backslash-r-backslash-n
        # (note the raw string) with a real newline.
        description = description.replace(r'\r\n', '\n')

    return {
        'id': internal_id,
        'title': '%s Episode %s – %s' % (episode_response.get('season_title'), episode_response.get('episode'), episode_response.get('title')),
        'description': description,
        'duration': float_or_none(episode_response.get('duration_ms'), 1000),
        'thumbnails': thumbnails,
        'series': episode_response.get('series_title'),
        'series_id': episode_response.get('series_id'),
        'season': episode_response.get('season_title'),
        'season_id': episode_response.get('season_id'),
        'season_number': episode_response.get('season_number'),
        'episode': episode_response.get('title'),
        'episode_number': episode_response.get('sequence_number'),
        'subtitles': subtitles,
        'formats': formats
    }
class CrunchyrollBetaShowIE(CrunchyrollBaseIE):