about summary refs log tree commit diff stats
path: root/hypervideo_dl/extractor/crunchyroll.py
diff options
context:
space:
mode:
author Jesús <heckyel@hyperbola.info> 2021-10-18 15:24:21 -0500
committer Jesús <heckyel@hyperbola.info> 2021-10-18 15:24:21 -0500
commit 5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e (patch)
tree 65209bc739db35e31f1c9b5b868eb5df4fe12ae3 /hypervideo_dl/extractor/crunchyroll.py
parent 27fe903c511691c078942bef5ee9a05a43b15c8f (diff)
download hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.lz
hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.xz
hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.zip
update from upstream
Diffstat (limited to 'hypervideo_dl/extractor/crunchyroll.py')
-rw-r--r-- hypervideo_dl/extractor/crunchyroll.py 133
1 files changed, 102 insertions, 31 deletions
diff --git a/hypervideo_dl/extractor/crunchyroll.py b/hypervideo_dl/extractor/crunchyroll.py
index bc2d1fa..511ac1b 100644
--- a/hypervideo_dl/extractor/crunchyroll.py
+++ b/hypervideo_dl/extractor/crunchyroll.py
@@ -29,6 +29,7 @@ from ..utils import (
merge_dicts,
remove_end,
sanitized_Request,
+ try_get,
urlencode_postdata,
xpath_text,
)
@@ -120,7 +121,7 @@ class CrunchyrollBaseIE(InfoExtractor):
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
IE_NAME = 'crunchyroll'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<id>[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
@@ -412,8 +413,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
return subtitles
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
+ mobj = self._match_valid_url(url)
+ video_id = mobj.group('id')
if mobj.group('prefix') == 'm':
mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
@@ -428,7 +429,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'<div class="showmedia-trailer-notice">(.+?)</div>',
webpage, 'trailer-notice', default='')
if note_m:
- raise ExtractorError(note_m)
+ raise ExtractorError(note_m, expected=True)
mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
if mobj:
@@ -458,6 +459,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
video_description = (self._parse_json(self._html_search_regex(
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
+
+ thumbnails = []
+ thumbnail_url = (self._parse_json(self._html_search_regex(
+ r'<script type="application\/ld\+json">\n\s*(.+?)<\/script>',
+ webpage, 'thumbnail_url', default='{}'), video_id)).get('image')
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': 1920,
+ 'height': 1080
+ })
+
if video_description:
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
video_uploader = self._html_search_regex(
@@ -473,15 +486,11 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
stream.get('url'), video_id, stream.get('format'),
audio_lang, hardsub_lang)
for f in vrv_formats:
- if not hardsub_lang:
- f['preference'] = 1
- language_preference = 0
- if audio_lang == language:
- language_preference += 1
- if hardsub_lang == language:
- language_preference += 1
- if language_preference:
- f['language_preference'] = language_preference
+ f['language_preference'] = 1 if audio_lang == language else 0
+ f['quality'] = (
+ 1 if not hardsub_lang
+ else 0 if hardsub_lang == language
+ else -1)
formats.extend(vrv_formats)
if not formats:
available_fmts = []
@@ -571,7 +580,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'ext': 'flv',
})
formats.append(format_info)
- self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
+ self._sort_formats(formats)
metadata = self._call_rpc_api(
'VideoPlayer_GetMediaMetadata', video_id,
@@ -596,21 +605,25 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False)
- season = episode = episode_number = duration = thumbnail = None
+ season = episode = episode_number = duration = None
if isinstance(metadata, compat_etree_Element):
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
duration = float_or_none(media_metadata.get('duration'), 1000)
- thumbnail = xpath_text(metadata, 'episode_image_url')
if not episode:
episode = media_metadata.get('title')
if not episode_number:
episode_number = int_or_none(media_metadata.get('episode_number'))
- if not thumbnail:
- thumbnail = media_metadata.get('thumbnail', {}).get('url')
+ thumbnail_url = try_get(media, lambda x: x['thumbnail']['url'])
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': 640,
+ 'height': 360
+ })
season_number = int_or_none(self._search_regex(
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@@ -623,7 +636,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
'title': video_title,
'description': video_description,
'duration': duration,
- 'thumbnail': thumbnail,
+ 'thumbnails': thumbnails,
'uploader': video_uploader,
'series': series,
'season': season,
@@ -637,10 +650,10 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
IE_NAME = 'crunchyroll:playlist'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{1,2}/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
_TESTS = [{
- 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
'info_dict': {
'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
@@ -659,28 +672,86 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
# geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
'only_matching': True,
+ }, {
+ 'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers',
+ 'only_matching': True,
}]
def _real_extract(self, url):
show_id = self._match_id(url)
webpage = self._download_webpage(
- self._add_skip_wall(url), show_id,
+ # https:// gives a 403, but http:// does not
+ self._add_skip_wall(url).replace('https://', 'http://'), show_id,
headers=self.geo_verification_headers())
title = self._html_search_meta('name', webpage, default=None)
- episode_paths = re.findall(
- r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
- webpage)
- entries = [
- self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
- for ep_id, ep in episode_paths
- ]
- entries.reverse()
+ episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"'
+ season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)'
+ paths = re.findall(f'(?s){episode_re}|{season_re}', webpage)
+
+ entries, current_season = [], None
+ for ep_id, ep, season in paths:
+ if season:
+ current_season = season
+ continue
+ entries.append(self.url_result(
+ f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season))
return {
'_type': 'playlist',
'id': show_id,
'title': title,
- 'entries': entries,
+ 'entries': reversed(entries),
}
+
+
+class CrunchyrollBetaIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:beta'
+ _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<internal_id>\w+)/(?P<id>[\w\-]+)/?(?:\?|$)'
+ _TESTS = [{
+ 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future',
+ 'info_dict': {
+ 'id': '696363',
+ 'ext': 'mp4',
+ 'timestamp': 1459610100,
+ 'description': 'md5:a022fbec4fbb023d43631032c91ed64b',
+ 'uploader': 'Toei Animation',
+ 'title': 'World Trigger Episode 73 – To the Future',
+ 'upload_date': '20160402',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ 'expected_warnings': ['Unable to download XML']
+ }]
+
+ def _real_extract(self, url):
+ lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id')
+ webpage = self._download_webpage(url, display_id)
+ episode_data = self._parse_json(
+ self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'),
+ display_id)['content']['byId'][internal_id]
+ video_id = episode_data['external_id'].split('.')[1]
+ series_id = episode_data['episode_metadata']['series_slug_title']
+ return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}',
+ CrunchyrollIE.ie_key(), video_id)
+
+
+class CrunchyrollBetaShowIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:playlist:beta'
+ _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/\w+/(?P<id>[\w\-]+)/?(?:\?|$)'
+ _TESTS = [{
+ 'url': 'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA',
+ 'info_dict': {
+ 'id': 'girl-friend-beta',
+ 'title': 'Girl Friend BETA',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ lang, series_id = self._match_valid_url(url).group('lang', 'id')
+ return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}',
+ CrunchyrollShowPlaylistIE.ie_key(), series_id)