From 5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs?= Date: Mon, 18 Oct 2021 15:24:21 -0500 Subject: update from upstream --- hypervideo_dl/extractor/crunchyroll.py | 133 +++++++++++++++++++++++++-------- 1 file changed, 102 insertions(+), 31 deletions(-) (limited to 'hypervideo_dl/extractor/crunchyroll.py') diff --git a/hypervideo_dl/extractor/crunchyroll.py b/hypervideo_dl/extractor/crunchyroll.py index bc2d1fa..511ac1b 100644 --- a/hypervideo_dl/extractor/crunchyroll.py +++ b/hypervideo_dl/extractor/crunchyroll.py @@ -29,6 +29,7 @@ from ..utils import ( merge_dicts, remove_end, sanitized_Request, + try_get, urlencode_postdata, xpath_text, ) @@ -120,7 +121,7 @@ class CrunchyrollBaseIE(InfoExtractor): class CrunchyrollIE(CrunchyrollBaseIE, VRVIE): IE_NAME = 'crunchyroll' - _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' + _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)' _TESTS = [{ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', 'info_dict': { @@ -412,8 +413,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return subtitles def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('video_id') + mobj = self._match_valid_url(url) + video_id = mobj.group('id') if mobj.group('prefix') == 'm': mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage') @@ -428,7 +429,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text r'
(.+?)
', webpage, 'trailer-notice', default='') if note_m: - raise ExtractorError(note_m) + raise ExtractorError(note_m, expected=True) mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P{.+?})\]\)', webpage) if mobj: @@ -458,6 +459,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text video_description = (self._parse_json(self._html_search_regex( r']*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id, webpage, 'description', default='{}'), video_id) or media_metadata).get('description') + + thumbnails = [] + thumbnail_url = (self._parse_json(self._html_search_regex( + r'