diff options
Diffstat (limited to 'hypervideo_dl/extractor/iqiyi.py')
-rw-r--r-- | hypervideo_dl/extractor/iqiyi.py | 35 |
1 file changed, 20 insertions, 15 deletions
diff --git a/hypervideo_dl/extractor/iqiyi.py b/hypervideo_dl/extractor/iqiyi.py index c41f6db..94bcad4 100644 --- a/hypervideo_dl/extractor/iqiyi.py +++ b/hypervideo_dl/extractor/iqiyi.py @@ -270,12 +270,14 @@ class IqIE(InfoExtractor): '1': 'zh_CN', '2': 'zh_TW', '3': 'en', - '4': 'kor', + '4': 'ko', + '5': 'ja', '18': 'th', '21': 'my', '23': 'vi', '24': 'id', '26': 'es', + '27': 'pt', '28': 'ar', } @@ -355,13 +357,16 @@ class IqIE(InfoExtractor): if player_js_cache: return player_js_cache webpack_js_url = self._proto_relative_url(self._search_regex( - r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL')) + r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL')) webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS') - webpack_map1, webpack_map2 = [self._parse_json(js_map, video_id, transform_source=js_to_json) for js_map in self._search_regex( - r'\(({[^}]*})\[\w+\][^\)]*\)\s*\+\s*["\']\.["\']\s*\+\s*({[^}]*})\[\w+\]\+["\']\.js', webpack_js, 'JS locations', group=(1, 2))] - for module_index in reversed(list(webpack_map2.keys())): + webpack_map = self._search_json( + r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id, + contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}', + end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json) + + for module_index in reversed(webpack_map): module_js = self._download_webpage( - f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js', + f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js', video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or '' if 'vms request' in module_js: self.cache.store('iq', 'player_js', module_js) @@ -373,11 +378,11 @@ 
class IqIE(InfoExtractor): self._extract_vms_player_js(webpage, video_id), 'signature function') def _update_bid_tags(self, webpage, video_id): - extracted_bid_tags = self._parse_json( - self._search_regex( - r'arguments\[1\][^,]*,\s*function\s*\([^\)]*\)\s*{\s*"use strict";?\s*var \w=({.+}})\s*,\s*\w\s*=\s*{\s*getNewVd', - self._extract_vms_player_js(webpage, video_id), 'video tags', default=''), - video_id, transform_source=js_to_json, fatal=False) + extracted_bid_tags = self._search_json( + r'function\s*\([^)]*\)\s*\{\s*"use strict";?\s*var \w\s*=\s*', + self._extract_vms_player_js(webpage, video_id), 'video tags', video_id, + contains_pattern=r'{\s*\d+\s*:\s*\{\s*nbid\s*:.+}\s*}', + end_pattern=r'\s*,\s*\w\s*=\s*\{\s*getNewVd', fatal=False, transform_source=js_to_json) if not extracted_bid_tags: return self._BID_TAGS = { @@ -412,7 +417,7 @@ class IqIE(InfoExtractor): 'langCode': self._get_cookie('lang', 'en_us'), 'deviceId': self._get_cookie('QC005', '') }, fatal=False) - ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none, default=[]) + ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none) else: ut_list = ['0'] @@ -444,7 +449,7 @@ class IqIE(InfoExtractor): self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds')) # TODO: Extract audio-only formats - for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])): + for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none)): dash_path = dash_paths.get(bid) if not dash_path: self.report_warning(f'Unknown format id: {bid}. 
It is currently not being extracted') @@ -455,7 +460,7 @@ class IqIE(InfoExtractor): fatal=False), 'data', expected_type=dict) video_format = traverse_obj(format_data, ('program', 'video', lambda _, v: str(v['bid']) == bid), - expected_type=dict, default=[], get_all=False) or {} + expected_type=dict, get_all=False) or {} extracted_formats = [] if video_format.get('m3u8Url'): extracted_formats.extend(self._extract_m3u8_formats( @@ -496,7 +501,7 @@ class IqIE(InfoExtractor): }) formats.extend(extracted_formats) - for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]): + for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict): lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name')) subtitles.setdefault(lang, []).extend([{ 'ext': format_ext, |