about summary refs log tree commit diff stats
path: root/hypervideo_dl/extractor/iqiyi.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/iqiyi.py')
-rw-r--r-- hypervideo_dl/extractor/iqiyi.py 35
1 files changed, 20 insertions, 15 deletions
diff --git a/hypervideo_dl/extractor/iqiyi.py b/hypervideo_dl/extractor/iqiyi.py
index c41f6db..94bcad4 100644
--- a/hypervideo_dl/extractor/iqiyi.py
+++ b/hypervideo_dl/extractor/iqiyi.py
@@ -270,12 +270,14 @@ class IqIE(InfoExtractor):
'1': 'zh_CN',
'2': 'zh_TW',
'3': 'en',
- '4': 'kor',
+ '4': 'ko',
+ '5': 'ja',
'18': 'th',
'21': 'my',
'23': 'vi',
'24': 'id',
'26': 'es',
+ '27': 'pt',
'28': 'ar',
}
@@ -355,13 +357,16 @@ class IqIE(InfoExtractor):
if player_js_cache:
return player_js_cache
webpack_js_url = self._proto_relative_url(self._search_regex(
- r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
+ r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
- webpack_map1, webpack_map2 = [self._parse_json(js_map, video_id, transform_source=js_to_json) for js_map in self._search_regex(
- r'\(({[^}]*})\[\w+\][^\)]*\)\s*\+\s*["\']\.["\']\s*\+\s*({[^}]*})\[\w+\]\+["\']\.js', webpack_js, 'JS locations', group=(1, 2))]
- for module_index in reversed(list(webpack_map2.keys())):
+ webpack_map = self._search_json(
+ r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
+ contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
+ end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
+
+ for module_index in reversed(webpack_map):
module_js = self._download_webpage(
- f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js',
+ f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
if 'vms request' in module_js:
self.cache.store('iq', 'player_js', module_js)
@@ -373,11 +378,11 @@ class IqIE(InfoExtractor):
self._extract_vms_player_js(webpage, video_id), 'signature function')
def _update_bid_tags(self, webpage, video_id):
- extracted_bid_tags = self._parse_json(
- self._search_regex(
- r'arguments\[1\][^,]*,\s*function\s*\([^\)]*\)\s*{\s*"use strict";?\s*var \w=({.+}})\s*,\s*\w\s*=\s*{\s*getNewVd',
- self._extract_vms_player_js(webpage, video_id), 'video tags', default=''),
- video_id, transform_source=js_to_json, fatal=False)
+ extracted_bid_tags = self._search_json(
+ r'function\s*\([^)]*\)\s*\{\s*"use strict";?\s*var \w\s*=\s*',
+ self._extract_vms_player_js(webpage, video_id), 'video tags', video_id,
+ contains_pattern=r'{\s*\d+\s*:\s*\{\s*nbid\s*:.+}\s*}',
+ end_pattern=r'\s*,\s*\w\s*=\s*\{\s*getNewVd', fatal=False, transform_source=js_to_json)
if not extracted_bid_tags:
return
self._BID_TAGS = {
@@ -412,7 +417,7 @@ class IqIE(InfoExtractor):
'langCode': self._get_cookie('lang', 'en_us'),
'deviceId': self._get_cookie('QC005', '')
}, fatal=False)
- ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none, default=[])
+ ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none)
else:
ut_list = ['0']
@@ -444,7 +449,7 @@ class IqIE(InfoExtractor):
self.report_warning('This preview video is limited%s' % format_field(preview_time, None, ' to %s seconds'))
# TODO: Extract audio-only formats
- for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])):
+ for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none)):
dash_path = dash_paths.get(bid)
if not dash_path:
self.report_warning(f'Unknown format id: {bid}. It is currently not being extracted')
@@ -455,7 +460,7 @@ class IqIE(InfoExtractor):
fatal=False), 'data', expected_type=dict)
video_format = traverse_obj(format_data, ('program', 'video', lambda _, v: str(v['bid']) == bid),
- expected_type=dict, default=[], get_all=False) or {}
+ expected_type=dict, get_all=False) or {}
extracted_formats = []
if video_format.get('m3u8Url'):
extracted_formats.extend(self._extract_m3u8_formats(
@@ -496,7 +501,7 @@ class IqIE(InfoExtractor):
})
formats.extend(extracted_formats)
- for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]):
+ for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict):
lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name'))
subtitles.setdefault(lang, []).extend([{
'ext': format_ext,