diff options
author | bashonly <88596187+bashonly@users.noreply.github.com> | 2023-01-02 05:50:37 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-02 05:50:37 +0000 |
commit | d7f98714696a4c9691ed28fb9b63395b9227646a (patch) | |
tree | e5c0c94fde031def5ea0ba1f12b6ebfc18cf8fd6 | |
parent | 13f930abc0c91d8e50336488e4c55defe97aa588 (diff) | |
download | hypervideo-pre-d7f98714696a4c9691ed28fb9b63395b9227646a.tar.lz hypervideo-pre-d7f98714696a4c9691ed28fb9b63395b9227646a.tar.xz hypervideo-pre-d7f98714696a4c9691ed28fb9b63395b9227646a.zip |
[extractor/iqiyi] Fix `Iq` JS regex (#5922)
Closes #5702
Authored by: bashonly
-rw-r--r-- | yt_dlp/extractor/iqiyi.py | 21 |
1 files changed, 12 insertions, 9 deletions
```diff
diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py
index dbc688fb9..eba89f787 100644
--- a/yt_dlp/extractor/iqiyi.py
+++ b/yt_dlp/extractor/iqiyi.py
@@ -527,11 +527,14 @@ class IqIE(InfoExtractor):
         webpack_js_url = self._proto_relative_url(self._search_regex(
             r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
         webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
-        webpack_map1, webpack_map2 = [self._parse_json(js_map, video_id, transform_source=js_to_json) for js_map in self._search_regex(
-            r'\(({[^}]*})\[\w+\][^\)]*\)\s*\+\s*["\']\.["\']\s*\+\s*({[^}]*})\[\w+\]\+["\']\.js', webpack_js, 'JS locations', group=(1, 2))]
-        for module_index in reversed(list(webpack_map2.keys())):
+        webpack_map = self._search_json(
+            r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,
+            contains_pattern=r'{\s*(?:\d+\s*:\s*["\'][\da-f]+["\']\s*,?\s*)+}',
+            end_pattern=r'\[\w+\]\+["\']\.js', transform_source=js_to_json)
+
+        for module_index in reversed(webpack_map):
             module_js = self._download_webpage(
-                f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js',
+                f'https://stc.iqiyipic.com/_next/static/chunks/{module_index}.{webpack_map[module_index]}.js',
                 video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or ''
             if 'vms request' in module_js:
                 self.cache.store('iq', 'player_js', module_js)
@@ -543,11 +546,11 @@ class IqIE(InfoExtractor):
                                   self._extract_vms_player_js(webpage, video_id), 'signature function')
 
     def _update_bid_tags(self, webpage, video_id):
-        extracted_bid_tags = self._parse_json(
-            self._search_regex(
-                r'arguments\[1\][^,]*,\s*function\s*\([^\)]*\)\s*{\s*"use strict";?\s*var \w=({.+}})\s*,\s*\w\s*=\s*{\s*getNewVd',
-                self._extract_vms_player_js(webpage, video_id), 'video tags', default=''),
-            video_id, transform_source=js_to_json, fatal=False)
+        extracted_bid_tags = self._search_json(
+            r'function\s*\([^)]*\)\s*\{\s*"use strict";?\s*var \w\s*=\s*',
+            self._extract_vms_player_js(webpage, video_id), 'video tags', video_id,
+            contains_pattern=r'{\s*\d+\s*:\s*\{\s*nbid\s*:.+}\s*}',
+            end_pattern=r'\s*,\s*\w\s*=\s*\{\s*getNewVd', fatal=False, transform_source=js_to_json)
         if not extracted_bid_tags:
             return
         self._BID_TAGS = {
```