diff options
Diffstat (limited to 'yt_dlp/extractor/common.py')
-rw-r--r-- | yt_dlp/extractor/common.py | 18 |
1 files changed, 7 insertions, 11 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 093a9b5cd..3e3e55798 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1588,15 +1588,13 @@ class InfoExtractor: webpage, 'next.js data', fatal=fatal, **kw), video_id, transform_source=transform_source, fatal=fatal) - def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', return_full_data=False): - ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. ''' - # not all website do this, but it can be changed - # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source + def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)): + """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function""" rectx = re.escape(context_name) + FUNCTION_RE = r'\(function\((?P<arg_keys>.*?)\){return\s+(?P<js>{.*?})\s*;?\s*}\((?P<arg_vals>.*?)\)' js, arg_keys, arg_vals = self._search_regex( - (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx, - r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx), - webpage, context_name, group=['js', 'arg_keys', 'arg_vals']) + (rf'<script>\s*window\.{rectx}={FUNCTION_RE}\s*\)\s*;?\s*</script>', rf'{rectx}\(.*?{FUNCTION_RE}'), + webpage, context_name, group=('js', 'arg_keys', 'arg_vals'), fatal=fatal) args = dict(zip(arg_keys.split(','), arg_vals.split(','))) @@ -1604,10 +1602,8 @@ class InfoExtractor: if val in ('undefined', 'void 0'): args[key] = 'null' - ret = self._parse_json(js_to_json(js, args), video_id) - if return_full_data: - return ret - return ret['data'][0] + ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal) + return traverse_obj(ret, traverse) or {} @staticmethod def _hidden_inputs(html): |