diff options
author | Jesús <heckyel@hyperbola.info> | 2021-12-07 12:26:51 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-12-07 12:26:51 -0500 |
commit | 495746b9a6d4d32ddfa39ed908092d90a7cd5f3f (patch) | |
tree | 4845e40905136556b7513b9f36e3a70e505ee4c9 /yt_dlp/extractor/common.py | |
parent | 25831c5572c6e1d45bc05a122312516e0d264f8d (diff) | |
parent | ddd24c99493483bde822944e8063064f53464ac1 (diff) | |
download | hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.lz hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.xz hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.zip |
updated from upstream | 07/12/2021 at 12:26
Diffstat (limited to 'yt_dlp/extractor/common.py')
-rw-r--r-- | yt_dlp/extractor/common.py | 18 |
1 files changed, 18 insertions, 0 deletions
```diff
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2180f879c..d8fc5272c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1513,6 +1513,24 @@ class InfoExtractor(object):
                 webpage, 'next.js data', **kw),
             video_id, **kw)
 
+    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
+        ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
+        # not all website do this, but it can be changed
+        # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+        rectx = re.escape(context_name)
+        js, arg_keys, arg_vals = self._search_regex(
+            (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
+             r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
+            webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+
+        args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+        for key, val in args.items():
+            if val in ('undefined', 'void 0'):
+                args[key] = 'null'
+
+        return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
```