about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: The Hatsune Daishi <nao20010128@gmail.com>, 2021-12-08 01:38:50 +0900
committer: GitHub <noreply@github.com>, 2021-12-07 22:08:50 +0530
commit: 66f4c04e50d9213522095247666d3d90345ad5d1 (patch)
tree: 340e1cae3ba3343415dbb743dcaa1e71dc68896c
parent: 93864403ea7c982be9a78af38835ac0747ed12d1 (diff)
download: hypervideo-pre-66f4c04e50d9213522095247666d3d90345ad5d1.tar.lz
hypervideo-pre-66f4c04e50d9213522095247666d3d90345ad5d1.tar.xz
hypervideo-pre-66f4c04e50d9213522095247666d3d90345ad5d1.zip
[extractor] Add `_search_nuxt_data` (#1921)
Authored by: nao20010128nao
-rw-r--r-- yt_dlp/extractor/common.py | 18
-rw-r--r-- yt_dlp/extractor/sovietscloset.py | 13
2 files changed, 19 insertions, 12 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2180f879c..d8fc5272c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1513,6 +1513,24 @@ class InfoExtractor(object):
webpage, 'next.js data', **kw),
video_id, **kw)
+ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
+ ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
+ # not all website do this, but it can be changed
+ # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+ rectx = re.escape(context_name)
+ js, arg_keys, arg_vals = self._search_regex(
+ (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
+ r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
+ webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+
+ args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+ for key, val in args.items():
+ if val in ('undefined', 'void 0'):
+ args[key] = 'null'
+
+ return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
@staticmethod
def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 7df23759a..daf1c7450 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- js_to_json,
try_get,
unified_timestamp
)
@@ -14,17 +13,7 @@ class SovietsClosetBaseIE(InfoExtractor):
def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
- js, arg_keys, arg_vals = self._search_regex(
- r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)',
- nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals'])
-
- args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
-
- for key, val in args.items():
- if val in ('undefined', 'void 0'):
- args[key] = 'null'
-
- return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+ return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')
def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
title = game_name