about summary refs log tree commit diff stats
path: root/hypervideo_dl/jsinterp.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/jsinterp.py')
-rw-r--r-- hypervideo_dl/jsinterp.py 62
1 files changed, 46 insertions, 16 deletions
diff --git a/hypervideo_dl/jsinterp.py b/hypervideo_dl/jsinterp.py
index adc5a19..9d989ad 100644
--- a/hypervideo_dl/jsinterp.py
+++ b/hypervideo_dl/jsinterp.py
@@ -9,6 +9,7 @@ import re
from .utils import (
NO_DEFAULT,
ExtractorError,
+ function_with_repr,
js_to_json,
remove_quotes,
truncate_string,
@@ -19,7 +20,12 @@ from .utils import (
def _js_bit_op(op):
def zeroise(x):
- return 0 if x in (None, JS_Undefined) else x
+ if x in (None, JS_Undefined):
+ return 0
+ with contextlib.suppress(TypeError):
+ if math.isnan(x): # NB: NaN cannot be checked by membership
+ return 0
+ return x
def wrapped(a, b):
return op(zeroise(a), zeroise(b)) & 0xffffffff
@@ -38,7 +44,7 @@ def _js_arith_op(op):
def _js_div(a, b):
- if JS_Undefined in (a, b) or not (a and b):
+ if JS_Undefined in (a, b) or not (a or b):
return float('nan')
return (a or 0) / b if b else float('inf')
@@ -184,7 +190,8 @@ class Debugger:
cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
raise
if cls.ENABLED and stmt.strip():
- cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
+ if should_ret or not repr(ret) == stmt:
+ cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
return ret, should_ret
return interpret_statement
@@ -205,8 +212,6 @@ class JSInterpreter:
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
}
- _EXC_NAME = '__hypervideo_dl_exception__'
-
def __init__(self, code, objects=None):
self.code, self._functions = code, {}
self._objects = {} if objects is None else objects
@@ -220,6 +225,8 @@ class JSInterpreter:
def _named_object(self, namespace, obj):
self.__named_object_counter += 1
name = f'__hypervideo_dl_jsinterp_obj{self.__named_object_counter}'
+ if callable(obj) and not isinstance(obj, function_with_repr):
+ obj = function_with_repr(obj, f'F<{self.__named_object_counter}>')
namespace[name] = obj
return name
@@ -256,9 +263,11 @@ class JSInterpreter:
elif in_quote == '/' and char in '[]':
in_regex_char_group = char == '['
escaping = not escaping and in_quote and char == '\\'
- after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)
+ in_unary_op = (not in_quote and not in_regex_char_group
+ and after_op not in (True, False) and char in '-+')
+ after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)
- if char != delim[pos] or any(counters.values()) or in_quote:
+ if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
pos = 0
continue
elif pos != delim_len:
@@ -343,7 +352,10 @@ class JSInterpreter:
inner, outer = self._separate(expr, expr[0], 1)
if expr[0] == '/':
flags, outer = self._regex_flags(outer)
- inner = re.compile(inner[1:], flags=flags)
+ # We don't support regex methods yet, so no point compiling it
+ inner = f'{inner}/{flags}'
+ # Avoid https://github.com/python/cpython/issues/74534
+ # inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
else:
inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
if not outer:
@@ -354,11 +366,11 @@ class JSInterpreter:
obj = expr[4:]
if obj.startswith('Date('):
left, right = self._separate_at_paren(obj[4:])
- expr = unified_timestamp(
+ date = unified_timestamp(
self.interpret_expression(left, local_vars, allow_recursion), False)
- if not expr:
+ if date is None:
raise self.Exception(f'Failed to parse date {left!r}', expr)
- expr = self._dump(int(expr * 1000), local_vars) + right
+ expr = self._dump(int(date * 1000), local_vars) + right
else:
raise self.Exception(f'Unsupported object {obj}', expr)
@@ -402,10 +414,25 @@ class JSInterpreter:
m = re.match(r'''(?x)
(?P<try>try)\s*\{|
+ (?P<if>if)\s*\(|
(?P<switch>switch)\s*\(|
(?P<for>for)\s*\(
''', expr)
md = m.groupdict() if m else {}
+ if md.get('if'):
+ cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
+ if_expr, expr = self._separate_at_paren(expr.lstrip())
+ # TODO: "else if" is not handled
+ else_expr = None
+ m = re.match(r'else\s*{', expr)
+ if m:
+ else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
+ cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
+ ret, should_abort = self.interpret_statement(
+ if_expr if cndn else else_expr, local_vars, allow_recursion)
+ if should_abort:
+ return ret, True
+
if md.get('try'):
try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
err = None
@@ -418,7 +445,7 @@ class JSInterpreter:
err = e
pending = (None, False)
- m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
+ m = re.match(fr'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{', expr)
if m:
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
if err:
@@ -752,7 +779,7 @@ class JSInterpreter:
obj = {}
obj_m = re.search(
r'''(?x)
- (?<!this\.)%s\s*=\s*{\s*
+ (?<!\.)%s\s*=\s*{\s*
(?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
}\s*;
''' % (re.escape(objname), _FUNC_NAME_RE),
@@ -768,7 +795,8 @@ class JSInterpreter:
fields)
for f in fields_m:
argnames = f.group('args').split(',')
- obj[remove_quotes(f.group('key'))] = self.build_function(argnames, f.group('code'))
+ name = remove_quotes(f.group('key'))
+ obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), f'F<{name}>')
return obj
@@ -784,13 +812,15 @@ class JSInterpreter:
\((?P<args>[^)]*)\)\s*
(?P<code>{.+})''' % {'name': re.escape(funcname)},
self.code)
- code, _ = self._separate_at_paren(func_m.group('code'))
if func_m is None:
raise self.Exception(f'Could not find JS function "{funcname}"')
+ code, _ = self._separate_at_paren(func_m.group('code'))
return [x.strip() for x in func_m.group('args').split(',')], code
def extract_function(self, funcname):
- return self.extract_function_from_code(*self.extract_function_code(funcname))
+ return function_with_repr(
+ self.extract_function_from_code(*self.extract_function_code(funcname)),
+ f'F<{funcname}>')
def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {}