about summary refs log tree commit diff
path: root/youtube_dl/jsinterp.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/jsinterp.py')
-rw-r--r-- youtube_dl/jsinterp.py 140
1 files changed, 121 insertions, 19 deletions
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 7835187f5..69c8f77ca 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -1,10 +1,12 @@
# coding: utf-8
from __future__ import unicode_literals
+import calendar
import itertools
import json
import operator
import re
+import time
from functools import update_wrapper, wraps
@@ -12,8 +14,10 @@ from .utils import (
error_to_compat_str,
ExtractorError,
float_or_none,
+ int_or_none,
js_to_json,
remove_quotes,
+ str_or_none,
unified_timestamp,
variadic,
write_string,
@@ -24,6 +28,8 @@ from .compat import (
compat_collections_chain_map as ChainMap,
compat_contextlib_suppress,
compat_filter as filter,
+ compat_int,
+ compat_integer_types,
compat_itertools_zip_longest as zip_longest,
compat_map as map,
compat_numeric_types,
@@ -70,14 +76,27 @@ class JS_Undefined(object):
pass
-def _js_bit_op(op):
+def _js_bit_op(op, is_shift=False):
- def zeroise(x):
- return 0 if x in (None, JS_Undefined, _NaN, _Infinity) else x
+ def zeroise(x, is_shift_arg=False):
+ if isinstance(x, compat_integer_types):
+ return (x % 32) if is_shift_arg else (x & 0xffffffff)
+ try:
+ x = float(x)
+ if is_shift_arg:
+ x = int(x % 32)
+ elif x < 0:
+ x = -compat_int(-x % 0xffffffff)
+ else:
+ x = compat_int(x % 0xffffffff)
+ except (ValueError, TypeError):
+ # also here for int(NaN), including float('inf') % 32
+ x = 0
+ return x
@wraps_op(op)
def wrapped(a, b):
- return op(zeroise(a), zeroise(b)) & 0xffffffff
+ return op(zeroise(a), zeroise(b, is_shift)) & 0xffffffff
return wrapped
@@ -135,6 +154,7 @@ def _js_to_primitive(v):
)
+# more exact: yt-dlp/yt-dlp#12110
def _js_toString(v):
return (
'undefined' if v is JS_Undefined
@@ -143,7 +163,7 @@ def _js_toString(v):
else 'null' if v is None
# bool <= int: do this first
else ('false', 'true')[v] if isinstance(v, bool)
- else '{0:.7f}'.format(v).rstrip('.0') if isinstance(v, compat_numeric_types)
+ else re.sub(r'(?<=\d)\.?0*$', '', '{0:.7f}'.format(v)) if isinstance(v, compat_numeric_types)
else _js_to_primitive(v))
@@ -253,8 +273,8 @@ def _js_typeof(expr):
# avoid dict to maintain order
# definition None => Defined in JSInterpreter._operator
_OPERATORS = (
- ('>>', _js_bit_op(operator.rshift)),
- ('<<', _js_bit_op(operator.lshift)),
+ ('>>', _js_bit_op(operator.rshift, True)),
+ ('<<', _js_bit_op(operator.lshift, True)),
('+', _js_add),
('-', _js_arith_op(operator.sub)),
('*', _js_arith_op(operator.mul)),
@@ -389,6 +409,7 @@ class JSInterpreter(object):
class Exception(ExtractorError):
def __init__(self, msg, *args, **kwargs):
expr = kwargs.pop('expr', None)
+ msg = str_or_none(msg, default='"None"')
if expr is not None:
msg = '{0} in: {1!r:.100}'.format(msg.rstrip(), expr)
super(JSInterpreter.Exception, self).__init__(msg, *args, **kwargs)
@@ -416,6 +437,7 @@ class JSInterpreter(object):
flags, _ = self.regex_flags(flags)
# First, avoid https://github.com/python/cpython/issues/74534
self.__self = None
+ pattern_txt = str_or_none(pattern_txt) or '(?:)'
self.__pattern_txt = pattern_txt.replace('[[', r'[\[')
self.__flags = flags
@@ -460,6 +482,73 @@ class JSInterpreter(object):
flags |= cls.RE_FLAGS[ch]
return flags, expr[idx + 1:]
class JS_Date(object):
    """Minimal emulation of the JavaScript ``Date`` object.

    The instant is kept in ``_t`` as a count of milliseconds (the value
    handed back by ``valueOf()``); ``None`` marks an invalid date.
    """
    # milliseconds value of this date; None => "Invalid Date"
    _t = None

    @staticmethod
    def __ymd_etc(*args, **kw_is_utc):
        """Convert broken-down date components to a millisecond timestamp.

        args: year, monthIndex, day, hours, minutes, seconds, milliseconds
        (extra positional values are ignored). Missing trailing components
        take the JS defaults: monthIndex 0, day 1, everything else 0.

        Keyword ``is_utc`` (default False) selects ``calendar.timegm``
        instead of ``time.mktime`` for the conversion.

        Returns the timestamp in milliseconds, or None when the components
        cannot be converted.
        """
        is_utc = kw_is_utc.get('is_utc', False)

        # a missing day defaults to 1 (not 0): Date.UTC(1970, 0) must be 0
        defaults = [0, 0, 1, 0, 0, 0, 0]
        fields = list(args[:7])
        fields += defaults[len(fields):]
        ms = fields.pop()
        # TODO: [MDN] When a segment overflows or underflows its expected
        # range, it usually "carries over to" or "borrows from" the higher segment.
        try:
            fields[1] += 1  # month 0..11 -> 1..12
            # tm_wday, tm_yday: don't know; tm_isdst: none for UTC,
            # otherwise let mktime() work it out
            fields += [-1, -1, 0 if is_utc else -1]
            mktime = calendar.timegm if is_utc else time.mktime
            return mktime(time.struct_time(fields)) * 1000 + ms
        except (OverflowError, ValueError, TypeError):
            # TypeError: a component was not a number (eg, undefined)
            return None

    @classmethod
    def UTC(cls, *args):
        """Return the millisecond timestamp for UTC components, else NaN."""
        t = cls.__ymd_etc(*args, is_utc=True)
        return _NaN if t is None else t

    @staticmethod
    def parse(date_str, **kw_is_raw):
        """Parse `date_str` into a millisecond timestamp.

        When `unified_timestamp()` yields None, the result is None if the
        keyword ``is_raw`` is true, or NaN otherwise.
        """
        is_raw = kw_is_raw.get('is_raw', False)

        t = unified_timestamp(str_or_none(date_str), False)
        if t is not None:
            return int(t * 1000)
        return None if is_raw else _NaN

    @staticmethod
    def now(**kw_is_raw):
        """Return the current time as integer milliseconds.

        The ``is_raw`` keyword is accepted for symmetry with `parse()`;
        `time.time()` always gives a number, so it has no effect here.
        """
        return int(time.time() * 1000)

    def __init__(self, *args):
        """Build a date from nothing (now), a JS_Date, a string, a number
        of milliseconds, or year/monthIndex/... components (local time).
        """
        if not args:
            args = [self.now(is_raw=True)]
        if len(args) == 1:
            if isinstance(args[0], JSInterpreter.JS_Date):
                self._t = int_or_none(args[0].valueOf(), default=None)
            else:
                arg_type = _js_typeof(args[0])
                if arg_type == 'string':
                    self._t = self.parse(args[0], is_raw=True)
                elif arg_type == 'number':
                    try:
                        self._t = int(args[0])
                    except (ValueError, OverflowError):
                        # NaN or +/-Infinity: an invalid date, not a crash
                        self._t = None
                # any other single argument leaves the date invalid
        else:
            self._t = self.__ymd_etc(*args)

    def toString(self):
        """Return a local-time text form of the date, or "Invalid Date"."""
        try:
            # strftime() needs a time tuple, not the raw milliseconds value;
            # plain %d is already zero-padded and, unlike %0d, portable
            return time.strftime(
                '%a %b %d %Y %H:%M:%S %Z%z',
                time.localtime(self._t / 1000.0)).rstrip()
        except (TypeError, ValueError, OverflowError, OSError):
            # _t is None, or outside the range the platform can convert
            return "Invalid Date"

    def valueOf(self):
        """Return the millisecond timestamp, or NaN for an invalid date."""
        return _NaN if self._t is None else self._t
+
@classmethod
def __op_chars(cls):
op_chars = set(';,[')
@@ -584,18 +673,21 @@ class JSInterpreter(object):
except Exception as e:
raise self.Exception('Failed to evaluate {left_val!r:.50} {op} {right_val!r:.50}'.format(**locals()), expr, cause=e)
- def _index(self, obj, idx, allow_undefined=True):
+ def _index(self, obj, idx, allow_undefined=None):
if idx == 'length' and isinstance(obj, list):
return len(obj)
try:
return obj[int(idx)] if isinstance(obj, list) else obj[compat_str(idx)]
- except (TypeError, KeyError, IndexError) as e:
- if allow_undefined:
- # when is not allowed?
+ except (TypeError, KeyError, IndexError, ValueError) as e:
+ # allow_undefined is None gives correct behaviour
+ if allow_undefined or (
+ allow_undefined is None and not isinstance(e, TypeError)):
return JS_Undefined
raise self.Exception('Cannot get index {idx!r:.100}'.format(**locals()), expr=repr(obj), cause=e)
def _dump(self, obj, namespace):
+ if obj is JS_Undefined:
+ return 'undefined'
try:
return json.dumps(obj)
except TypeError:
@@ -700,7 +792,7 @@ class JSInterpreter(object):
new_kw, _, obj = expr.partition('new ')
if not new_kw:
- for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
+ for klass, konstr in (('Date', lambda *x: self.JS_Date(*x).valueOf()),
('RegExp', self.JS_RegExp),
('Error', self.Exception)):
if not obj.startswith(klass + '('):
@@ -948,6 +1040,10 @@ class JSInterpreter(object):
left_val = self._index(left_val, idx)
if isinstance(idx, float):
idx = int(idx)
+ if isinstance(left_val, list) and len(left_val) <= int_or_none(idx, default=-1):
+ # JS Array is a sparsely assignable list
+ # TODO: handle extreme sparsity without memory bloat, eg using auxiliary dict
+ left_val.extend((idx - len(left_val) + 1) * [JS_Undefined])
left_val[idx] = self._operator(
m.group('op'), self._index(left_val, idx) if m.group('op') else None,
m.group('expr'), expr, local_vars, allow_recursion)
@@ -1019,6 +1115,7 @@ class JSInterpreter(object):
'String': compat_str,
'Math': float,
'Array': list,
+ 'Date': self.JS_Date,
}
obj = local_vars.get(variable)
if obj in (JS_Undefined, None):
@@ -1071,6 +1168,8 @@ class JSInterpreter(object):
assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1]
raise self.Exception('Unsupported Math method ' + member, expr=expr)
+ elif obj is self.JS_Date:
+ return getattr(obj, member)(*argvals)
if member == 'split':
assertion(len(argvals) <= 2, 'takes at most two arguments')
@@ -1111,9 +1210,10 @@ class JSInterpreter(object):
elif member == 'join':
assertion(isinstance(obj, list), 'must be applied on a list')
assertion(len(argvals) <= 1, 'takes at most one argument')
- return (',' if len(argvals) == 0 else argvals[0]).join(
- ('' if x in (None, JS_Undefined) else _js_toString(x))
- for x in obj)
+ return (',' if len(argvals) == 0 or argvals[0] in (None, JS_Undefined)
+ else argvals[0]).join(
+ ('' if x in (None, JS_Undefined) else _js_toString(x))
+ for x in obj)
elif member == 'reverse':
assertion(not argvals, 'does not take any arguments')
obj.reverse()
@@ -1271,19 +1371,21 @@ class JSInterpreter(object):
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
return self.build_arglist(func_m.group('args')), code
- def extract_function(self, funcname):
+ def extract_function(self, funcname, *global_stack):
return function_with_repr(
- self.extract_function_from_code(*self.extract_function_code(funcname)),
+ self.extract_function_from_code(*itertools.chain(
+ self.extract_function_code(funcname), global_stack)),
'F<%s>' % (funcname,))
def extract_function_from_code(self, argnames, code, *global_stack):
local_vars = {}
+ start = None
while True:
- mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
+ mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code[start:])
if mobj is None:
break
- start, body_start = mobj.span()
+ start, body_start = ((start or 0) + x for x in mobj.span())
body, remaining = self._separate_at_paren(code[body_start - 1:])
name = self._named_object(local_vars, self.extract_function_from_code(
[x.strip() for x in mobj.group('args').split(',')],