diff options
| author | dirkf <fieldhouse@gmx.net> | 2022-08-31 23:22:48 +0100 | 
|---|---|---|
| committer | dirkf <fieldhouse@gmx.net> | 2022-09-01 10:57:12 +0100 | 
| commit | 55c823634db890a328ffc23588fcd6f35d9b3ddf (patch) | |
| tree | 22d6c4e803eff07cb82a23f9b4b519b10b504b5b | |
| parent | 4050e10a4c3445c5399239567eb074acb2f65c18 (diff) | |
[jsinterp] Handle new YT players 113ca41c, c57c113c
* add NaN
* allow any white-space character for `after_op`
* align with yt-dlp f26af78a8ac11d9d617ed31ea5282cfaa5bcbcfa (`charCodeAt` and bitwise overflow)
* allow escaping in regex, fixing player c57c113c
| -rw-r--r-- | test/test_jsinterp.py | 21 |
| -rw-r--r-- | test/test_youtube_signature.py | 16 |
| -rw-r--r-- | youtube_dl/jsinterp.py | 46 |
3 files changed, 65 insertions, 18 deletions
| diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index fb4882d00..5121c8cf8 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -136,6 +136,11 @@ class TestJSInterpreter(unittest.TestCase):      def test_builtins(self):          jsi = JSInterpreter(''' +        function x() { return NaN } +        ''') +        self.assertTrue(math.isnan(jsi.call_function('x'))) + +        jsi = JSInterpreter('''          function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }          ''')          self.assertEqual(jsi.call_function('x'), 86000) @@ -385,6 +390,22 @@ class TestJSInterpreter(unittest.TestCase):          ''')          self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) +    def test_char_code_at(self): +        jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}') +        self.assertEqual(jsi.call_function('x', 0), 116) +        self.assertEqual(jsi.call_function('x', 1), 101) +        self.assertEqual(jsi.call_function('x', 2), 115) +        self.assertEqual(jsi.call_function('x', 3), 116) +        self.assertEqual(jsi.call_function('x', 4), None) +        self.assertEqual(jsi.call_function('x', 'not_a_number'), 116) + +    def test_bitwise_operators_overflow(self): +        jsi = JSInterpreter('function x(){return -524999584 << 5}') +        self.assertEqual(jsi.call_function('x'), 379882496) + +        jsi = JSInterpreter('function x(){return 1236566549 << 5}') +        self.assertEqual(jsi.call_function('x'), 915423904) +  if __name__ == '__main__':      unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 4bb0a30b0..ec914a871 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -112,9 +112,25 @@ _NSIG_TESTS = [          'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw',      ),      ( +        'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js', +        '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw', +    ), +    
(          'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js',          '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ',      ), +    ( +        'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js', +        '5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7', +    ), +    ( +        'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js', +        'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg', +    ), +    ( +        'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js', +        '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg', +    ),  ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 08726e478..d13329396 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -23,10 +23,11 @@ from .compat import (  def _js_bit_op(op): +    def zeroise(x): +        return 0 if x in (None, JS_Undefined) else x +      def wrapped(a, b): -        def zeroise(x): -            return 0 if x in (None, JS_Undefined) else x -        return op(zeroise(a), zeroise(b)) +        return op(zeroise(a), zeroise(b)) & 0xffffffff      return wrapped @@ -44,7 +45,7 @@ def _js_arith_op(op):  def _js_div(a, b):      if JS_Undefined in (a, b) or not (a and b):          return float('nan') -    return float('inf') if not b else operator.truediv(a or 0, b) +    return operator.truediv(a or 0, b) if b else float('inf')  def _js_mod(a, b): @@ -260,13 +261,14 @@ class JSInterpreter(object):                      counters[_MATCHING_PARENS[char]] += 1                  elif char in counters:                      counters[char] -= 1 -            if not escaping and char in _QUOTES and in_quote in (char, None): -                if in_quote or after_op or char != '/': -                    in_quote = None if in_quote and not in_regex_char_group else char -            elif in_quote == '/' and char in '[]': -                in_regex_char_group = char == '[' +            if not escaping: +                if char in _QUOTES and in_quote 
in (char, None): +                    if in_quote or after_op or char != '/': +                        in_quote = None if in_quote and not in_regex_char_group else char +                elif in_quote == '/' and char in '[]': +                    in_regex_char_group = char == '['              escaping = not escaping and in_quote and char == '\\' -            after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op) +            after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op))              if char != delim[pos] or any(counters.values()) or in_quote:                  pos = skipping = 0 @@ -590,6 +592,8 @@ class JSInterpreter(object):          elif expr == 'undefined':              return JS_Undefined, should_return +        elif expr == 'NaN': +            return float('NaN'), should_return          elif md.get('return'):              return local_vars[m.group('name')], should_return @@ -635,7 +639,8 @@ class JSInterpreter(object):              def assertion(cndn, msg):                  """ assert, but without risk of getting optimized out """                  if not cndn: -                    raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr) +                    memb = member +                    raise self.Exception('{member} {msg}'.format(**locals()), expr=expr)              def eval_method():                  if (variable, member) == ('console', 'debug'): @@ -737,6 +742,13 @@ class JSInterpreter(object):                          return obj.index(idx, start)                      except ValueError:                          return -1 +                elif member == 'charCodeAt': +                    assertion(isinstance(obj, compat_str), 'must be applied on a string') +                    # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced +                    idx = argvals[0] if isinstance(argvals[0], int) else 0 +                    if idx >= len(obj): +           
             return None +                    return ord(obj[idx])                  idx = int(member) if isinstance(obj, list) else member                  return obj[idx](argvals, allow_recursion=allow_recursion) @@ -820,12 +832,10 @@ class JSInterpreter(object):              if mobj is None:                  break              start, body_start = mobj.span() -            body, remaining = self._separate_at_paren(code[body_start - 1:]) -            name = self._named_object( -                local_vars, -                self.extract_function_from_code( -                    self.build_arglist(mobj.group('args')), -                    body, local_vars, *global_stack)) +            body, remaining = self._separate_at_paren(code[body_start - 1:], '}') +            name = self._named_object(local_vars, self.extract_function_from_code( +                [x.strip() for x in mobj.group('args').split(',')], +                body, local_vars, *global_stack))              code = code[:start] + name + remaining          return self.build_function(argnames, code, local_vars, *global_stack) @@ -854,7 +864,7 @@ class JSInterpreter(object):                  zip_longest(argnames, args, fillvalue=None))              global_stack[0].update(kwargs)              var_stack = LocalNameSpace(*global_stack) -            ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1) +            ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)              if should_abort:                  return ret          return resf | 
