diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-07-15 22:46:39 +0200 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-07-15 22:46:39 +0200 | 
| commit | ad25aee2458527c38bdd3a8f1dec318ea807052c (patch) | |
| tree | be3828f023a6f32978d6c00798e46f0fabc078f3 | |
| parent | bd1f325b427eaea944b4b01ef4ee7c3559caac5c (diff) | |
[youtube & jsinterp] Fix signature extraction (fixes #3255)
Some functions are now defined inside an object. If a variable is not defined in the local namespace, the jsinterp will search the code for the object's definition.
| -rw-r--r-- | test/test_youtube_signature.py | 6 | ||||
| -rw-r--r-- | youtube_dl/jsinterp.py | 40 | 
2 files changed, 43 insertions, 3 deletions
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8d46fe108..d95533959 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -34,6 +34,12 @@ _TESTS = [          u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',      ),      ( +        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', +        u'js', +        84, +        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', +    ), +    (          u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',          u'js',          u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 3bbb07704..ae5bca2e6 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -11,6 +11,7 @@ class JSInterpreter(object):      def __init__(self, code):          self.code = code          self._functions = {} +        self._objects = {}      def interpret_statement(self, stmt, local_vars, allow_recursion=20):          if allow_recursion < 0: @@ -55,7 +56,19 @@ class JSInterpreter(object):          m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)          if m:              member = m.group('member') -            val = local_vars[m.group('in')] +            variable = m.group('in') + +            if variable not in local_vars: +                if variable not in self._objects: +                    self._objects[variable] = self.extract_object(variable) +                obj = self._objects[variable] +                key, args = member.split('(', 1) +                args = args.strip(')') +                argvals = [int(v) if v.isdigit() else local_vars[v] +                           for v in args.split(',')] +                return obj[key](argvals) + +            val = local_vars[variable]              if member == 'split("")':                  return list(val)          
    if member == 'join("")': @@ -97,6 +110,25 @@ class JSInterpreter(object):              return self._functions[fname](argvals)          raise ExtractorError('Unsupported JS expression %r' % expr) +    def extract_object(self, objname): +        obj = {} +        obj_m = re.search( +            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + +            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + +            r'\}\s*;', +            self.code) +        fields = obj_m.group('fields') +        # Currently, it only supports function definitions +        fields_m = re.finditer( +            r'(?P<key>[a-zA-Z$]+)\s*:\s*function' +            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', +            fields) +        for f in fields_m: +            argnames = f.group('args').split(',') +            obj[f.group('key')] = self.build_function(argnames, f.group('code')) + +        return obj +      def extract_function(self, funcname):          func_m = re.search(              (r'(?:function %s|[{;]%s\s*=\s*function)' % ( @@ -107,10 +139,12 @@ class JSInterpreter(object):              raise ExtractorError('Could not find JS function %r' % funcname)          argnames = func_m.group('args').split(',') +        return self.build_function(argnames, func_m.group('code')) + +    def build_function(self, argnames, code):          def resf(args):              local_vars = dict(zip(argnames, args)) -            for stmt in func_m.group('code').split(';'): +            for stmt in code.split(';'):                  res = self.interpret_statement(stmt, local_vars)              return res          return resf -  | 
