diff options
Diffstat (limited to 'youtube_dl/swfinterp.py')
-rw-r--r-- | youtube_dl/swfinterp.py | 264 |
1 file changed, 242 insertions, 22 deletions
diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index b63c65b20..e60505ace 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,8 +4,8 @@ import collections import io import zlib +from .compat import compat_str from .utils import ( - compat_str, ExtractorError, struct_unpack, ) @@ -62,15 +62,17 @@ class _ScopeDict(dict): class _AVMClass(object): - def __init__(self, name_idx, name): + def __init__(self, name_idx, name, static_properties=None): self.name_idx = name_idx self.name = name self.method_names = {} self.method_idxs = {} self.methods = {} self.method_pyfunctions = {} + self.static_properties = static_properties if static_properties else {} self.variables = _ScopeDict(self) + self.constants = {} def make_object(self): return _AVMClass_Object(self) @@ -148,8 +150,38 @@ def _read_byte(reader): return res +StringClass = _AVMClass('(no name idx)', 'String') +ByteArrayClass = _AVMClass('(no name idx)', 'ByteArray') +TimerClass = _AVMClass('(no name idx)', 'Timer') +TimerEventClass = _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'}) +_builtin_classes = { + StringClass.name: StringClass, + ByteArrayClass.name: ByteArrayClass, + TimerClass.name: TimerClass, + TimerEventClass.name: TimerEventClass, +} + + +class _Undefined(object): + def __bool__(self): + return False + __nonzero__ = __bool__ + + def __hash__(self): + return 0 + + def __str__(self): + return 'undefined' + __repr__ = __str__ + +undefined = _Undefined() + + class SWFInterpreter(object): def __init__(self, file_contents): + self._patched_functions = { + (TimerClass, 'addEventListener'): lambda params: undefined, + } code_tag = next(tag for tag_code, tag in _extract_tags(file_contents) if tag_code == 82) @@ -170,11 +202,13 @@ class SWFInterpreter(object): # Constant pool int_count = u30() + self.constant_ints = [0] for _c in range(1, int_count): - s32() + self.constant_ints.append(s32()) + self.constant_uints = [0] uint_count = u30() for _c in range(1, 
uint_count): - u32() + self.constant_uints.append(u32()) double_count = u30() read_bytes(max(0, (double_count - 1)) * 8) string_count = u30() @@ -212,6 +246,10 @@ class SWFInterpreter(object): u30() # namespace_idx name_idx = u30() self.multinames.append(self.constant_strings[name_idx]) + elif kind == 0x09: + name_idx = u30() + u30() + self.multinames.append(self.constant_strings[name_idx]) else: self.multinames.append(_Multiname(kind)) for _c2 in range(MULTINAME_SIZES[kind]): @@ -258,13 +296,28 @@ class SWFInterpreter(object): kind = kind_full & 0x0f attrs = kind_full >> 4 methods = {} - if kind in [0x00, 0x06]: # Slot or Const + constants = None + if kind == 0x00: # Slot u30() # Slot id u30() # type_name_idx vindex = u30() if vindex != 0: read_byte() # vkind - elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter + elif kind == 0x06: # Const + u30() # Slot id + u30() # type_name_idx + vindex = u30() + vkind = 'any' + if vindex != 0: + vkind = read_byte() + if vkind == 0x03: # Constant_Int + value = self.constant_ints[vindex] + elif vkind == 0x04: # Constant_UInt + value = self.constant_uints[vindex] + else: + return {}, None # Ignore silently for now + constants = {self.multinames[trait_name_idx]: value} + elif kind in (0x01, 0x02, 0x03): # Method / Getter / Setter u30() # disp_id method_idx = u30() methods[self.multinames[trait_name_idx]] = method_idx @@ -283,7 +336,7 @@ class SWFInterpreter(object): for _c3 in range(metadata_count): u30() # metadata index - return methods + return methods, constants # Classes class_count = u30() @@ -305,18 +358,22 @@ class SWFInterpreter(object): u30() # iinit trait_count = u30() for _c2 in range(trait_count): - trait_methods = parse_traits_info() + trait_methods, trait_constants = parse_traits_info() avm_class.register_methods(trait_methods) + if trait_constants: + avm_class.constants.update(trait_constants) assert len(classes) == class_count self._classes_by_name = dict((c.name, c) for c in classes) for avm_class in 
classes: - u30() # cinit + avm_class.cinit_idx = u30() trait_count = u30() for _c2 in range(trait_count): - trait_methods = parse_traits_info() + trait_methods, trait_constants = parse_traits_info() avm_class.register_methods(trait_methods) + if trait_constants: + avm_class.constants.update(trait_constants) # Scripts script_count = u30() @@ -329,6 +386,7 @@ class SWFInterpreter(object): # Method bodies method_body_count = u30() Method = collections.namedtuple('Method', ['code', 'local_count']) + self._all_methods = [] for _c in range(method_body_count): method_idx = u30() u30() # max_stack @@ -337,9 +395,10 @@ class SWFInterpreter(object): u30() # max_scope_depth code_length = u30() code = read_bytes(code_length) + m = Method(code, local_count) + self._all_methods.append(m) for avm_class in classes: if method_idx in avm_class.method_idxs: - m = Method(code, local_count) avm_class.methods[avm_class.method_idxs[method_idx]] = m exception_count = u30() for _c2 in range(exception_count): @@ -354,13 +413,27 @@ class SWFInterpreter(object): assert p + code_reader.tell() == len(code_tag) - def extract_class(self, class_name): + def patch_function(self, avm_class, func_name, f): + self._patched_functions[(avm_class, func_name)] = f + + def extract_class(self, class_name, call_cinit=True): try: - return self._classes_by_name[class_name] + res = self._classes_by_name[class_name] except KeyError: raise ExtractorError('Class %r not found' % class_name) + if call_cinit and hasattr(res, 'cinit_idx'): + res.register_methods({'$cinit': res.cinit_idx}) + res.methods['$cinit'] = self._all_methods[res.cinit_idx] + cinit = self.extract_function(res, '$cinit') + cinit([]) + + return res + def extract_function(self, avm_class, func_name): + p = self._patched_functions.get((avm_class, func_name)) + if p: + return p if func_name in avm_class.method_pyfunctions: return avm_class.method_pyfunctions[func_name] if func_name in self._classes_by_name: @@ -379,10 +452,15 @@ class 
SWFInterpreter(object): registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] scopes = collections.deque([ - self._classes_by_name, avm_class.variables]) + self._classes_by_name, avm_class.constants, avm_class.variables]) while True: opcode = _read_byte(coder) - if opcode == 17: # iftrue + if opcode == 9: # label + pass # Spec says: "Do nothing." + elif opcode == 16: # jump + offset = s24() + coder.seek(coder.tell() + offset) + elif opcode == 17: # iftrue offset = s24() value = stack.pop() if value: @@ -392,9 +470,40 @@ class SWFInterpreter(object): value = stack.pop() if not value: coder.seek(coder.tell() + offset) + elif opcode == 19: # ifeq + offset = s24() + value2 = stack.pop() + value1 = stack.pop() + if value2 == value1: + coder.seek(coder.tell() + offset) + elif opcode == 20: # ifne + offset = s24() + value2 = stack.pop() + value1 = stack.pop() + if value2 != value1: + coder.seek(coder.tell() + offset) + elif opcode == 21: # iflt + offset = s24() + value2 = stack.pop() + value1 = stack.pop() + if value1 < value2: + coder.seek(coder.tell() + offset) + elif opcode == 32: # pushnull + stack.append(None) + elif opcode == 33: # pushundefined + stack.append(undefined) elif opcode == 36: # pushbyte v = _read_byte(coder) stack.append(v) + elif opcode == 37: # pushshort + v = u30() + stack.append(v) + elif opcode == 38: # pushtrue + stack.append(True) + elif opcode == 39: # pushfalse + stack.append(False) + elif opcode == 40: # pushnan + stack.append(float('NaN')) elif opcode == 42: # dup value = stack[-1] stack.append(value) @@ -419,11 +528,31 @@ class SWFInterpreter(object): [stack.pop() for _ in range(arg_count)])) obj = stack.pop() - if isinstance(obj, _AVMClass_Object): + if obj == StringClass: + if mname == 'String': + assert len(args) == 1 + assert isinstance(args[0], ( + int, compat_str, _Undefined)) + if args[0] == undefined: + res = 'undefined' + else: + res = compat_str(args[0]) + stack.append(res) + continue + else: + raise 
NotImplementedError( + 'Function String.%s is not yet implemented' + % mname) + elif isinstance(obj, _AVMClass_Object): func = self.extract_function(obj.avm_class, mname) res = func(args) stack.append(res) continue + elif isinstance(obj, _AVMClass): + func = self.extract_function(obj, mname) + res = func(args) + stack.append(res) + continue elif isinstance(obj, _ScopeDict): if mname in obj.avm_class.method_names: func = self.extract_function(obj.avm_class, mname) @@ -442,6 +571,13 @@ class SWFInterpreter(object): res = obj.split(args[0]) stack.append(res) continue + elif mname == 'charCodeAt': + assert len(args) <= 1 + idx = 0 if len(args) == 0 else args[0] + assert isinstance(idx, int) + res = ord(obj[idx]) + stack.append(res) + continue elif isinstance(obj, list): if mname == 'slice': assert len(args) == 1 @@ -458,9 +594,18 @@ class SWFInterpreter(object): raise NotImplementedError( 'Unsupported property %r on %r' % (mname, obj)) + elif opcode == 71: # returnvoid + res = undefined + return res elif opcode == 72: # returnvalue res = stack.pop() return res + elif opcode == 73: # constructsuper + # Not yet implemented, just hope it works without it + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() elif opcode == 74: # constructproperty index = u30() arg_count = u30() @@ -481,6 +626,17 @@ class SWFInterpreter(object): args = list(reversed( [stack.pop() for _ in range(arg_count)])) obj = stack.pop() + if isinstance(obj, _AVMClass_Object): + func = self.extract_function(obj.avm_class, mname) + res = func(args) + assert res is undefined + continue + if isinstance(obj, _ScopeDict): + assert mname in obj.avm_class.method_names + func = self.extract_function(obj.avm_class, mname) + res = func(args) + assert res is undefined + continue if mname == 'reverse': assert isinstance(obj, list) obj.reverse() @@ -504,7 +660,10 @@ class SWFInterpreter(object): break else: res = scopes[0] - stack.append(res[mname]) + if mname 
not in res and mname in _builtin_classes: + stack.append(_builtin_classes[mname]) + else: + stack.append(res[mname]) elif opcode == 94: # findproperty index = u30() mname = self.multinames[index] @@ -524,9 +683,15 @@ class SWFInterpreter(object): break else: scope = avm_class.variables - # I cannot find where static variables are initialized - # so let's just return None - res = scope.get(mname) + + if mname in scope: + res = scope[mname] + elif mname in _builtin_classes: + res = _builtin_classes[mname] + else: + # Assume unitialized + # TODO warn here + res = undefined stack.append(res) elif opcode == 97: # setproperty index = u30() @@ -548,22 +713,57 @@ class SWFInterpreter(object): pname = self.multinames[index] if pname == 'length': obj = stack.pop() - assert isinstance(obj, list) + assert isinstance(obj, (compat_str, list)) stack.append(len(obj)) + elif isinstance(pname, compat_str): # Member access + obj = stack.pop() + if isinstance(obj, _AVMClass): + res = obj.static_properties[pname] + stack.append(res) + continue + + assert isinstance(obj, (dict, _ScopeDict)),\ + 'Accessing member %r on %r' % (pname, obj) + res = obj.get(pname, undefined) + stack.append(res) else: # Assume attribute access idx = stack.pop() assert isinstance(idx, int) obj = stack.pop() assert isinstance(obj, list) stack.append(obj[idx]) + elif opcode == 104: # initproperty + index = u30() + value = stack.pop() + idx = self.multinames[index] + if isinstance(idx, _Multiname): + idx = stack.pop() + obj = stack.pop() + obj[idx] = value elif opcode == 115: # convert_ value = stack.pop() intvalue = int(value) stack.append(intvalue) elif opcode == 128: # coerce u30() + elif opcode == 130: # coerce_a + value = stack.pop() + # um, yes, it's any value + stack.append(value) elif opcode == 133: # coerce_s assert isinstance(stack[-1], (type(None), compat_str)) + elif opcode == 147: # decrement + value = stack.pop() + assert isinstance(value, int) + stack.append(value - 1) + elif opcode == 149: # 
typeof + value = stack.pop() + return { + _Undefined: 'undefined', + compat_str: 'String', + int: 'Number', + float: 'Number', + }[type(value)] elif opcode == 160: # add value2 = stack.pop() value1 = stack.pop() @@ -574,16 +774,37 @@ class SWFInterpreter(object): value1 = stack.pop() res = value1 - value2 stack.append(res) + elif opcode == 162: # multiply + value2 = stack.pop() + value1 = stack.pop() + res = value1 * value2 + stack.append(res) elif opcode == 164: # modulo value2 = stack.pop() value1 = stack.pop() res = value1 % value2 stack.append(res) + elif opcode == 168: # bitand + value2 = stack.pop() + value1 = stack.pop() + assert isinstance(value1, int) + assert isinstance(value2, int) + res = value1 & value2 + stack.append(res) + elif opcode == 171: # equals + value2 = stack.pop() + value1 = stack.pop() + result = value1 == value2 + stack.append(result) elif opcode == 175: # greaterequals value2 = stack.pop() value1 = stack.pop() result = value1 >= value2 stack.append(result) + elif opcode == 192: # increment_i + value = stack.pop() + assert isinstance(value, int) + stack.append(value + 1) elif opcode == 208: # getlocal_0 stack.append(registers[0]) elif opcode == 209: # getlocal_1 @@ -606,4 +827,3 @@ class SWFInterpreter(object): avm_class.method_pyfunctions[func_name] = resfunc return resfunc - |