From 5425626790a46f9b5bdecf4e33bb254c4c2423ea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 18 Jul 2014 10:24:28 +0200 Subject: [youtube] Move swfinterp into its own file --- youtube_dl/swfinterp.py | 503 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 youtube_dl/swfinterp.py (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py new file mode 100644 index 000000000..1cd292138 --- /dev/null +++ b/youtube_dl/swfinterp.py @@ -0,0 +1,503 @@ +from __future__ import unicode_literals + +import collections +import io +import struct +import zlib + +from .utils import ExtractorError + + +def _extract_tags(content): + pos = 0 + while pos < len(content): + header16 = struct.unpack('> 6 + tag_len = header16 & 0x3f + if tag_len == 0x3f: + tag_len = struct.unpack('= 0x80) else b'\x00' + return struct.unpack('!i', first_byte + bs) + + +def _read_string(reader): + slen = _u30(reader) + resb = reader.read(slen) + assert len(resb) == slen + return resb.decode('utf-8') + + +def _read_bytes(count, reader): + if reader is None: + reader = code_reader + resb = reader.read(count) + assert len(resb) == count + return resb + + +def _read_byte(reader): + resb = _read_bytes(1, reader=reader) + res = struct.unpack('> 4 + methods = {} + if kind in [0x00, 0x06]: # Slot or Const + u30() # Slot id + u30() # type_name_idx + vindex = u30() + if vindex != 0: + read_byte() # vkind + elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter + u30() # disp_id + method_idx = u30() + methods[self.multinames[trait_name_idx]] = method_idx + elif kind == 0x04: # Class + u30() # slot_id + u30() # classi + elif kind == 0x05: # Function + u30() # slot_id + function_idx = u30() + methods[function_idx] = self.multinames[trait_name_idx] + else: + raise ExtractorError('Unsupported trait kind %d' % kind) + + if attrs & 0x4 != 0: # Metadata present + metadata_count = u30() + for _c3 in range(metadata_count): + u30() # metadata index + + return methods + + # Classes + class_count = u30() + classes = [] + for class_id in range(class_count): + name_idx = u30() + classes.append(_AVMClass(name_idx, self.multinames[name_idx])) + u30() # super_name idx + flags = read_byte() + if flags & 0x08 != 0: # Protected namespace is present + u30() # protected_ns_idx + intrf_count = u30() + for _c2 in range(intrf_count): + u30() + u30() # iinit + trait_count = u30() + for _c2 in range(trait_count): + parse_traits_info() + assert len(classes) == class_count + self._classes_by_name = dict((c.name, c) for c in classes) + + for avm_class in classes: + u30() # cinit + trait_count = u30() + for _c2 in range(trait_count): + trait_methods = parse_traits_info() + avm_class.method_names.update(trait_methods.items()) + avm_class.method_idxs.update(dict( + (idx, name) + for name, idx in trait_methods.items())) + + # Scripts + script_count = u30() + for _c in range(script_count): + u30() # init + trait_count = u30() + for _c2 in range(trait_count): + parse_traits_info() + + # Method bodies + method_body_count = u30() + Method = collections.namedtuple('Method', ['code', 'local_count']) + for _c in range(method_body_count): + method_idx = u30() + u30() # max_stack + local_count = u30() + u30() # init_scope_depth + u30() # max_scope_depth + code_length = u30() + code = read_bytes(code_length) + for avm_class in classes: + if method_idx in avm_class.method_idxs: + m = Method(code, local_count) + avm_class.methods[avm_class.method_idxs[method_idx]] = m + exception_count = u30() + for _c2 in range(exception_count): + u30() # from + u30() # to + u30() # target + u30() # exc_type + u30() # var_name + trait_count = u30() + for _c2 in range(trait_count): + parse_traits_info() + + assert p + code_reader.tell() == len(code_tag) + + def extract_class(self, class_name): + try: + return self._classes_by_name[class_name] + except KeyError: + raise ExtractorError('Class %r not found' % class_name) + + def extract_function(self, avm_class, func_name): + if func_name in avm_class.method_pyfunctions: + return avm_class.method_pyfunctions[func_name] + if func_name in self._classes_by_name: + return self._classes_by_name[func_name].make_object() + if func_name not in avm_class.methods: + raise ExtractorError('Cannot find function %r' % func_name) + m = avm_class.methods[func_name] + + def resfunc(args): + # Helper functions + coder = io.BytesIO(m.code) + s24 = lambda: _s24(coder) + u30 = lambda: _u30(coder) + + print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) + registers = ['(this)'] + list(args) + [None] * m.local_count + stack = [] + while True: + opcode = _read_byte(coder) + print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) + if opcode == 17: # iftrue + offset = s24() + value = stack.pop() + if value: + coder.seek(coder.tell() + offset) + elif opcode == 36: # pushbyte + v = _read_byte(coder) + stack.append(v) + elif opcode == 44: # pushstring + idx = u30() + stack.append(constant_strings[idx]) + elif opcode == 48: # pushscope + # We don't implement the scope register, so we'll just + # ignore the popped value + new_scope = stack.pop() + elif opcode == 70: # callproperty + index = u30() + mname = self.multinames[index] + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + if mname == 'split': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + assert isinstance(obj, compat_str) + if args[0] == '': + res = list(obj) + else: + res = obj.split(args[0]) + stack.append(res) + elif mname == 'slice': + assert len(args) == 1 + assert isinstance(args[0], int) + assert isinstance(obj, list) + res = obj[args[0]:] + stack.append(res) + elif mname == 'join': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + assert isinstance(obj, list) + res = args[0].join(obj) + stack.append(res) + elif mname in avm_class.method_pyfunctions: + stack.append(avm_class.method_pyfunctions[mname](args)) + else: + raise NotImplementedError( + 'Unsupported property %r on %r' + % (mname, obj)) + elif opcode == 72: # returnvalue + res = stack.pop() + return res + elif opcode == 74: # constructproperty + index = u30() + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + + mname = self.multinames[index] + construct_method = self.extract_function( + obj.avm_class, mname) + # We do not actually call the constructor for now; + # we just pretend it does nothing + stack.append(obj) + elif opcode == 79: # callpropvoid + index = u30() + mname = self.multinames[index] + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + if mname == 'reverse': + assert isinstance(obj, list) + obj.reverse() + else: + raise NotImplementedError( + 'Unsupported (void) property %r on %r' + % (mname, obj)) + elif opcode == 86: # newarray + arg_count = u30() + arr = [] + for i in range(arg_count): + arr.append(stack.pop()) + arr = arr[::-1] + stack.append(arr) + elif opcode == 93: # findpropstrict + index = u30() + mname = self.multinames[index] + res = self.extract_function(avm_class, mname) + stack.append(res) + elif opcode == 94: # findproperty + index = u30() + mname = self.multinames[index] + res = avm_class.variables.get(mname) + stack.append(res) + elif opcode == 96: # getlex + index = u30() + mname = self.multinames[index] + res = avm_class.variables.get(mname, None) + stack.append(res) + elif opcode == 97: # setproperty + index = u30() + value = stack.pop() + idx = self.multinames[index] + obj = stack.pop() + obj[idx] = value + elif opcode == 98: # getlocal + index = u30() + stack.append(registers[index]) + elif opcode == 99: # setlocal + index = u30() + value = stack.pop() + registers[index] = value + elif opcode == 102: # getproperty + index = u30() + pname = self.multinames[index] + if pname == 'length': + obj = stack.pop() + assert isinstance(obj, list) + stack.append(len(obj)) + else: # Assume attribute access + idx = stack.pop() + assert isinstance(idx, int) + obj = stack.pop() + assert isinstance(obj, list) + stack.append(obj[idx]) + elif opcode == 128: # coerce + u30() + elif opcode == 133: # coerce_s + assert isinstance(stack[-1], (type(None), compat_str)) + elif opcode == 164: # modulo + value2 = stack.pop() + value1 = stack.pop() + res = value1 % value2 + stack.append(res) + elif opcode == 175: # greaterequals + value2 = stack.pop() + value1 = stack.pop() + result = value1 >= value2 + stack.append(result) + elif opcode == 208: # getlocal_0 + stack.append(registers[0]) + elif opcode == 209: # getlocal_1 + stack.append(registers[1]) + elif opcode == 210: # getlocal_2 + stack.append(registers[2]) + elif opcode == 211: # getlocal_3 + stack.append(registers[3]) + elif opcode == 214: # setlocal_2 + registers[2] = stack.pop() + elif opcode == 215: # setlocal_3 + registers[3] = stack.pop() + else: + raise NotImplementedError( + 'Unsupported opcode %d' % opcode) + + avm_class.method_pyfunctions[func_name] = resfunc + return resfunc + -- cgit v1.2.3 From 0cb2056304178ae8944e84c5bc72f96102291a12 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 18 Jul 2014 14:20:34 +0200 Subject: [swfinterp] Start working on basic tests --- youtube_dl/swfinterp.py | 56 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 1cd292138..49fade364 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -8,8 +8,22 @@ import zlib from .utils import ExtractorError -def _extract_tags(content): - pos = 0 +def _extract_tags(file_contents): + if file_contents[1:3] != b'WS': + raise ExtractorError( + 'Not an SWF file; header is %r' % file_contents[:3]) + if file_contents[:1] == b'C': + content = zlib.decompress(file_contents[8:]) + else: + raise NotImplementedError( + 'Unsupported compression format %r' % + file_contents[:1]) + + # Determine number of bits in framesize rectangle + framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3 + framesize_len = (5 + 4 * framesize_nbits + 7) // 8 + + pos = framesize_len + 2 + 2 while pos < len(content): header16 = struct.unpack('= 0 resb = reader.read(count) assert len(resb) == count return resb @@ -103,18 +118,8 @@ def _read_byte(reader): class SWFInterpreter(object): def __init__(self, file_contents): - if file_contents[1:3] != b'WS': - raise ExtractorError( - 'Not an SWF file; header is %r' % file_contents[:3]) - if file_contents[:1] == b'C': - content = zlib.decompress(file_contents[8:]) - else: - raise NotImplementedError( - 'Unsupported compression format %r' % - file_contents[:1]) - code_tag = next(tag - for tag_code, tag in _extract_tags(content) + for tag_code, tag in _extract_tags(file_contents) if tag_code == 82) p = code_tag.index(b'\0', 4) + 1 code_reader = io.BytesIO(code_tag[p:]) @@ -139,7 +144,7 @@ class SWFInterpreter(object): for _c in range(1, uint_count): u32() double_count = u30() - read_bytes((double_count - 1) * 8) + read_bytes(max(0, (double_count - 1)) * 8) string_count = u30() constant_strings = [''] for _c in range(1, string_count): @@ -349,6 +354,9 @@ class SWFInterpreter(object): elif opcode == 36: # pushbyte v = _read_byte(coder) stack.append(v) + elif opcode == 42: # dup + value = stack[-1] + stack.append(value) elif opcode == 44: # pushstring idx = u30() stack.append(constant_strings[idx]) @@ -468,10 +476,24 @@ class SWFInterpreter(object): obj = stack.pop() assert isinstance(obj, list) stack.append(obj[idx]) + elif opcode == 115: # convert_ + value = stack.pop() + intvalue = int(value) + stack.append(intvalue) elif opcode == 128: # coerce u30() elif opcode == 133: # coerce_s assert isinstance(stack[-1], (type(None), compat_str)) + elif opcode == 160: # add + value2 = stack.pop() + value1 = stack.pop() + res = value1 + value2 + stack.append(res) + elif opcode == 161: # subtract + value2 = stack.pop() + value1 = stack.pop() + res = value1 - value2 + stack.append(res) elif opcode == 164: # modulo value2 = stack.pop() value1 = stack.pop() -- cgit v1.2.3 From e75c24e88907f329c57cf05d729dbf599349bb50 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 20 Jul 2014 00:03:54 +0200 Subject: [swfinterp] Extend tests and fix parsing --- youtube_dl/swfinterp.py | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 49fade364..64a518fc6 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -39,6 +39,16 @@ def _extract_tags(file_contents): pos += tag_len +class _AVM_Object(object): + def __init__(self, value=None, name_hint=None): + self.value = value + self.name_hint = name_hint + + def __repr__(self): + nh = '' if self.name_hint is None else (' %s' % self.name_hint) + return 'AVMObject%s(%r)' % (nh, self.value) + + class _AVMClass_Object(object): def __init__(self, avm_class): self.avm_class = avm_class @@ -92,8 +102,8 @@ def _s32(reader): def _s24(reader): bs = reader.read(3) assert len(bs) == 3 - first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' - return struct.unpack('!i', first_byte + bs) + last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00' + return struct.unpack(' Date: Sun, 20 Jul 2014 00:25:58 +0200 Subject: [swfinterp] Add support for multiple classes --- youtube_dl/swfinterp.py | 48 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 6 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 64a518fc6..f7a3889a0 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -5,7 +5,10 @@ import io import struct import zlib -from .utils import ExtractorError +from .utils import ( + compat_str, + ExtractorError, +) def _extract_tags(file_contents): @@ -65,7 +68,26 @@ class _AVMClass(object): self.method_idxs = {} self.methods = {} self.method_pyfunctions = {} - self.variables = {} + + class ScopeDict(dict): + def __init__(self, avm_class): + super(ScopeDict, self).__init__() + self.avm_class = avm_class + + def __getitem__(self, k): + print('getting %r' % k) + return super(ScopeDict, self).__getitem__(k) + + def __contains__(self, k): + print('contains %r' % k) + return super(ScopeDict, self).__contains__(k) + + def __repr__(self): + return '%s__Scope(%s)' % ( + self.avm_class.name, + super(ScopeDict, self).__repr__()) + + self.variables = ScopeDict(self) def make_object(self): return _AVMClass_Object(self) @@ -156,10 +178,10 @@ class SWFInterpreter(object): double_count = u30() read_bytes(max(0, (double_count - 1)) * 8) string_count = u30() - constant_strings = [''] + self.constant_strings = [''] for _c in range(1, string_count): s = _read_string(code_reader) - constant_strings.append(s) + self.constant_strings.append(s) namespace_count = u30() for _c in range(1, namespace_count): read_bytes(1) # kind @@ -189,7 +211,7 @@ class SWFInterpreter(object): if kind == 0x07: u30() # namespace_idx name_idx = u30() - self.multinames.append(constant_strings[name_idx]) + self.multinames.append(self.constant_strings[name_idx]) else: self.multinames.append('[MULTINAME kind: %d]' % kind) for _c2 in range(MULTINAME_SIZES[kind]): @@ -375,7 +397,7 @@ class SWFInterpreter(object): stack.append(value) elif opcode == 44: # pushstring idx = u30() - stack.append(constant_strings[idx]) + stack.append(self.constant_strings[idx]) elif opcode == 48: # pushscope new_scope = stack.pop() scopes.append(new_scope) @@ -450,6 +472,16 @@ class SWFInterpreter(object): arr.append(stack.pop()) arr = arr[::-1] stack.append(arr) + elif opcode == 93: # findpropstrict + index = u30() + mname = self.multinames[index] + for s in reversed(scopes): + if mname in s: + res = s + break + else: + res = scopes[0] + stack.append(res) elif opcode == 94: # findproperty index = u30() mname = self.multinames[index] @@ -535,6 +567,10 @@ class SWFInterpreter(object): stack.append(registers[2]) elif opcode == 211: # getlocal_3 stack.append(registers[3]) + elif opcode == 212: # setlocal_0 + registers[0] = stack.pop() + elif opcode == 213: # setlocal_1 + registers[1] = stack.pop() elif opcode == 214: # setlocal_2 registers[2] = stack.pop() elif opcode == 215: # setlocal_3 -- cgit v1.2.3 From 01b4b745749bb92b4a56b4201d699740cbf450ab Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 20 Jul 2014 12:47:15 +0200 Subject: [swfinterp] Add support for calls to instance methods --- youtube_dl/swfinterp.py | 117 +++++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 57 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index f7a3889a0..8ccb64c9d 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -42,16 +42,6 @@ def _extract_tags(file_contents): pos += tag_len -class _AVM_Object(object): - def __init__(self, value=None, name_hint=None): - self.value = value - self.name_hint = name_hint - - def __repr__(self): - nh = '' if self.name_hint is None else (' %s' % self.name_hint) - return 'AVMObject%s(%r)' % (nh, self.value) - - class _AVMClass_Object(object): def __init__(self, avm_class): self.avm_class = avm_class @@ -74,14 +64,6 @@ class _AVMClass(object): super(ScopeDict, self).__init__() self.avm_class = avm_class - def __getitem__(self, k): - print('getting %r' % k) - return super(ScopeDict, self).__getitem__(k) - - def __contains__(self, k): - print('contains %r' % k) - return super(ScopeDict, self).__contains__(k) - def __repr__(self): return '%s__Scope(%s)' % ( self.avm_class.name, @@ -92,6 +74,15 @@ class _AVMClass(object): def make_object(self): return _AVMClass_Object(self) + def __repr__(self): + return '_AVMClass(%s)' % (self.name) + + def register_methods(self, methods): + self.method_names.update(methods.items()) + self.method_idxs.update(dict( + (idx, name) + for name, idx in methods.items())) + def _read_int(reader): res = 0 @@ -290,7 +281,11 @@ class SWFInterpreter(object): classes = [] for class_id in range(class_count): name_idx = u30() - classes.append(_AVMClass(name_idx, self.multinames[name_idx])) + + cname = self.multinames[name_idx] + avm_class = _AVMClass(name_idx, cname) + classes.append(avm_class) + u30() # super_name idx flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present @@ -301,7 +296,9 @@ class SWFInterpreter(object): u30() # iinit trait_count = u30() for _c2 in range(trait_count): - parse_traits_info() + trait_methods = parse_traits_info() + avm_class.register_methods(trait_methods) + assert len(classes) == class_count self._classes_by_name = dict((c.name, c) for c in classes) @@ -310,10 +307,7 @@ class SWFInterpreter(object): trait_count = u30() for _c2 in range(trait_count): trait_methods = parse_traits_info() - avm_class.method_names.update(trait_methods.items()) - avm_class.method_idxs.update(dict( - (idx, name) - for name, idx in trait_methods.items())) + avm_class.register_methods(trait_methods) # Scripts script_count = u30() @@ -358,12 +352,14 @@ class SWFInterpreter(object): raise ExtractorError('Class %r not found' % class_name) def extract_function(self, avm_class, func_name): + print('Extracting %s.%s' % (avm_class.name, func_name)) if func_name in avm_class.method_pyfunctions: return avm_class.method_pyfunctions[func_name] if func_name in self._classes_by_name: return self._classes_by_name[func_name].make_object() if func_name not in avm_class.methods: - raise ExtractorError('Cannot find function %r' % func_name) + raise ExtractorError('Cannot find function %s.%s' % ( + avm_class.name, func_name)) m = avm_class.methods[func_name] def resfunc(args): @@ -375,7 +371,8 @@ class SWFInterpreter(object): print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] - scopes = collections.deque([avm_class.variables]) + scopes = collections.deque([ + self._classes_by_name, avm_class.variables]) while True: opcode = _read_byte(coder) print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) @@ -408,33 +405,38 @@ class SWFInterpreter(object): args = list(reversed( [stack.pop() for _ in range(arg_count)])) obj = stack.pop() - if mname == 'split': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, compat_str) - if args[0] == '': - res = list(obj) - else: - res = obj.split(args[0]) - stack.append(res) - elif mname == 'slice': - assert len(args) == 1 - assert isinstance(args[0], int) - assert isinstance(obj, list) - res = obj[args[0]:] - stack.append(res) - elif mname == 'join': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, list) - res = args[0].join(obj) + + if isinstance(obj, _AVMClass_Object): + func = self.extract_function(obj.avm_class, mname) + res = func(args) stack.append(res) - elif mname in avm_class.method_pyfunctions: - stack.append(avm_class.method_pyfunctions[mname](args)) - else: - raise NotImplementedError( - 'Unsupported property %r on %r' - % (mname, obj)) + continue + elif isinstance(obj, compat_str): + if mname == 'split': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + if args[0] == '': + res = list(obj) + else: + res = obj.split(args[0]) + stack.append(res) + continue + elif isinstance(obj, list): + if mname == 'slice': + assert len(args) == 1 + assert isinstance(args[0], int) + res = obj[args[0]:] + stack.append(res) + continue + elif mname == 'join': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + res = args[0].join(obj) + stack.append(res) + continue + raise NotImplementedError( + 'Unsupported property %r on %r' + % (mname, obj)) elif opcode == 72: # returnvalue res = stack.pop() return res @@ -446,11 +448,12 @@ class SWFInterpreter(object): obj = stack.pop() mname = self.multinames[index] + assert isinstance(obj, _AVMClass) construct_method = self.extract_function( - obj.avm_class, mname) + obj, mname) # We do not actually call the constructor for now; # we just pretend it does nothing - stack.append(obj) + stack.append(obj.make_object()) elif opcode == 79: # callpropvoid index = u30() mname = self.multinames[index] @@ -481,7 +484,7 @@ class SWFInterpreter(object): break else: res = scopes[0] - stack.append(res) + stack.append(res[mname]) elif opcode == 94: # findproperty index = u30() mname = self.multinames[index] @@ -490,7 +493,7 @@ class SWFInterpreter(object): res = s break else: - res = scopes[0] + res = avm_class.variables stack.append(res) elif opcode == 96: # getlex index = u30() @@ -500,7 +503,7 @@ class SWFInterpreter(object): scope = s break else: - scope = scopes[0] + scope = avm_class.variables # I cannot find where static variables are initialized # so let's just return None res = scope.get(mname) -- cgit v1.2.3 From 0d989011fffd768116d0ca81f6c067c7e0876f36 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 20 Jul 2014 14:49:10 +0200 Subject: [swfinterp] Add support for calling methods on objects --- youtube_dl/swfinterp.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 8ccb64c9d..d043c2f99 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -50,6 +50,17 @@ class _AVMClass_Object(object): return '%s#%x' % (self.avm_class.name, id(self)) +class _ScopeDict(dict): + def __init__(self, avm_class): + super(_ScopeDict, self).__init__() + self.avm_class = avm_class + + def __repr__(self): + return '%s__Scope(%s)' % ( + self.avm_class.name, + super(_ScopeDict, self).__repr__()) + + class _AVMClass(object): def __init__(self, name_idx, name): self.name_idx = name_idx @@ -59,17 +70,7 @@ class _AVMClass(object): self.methods = {} self.method_pyfunctions = {} - class ScopeDict(dict): - def __init__(self, avm_class): - super(ScopeDict, self).__init__() - self.avm_class = avm_class - - def __repr__(self): - return '%s__Scope(%s)' % ( - self.avm_class.name, - super(ScopeDict, self).__repr__()) - - self.variables = ScopeDict(self) + self.variables = _ScopeDict(self) def make_object(self): return _AVMClass_Object(self) @@ -411,6 +412,14 @@ class SWFInterpreter(object): res = func(args) stack.append(res) continue + elif isinstance(obj, _ScopeDict): + if mname in obj.avm_class.method_names: + func = self.extract_function(obj.avm_class, mname) + res = func(args) + else: + res = obj[mname] + stack.append(res) + continue elif isinstance(obj, compat_str): if mname == 'split': assert len(args) == 1 -- cgit v1.2.3 From decf2ae400d52e98bcd073a69b24b3dbf3d38d53 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 20 Jul 2014 18:28:49 +0200 Subject: [swfinterp] Correct array access --- youtube_dl/swfinterp.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index d043c2f99..812ee7e8c 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -85,6 +85,14 @@ class _AVMClass(object): for name, idx in methods.items())) +class _Multiname(object): + def __init__(self, kind): + self.kind = kind + + def __repr__(self): + return '[MULTINAME kind: 0x%x]' % self.kind + + def _read_int(reader): res = 0 shift = 0 @@ -205,7 +213,7 @@ class SWFInterpreter(object): name_idx = u30() self.multinames.append(self.constant_strings[name_idx]) else: - self.multinames.append('[MULTINAME kind: %d]' % kind) + self.multinames.append(_Multiname(kind)) for _c2 in range(MULTINAME_SIZES[kind]): u30() @@ -399,6 +407,13 @@ class SWFInterpreter(object): elif opcode == 48: # pushscope new_scope = stack.pop() scopes.append(new_scope) + elif opcode == 66: # construct + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + res = obj.avm_class.make_object() + stack.append(res) elif opcode == 70: # callproperty index = u30() mname = self.multinames[index] @@ -521,7 +536,10 @@ class SWFInterpreter(object): index = u30() value = stack.pop() idx = self.multinames[index] + if isinstance(idx, _Multiname): + idx = stack.pop() obj = stack.pop() + print('Setting %r.%r = %r' % (obj, idx, value)) obj[idx] = value elif opcode == 98: # getlocal index = u30() -- cgit v1.2.3 From 1b38b5be867a01808390ee320fa03c6512177a9b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 20 Jul 2014 18:29:09 +0200 Subject: [swfinterp] Remove debugging code --- youtube_dl/swfinterp.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 812ee7e8c..79d86152d 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -361,7 +361,6 @@ class SWFInterpreter(object): raise ExtractorError('Class %r not found' % class_name) def extract_function(self, avm_class, func_name): - print('Extracting %s.%s' % (avm_class.name, func_name)) if func_name in avm_class.method_pyfunctions: return avm_class.method_pyfunctions[func_name] if func_name in self._classes_by_name: @@ -377,14 +376,12 @@ class SWFInterpreter(object): s24 = lambda: _s24(coder) u30 = lambda: _u30(coder) - print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] scopes = collections.deque([ self._classes_by_name, avm_class.variables]) while True: opcode = _read_byte(coder) - print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) if opcode == 17: # iftrue offset = s24() value = stack.pop() @@ -539,7 +536,6 @@ class SWFInterpreter(object): if isinstance(idx, _Multiname): idx = stack.pop() obj = stack.pop() - print('Setting %r.%r = %r' % (obj, idx, value)) obj[idx] = value elif opcode == 98: # getlocal index = u30() -- cgit v1.2.3 From c13bf7c836e1befb28070fe393e474566a43409a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 20 Jul 2014 23:20:15 +0200 Subject: [swfinterp] Use helper function struct_unpack for old Python 2.x releases (#3270) --- youtube_dl/swfinterp.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 79d86152d..87ec7bcff 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -2,12 +2,12 @@ from __future__ import unicode_literals import collections import io -import struct import zlib from .utils import ( compat_str, ExtractorError, + struct_unpack, ) @@ -23,17 +23,17 @@ def _extract_tags(file_contents): file_contents[:1]) # Determine number of bits in framesize rectangle - framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3 + framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3 framesize_len = (5 + 4 * framesize_nbits + 7) // 8 pos = framesize_len + 2 + 2 while pos < len(content): - header16 = struct.unpack('> 6 tag_len = header16 & 0x3f if tag_len == 0x3f: - tag_len = struct.unpack('= 0x80) else b'\x00' - return struct.unpack(' Date: Sun, 20 Jul 2014 23:36:21 +0200 Subject: [swfinterp] Fix _u32 name --- youtube_dl/swfinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 87ec7bcff..a6f4ba6e0 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -111,7 +111,7 @@ def _u30(reader): res = _read_int(reader) assert res & 0xf0000000 == 0 return res -u32 = _read_int +_u32 = _read_int def _s32(reader): -- cgit v1.2.3 From 7fbf54dc62b43884d49d1d96854dc82a38b8b42f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 20 Jul 2014 23:37:10 +0200 Subject: [swfinterp] Remove (at the moment) dead code --- youtube_dl/swfinterp.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'youtube_dl/swfinterp.py') diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index a6f4ba6e0..b63c65b20 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -470,8 +470,7 @@ class SWFInterpreter(object): mname = self.multinames[index] assert isinstance(obj, _AVMClass) - construct_method = self.extract_function( - obj, mname) + # We do not actually call the constructor for now; # we just pretend it does nothing stack.append(obj.make_object()) -- cgit v1.2.3