diff options
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 262 | 
1 files changed, 122 insertions, 140 deletions
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2cd2fdce3..09bd423f5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -590,99 +590,83 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                          for tag_code, tag in extract_tags(content)                          if tag_code == 82)          p = code_tag.index(b'\0', 4) + 1 +        code_reader = io.BytesIO(code_tag[p:])          # Parse ABC (AVM2 ByteCode) -        def read_int(data=None, pos=None): -            if hasattr(data, 'read'): -                assert pos is None - -                res = 0 -                shift = 0 -                for _ in range(5): -                    buf = data.read(1) -                    assert len(buf) == 1 -                    b = struct.unpack('<B', buf)[0] -                    res = res | ((b & 0x7f) << shift) -                    if b & 0x80 == 0: -                        break -                    shift += 7 -                return res - -            if data is None: -                data = code_tag -            if pos is None: -                pos = p +        def read_int(reader=None): +            if reader is None: +                reader = code_reader              res = 0              shift = 0              for _ in range(5): -                b = struct.unpack('<B', data[pos:pos+1])[0] -                pos += 1 +                buf = reader.read(1) +                assert len(buf) == 1 +                b = struct.unpack('<B', buf)[0]                  res = res | ((b & 0x7f) << shift)                  if b & 0x80 == 0:                      break                  shift += 7 -            return (res, pos) -        assert read_int(b'\x00', 0) == (0, 1) -        assert read_int(b'\x10', 0) == (16, 1) -        assert read_int(b'\x34', 0) == (0x34, 1) -        assert read_int(b'\xb4\x12', 0) == (0x12 * 0x80 + 0x34, 2) -        assert read_int(b'\xff\xff\xff\x00', 0) == (0x1fffff, 4) - -        def u30(*args, **kwargs): -            res = read_int(*args, **kwargs) -            if isinstance(res, tuple): -                assert res[0] & 0xf0000000 == 0 -            else: -                assert res & 0xf0000000 == 0 +            return res + +        def u30(reader=None): +            res = read_int(reader) +            assert res & 0xf0000000 == 0              return res          u32 = read_int -        def s32(data=None, pos=None): -            v, pos = read_int(data, pos) +        def s32(reader=None): +            v = read_int(reader)              if v & 0x80000000 != 0:                  v = - ((v ^ 0xffffffff) + 1) -            return (v, pos) -        assert s32(b'\xff\xff\xff\xff\x0f', 0) == (-1, 5) - -        def string(): -            slen, p = u30() -            return (code_tag[p:p+slen].decode('utf-8'), p + slen) - -        def read_byte(data=None, pos=None): -            if data is None: -                data = code_tag -            if pos is None: -                pos = p -            res = struct.unpack('<B', data[pos:pos+1])[0] -            return (res, pos + 1) +            return v + +        def string(reader=None): +            if reader is None: +                reader = code_reader +            slen = u30(reader) +            resb = reader.read(slen) +            assert len(resb) == slen +            return resb.decode('utf-8') + +        def read_bytes(count, reader=None): +            if reader is None: +                reader = code_reader +            resb = reader.read(count) +            assert len(resb) == count +            return resb + +        def read_byte(reader=None): +            resb = read_bytes(1, reader=reader) +            res = struct.unpack('<B', resb)[0] +            return res          # minor_version + major_version -        p += 2 + 2 +        _ = read_bytes(4)          # Constant pool -        int_count, p = u30() +        int_count = u30()          for _c in range(1, int_count): -            _, p = s32() -        uint_count, p = u30() +            _ = s32() +        uint_count = u30()          for _c in range(1, uint_count): -            _, p = u32() -        double_count, p = u30() -        p += (double_count-1) * 8 -        string_count, p = u30() +            _ = u32() +        double_count = u30() +        _ = read_bytes((double_count-1) * 8) +        string_count = u30()          constant_strings = [u'']          for _c in range(1, string_count): -            s, p = string() +            s = string()              constant_strings.append(s) -        namespace_count, p = u30() +        namespace_count = u30()          for _c in range(1, namespace_count): -            p += 1        # kind -            _, p = u30()  # name -        ns_set_count, p = u30() +            _ = read_bytes(1)  # kind +            _ = u30()  # name +        ns_set_count = u30()          for _c in range(1, ns_set_count): -            count, p = u30() +            count = u30()              for _c2 in range(count): -                _, p = u30() -        multiname_count, p = u30() +                _ = u30() +        multiname_count = u30()          MULTINAME_SIZES = {              0x07: 2,  # QName              0x0d: 2,  # QNameA @@ -697,108 +681,106 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          }          multinames = [u'']          for _c in range(1, multiname_count): -            kind, p = u30() +            kind = u30()              assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind              if kind == 0x07: -                namespace_idx, p = u30() -                name_idx, p = u30() +                namespace_idx = u30() +                name_idx = u30()                  multinames.append(constant_strings[name_idx])              else:                  multinames.append('[MULTINAME kind: %d]' % kind)                  for _c2 in range(MULTINAME_SIZES[kind]): -                    _, p = u30() +                    _ = u30()          # Methods -        method_count, p = u30() +        method_count = u30()          MethodInfo = collections.namedtuple(              'MethodInfo',              ['NEED_ARGUMENTS', 'NEED_REST'])          method_infos = []          for method_id in range(method_count): -            param_count, p = u30() -            _, p = u30()  # return type +            param_count = u30() +            _ = u30()  # return type              for _ in range(param_count): -                _, p = u30()  # param type -            _, p = u30()  # name index (always 0 for youtube) -            flags, p = read_byte() +                _ = u30()  # param type +            _ = u30()  # name index (always 0 for youtube) +            flags = read_byte()              if flags & 0x08 != 0:                  # Options present -                option_count, p = u30() +                option_count = u30()                  for c in range(option_count): -                    _, p = u30()  # val -                    p += 1        # kind +                    _ = u30()  # val +                    _ = read_bytes(1)  # kind              if flags & 0x80 != 0:                  # Param names present                  for _ in range(param_count): -                    _, p = u30()  # param name +                    _ = u30()  # param name              mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)              method_infos.append(mi)          # Metadata -        metadata_count, p = u30() +        metadata_count = u30()          for _c in range(metadata_count): -            _, p = u30()  # name -            item_count, p = u30() +            _ = u30()  # name +            item_count = u30()              for _c2 in range(item_count): -                _, p = u30()  # key -                _, p = u30()  # value - -        def parse_traits_info(pos=None): -            if pos is None: -                pos = p -            trait_name_idx, pos = u30(pos=pos) -            kind_full, pos = read_byte(pos=pos) +                _ = u30()  # key +                _ = u30()  # value + +        def parse_traits_info(): +            trait_name_idx = u30() +            kind_full = read_byte()              kind = kind_full & 0x0f              attrs = kind_full >> 4              methods = {}              if kind in [0x00, 0x06]:  # Slot or Const -                _, pos = u30(pos=pos)  # Slot id -                type_name_idx, pos = u30(pos=pos) -                vindex, pos = u30(pos=pos) +                _ = u30()  # Slot id +                type_name_idx = u30() +                vindex = u30()                  if vindex != 0: -                    _, pos = read_byte(pos=pos)  # vkind +                    _ = read_byte()  # vkind              elif kind in [0x01, 0x02, 0x03]:  # Method / Getter / Setter -                _, pos = u30(pos=pos)  # disp_id -                method_idx, pos = u30(pos=pos) +                _ = u30()  # disp_id +                method_idx = u30()                  methods[multinames[trait_name_idx]] = method_idx              elif kind == 0x04:  # Class -                _, pos = u30(pos=pos)  # slot_id -                _, pos = u30(pos=pos)  # classi +                _ = u30()  # slot_id +                _ = u30()  # classi              elif kind == 0x05:  # Function -                _, pos = u30(pos=pos)  # slot_id -                function_idx, pos = u30(pos=pos) +                _ = u30()  # slot_id +                function_idx = u30()                  methods[function_idx] = multinames[trait_name_idx]              else:                  raise ExtractorError(u'Unsupported trait kind %d' % kind)              if attrs & 0x4 != 0:  # Metadata present -                metadata_count, pos = u30(pos=pos) +                metadata_count = u30()                  for _c3 in range(metadata_count): -                    _, pos = u30(pos=pos) +                    _ = u30() -            return (methods, pos) +            return methods          # Classes          TARGET_CLASSNAME = u'SignatureDecipher'          searched_idx = multinames.index(TARGET_CLASSNAME)          searched_class_id = None -        class_count, p = u30() +        class_count = u30()          for class_id in range(class_count): -            name_idx, p = u30() +            name_idx = u30()              if name_idx == searched_idx:                  # We found the class we're looking for!                  searched_class_id = class_id -            _, p = u30()  # super_name idx -            flags, p = read_byte() +            _ = u30()  # super_name idx +            flags = read_byte()              if flags & 0x08 != 0:  # Protected namespace is present -                protected_ns_idx, p = u30() -            intrf_count, p = u30() +                protected_ns_idx = u30() +            intrf_count = u30()              for _c2 in range(intrf_count): -                _, p = u30() -            _, p = u30()  # iinit -            trait_count, p = u30() +                _ = u30() +            _ = u30()  # iinit +            trait_count = u30()              for _c2 in range(trait_count): -                _, p = parse_traits_info() +                _ = parse_traits_info()          if searched_class_id is None:              raise ExtractorError(u'Target class %r not found' % @@ -807,10 +789,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          method_names = {}          method_idxs = {}          for class_id in range(class_count): -            _, p = u30()  # cinit -            trait_count, p = u30() +            _ = u30()  # cinit +            trait_count = u30()              for _c2 in range(trait_count): -                trait_methods, p = parse_traits_info() +                trait_methods = parse_traits_info()                  if class_id == searched_class_id:                      method_names.update(trait_methods.items())                      method_idxs.update(dict( @@ -818,40 +800,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                          for name, idx in trait_methods.items()))          # Scripts -        script_count, p = u30() +        script_count = u30()          for _c in range(script_count): -            _, p = u30()  # init -            trait_count, p = u30() +            _ = u30()  # init +            trait_count = u30()              for _c2 in range(trait_count): -                _, p = parse_traits_info() +                _ = parse_traits_info()          # Method bodies -        method_body_count, p = u30() +        method_body_count = u30()          Method = collections.namedtuple('Method', ['code', 'local_count'])          methods = {}          for _c in range(method_body_count): -            method_idx, p = u30() -            max_stack, p = u30() -            local_count, p = u30() -            init_scope_depth, p = u30() -            max_scope_depth, p = u30() -            code_length, p = u30() +            method_idx = u30() +            max_stack = u30() +            local_count = u30() +            init_scope_depth = u30() +            max_scope_depth = u30() +            code_length = u30() +            code = read_bytes(code_length)              if method_idx in method_idxs: -                m = Method(code_tag[p:p+code_length], local_count) +                m = Method(code, local_count)                  methods[method_idxs[method_idx]] = m -            p += code_length -            exception_count, p = u30() +            exception_count = u30()              for _c2 in range(exception_count): -                _, p = u30()  # from -                _, p = u30()  # to -                _, p = u30()  # target -                _, p = u30()  # exc_type -                _, p = u30()  # var_name -            trait_count, p = u30() +                _ = u30()  # from +                _ = u30()  # to +                _ = u30()  # target +                _ = u30()  # exc_type +                _ = u30()  # var_name +            trait_count = u30()              for _c2 in range(trait_count): -                _, p = parse_traits_info() +                _ = parse_traits_info() -        assert p == len(code_tag) +        assert p + code_reader.tell() == len(code_tag)          assert len(methods) == len(method_idxs)          method_pyfunctions = {} | 
