diff options
author | Wladimir J. van der Laan <laanwj@protonmail.com> | 2020-11-20 09:15:44 +0100 |
---|---|---|
committer | Wladimir J. van der Laan <laanwj@protonmail.com> | 2020-11-22 11:11:32 +0100 |
commit | 634f6ec4eb9997d7bd0f8209fad49a4171d42384 (patch) | |
tree | 0c9e1de57c164790c4b6cc8397c6c8ce0117b983 | |
parent | fdd068507d2694137d72638d87ea961e6f16a753 (diff) |
contrib: Parse ELF directly for symbol and security checks
Instead of the ever-messier text parsing of the output of the readelf
tool (which is clearly meant for human consumption not to be machine
parseable), parse the ELF binaries directly.
Add a small dependency-less ELF parser specific to the checks.
This is slightly more secure, too, because it removes potential
ambiguity due to misparsing and changes in the output format of `elfread`. It
also allows for stricter and more specific ELF format checks in the future.
This removes the build-time dependency for `readelf`.
It passes the test-security-check for me locally, though I haven't
checked on all platforms.
-rw-r--r-- | Makefile.am | 3 | ||||
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | contrib/devtools/pixie.py | 323 | ||||
-rwxr-xr-x | contrib/devtools/security-check.py | 170 | ||||
-rwxr-xr-x | contrib/devtools/symbol-check.py | 81 | ||||
-rw-r--r-- | src/Makefile.am | 4 |
6 files changed, 413 insertions, 169 deletions
diff --git a/Makefile.am b/Makefile.am index c8af4228f3..76dc0dd10a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,7 +52,8 @@ DIST_SHARE = \ $(top_srcdir)/share/rpcauth BIN_CHECKS=$(top_srcdir)/contrib/devtools/symbol-check.py \ - $(top_srcdir)/contrib/devtools/security-check.py + $(top_srcdir)/contrib/devtools/security-check.py \ + $(top_srcdir)/contrib/devtools/pixie.py WINDOWS_PACKAGING = $(top_srcdir)/share/pixmaps/bitcoin.ico \ $(top_srcdir)/share/pixmaps/nsis-header.bmp \ diff --git a/configure.ac b/configure.ac index e1548e5c36..cc2801e97e 100644 --- a/configure.ac +++ b/configure.ac @@ -104,7 +104,6 @@ AC_PATH_PROG([GIT], [git]) AC_PATH_PROG(CCACHE,ccache) AC_PATH_PROG(XGETTEXT,xgettext) AC_PATH_PROG(HEXDUMP,hexdump) -AC_PATH_TOOL(READELF, readelf) AC_PATH_TOOL(CPPFILT, c++filt) AC_PATH_TOOL(OBJCOPY, objcopy) AC_PATH_PROG(DOXYGEN, doxygen) diff --git a/contrib/devtools/pixie.py b/contrib/devtools/pixie.py new file mode 100644 index 0000000000..8cf06a799a --- /dev/null +++ b/contrib/devtools/pixie.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 +# Copyright (c) 2020 Wladimir J. van der Laan +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +''' +Compact, self-contained ELF implementation for bitcoin-core security checks. +''' +import struct +import types +from typing import Dict, List, Optional, Union, Tuple + +# you can find all these values in elf.h +EI_NIDENT = 16 + +# Byte indices in e_ident +EI_CLASS = 4 # ELFCLASSxx +EI_DATA = 5 # ELFDATAxxxx + +ELFCLASS32 = 1 # 32-bit +ELFCLASS64 = 2 # 64-bit + +ELFDATA2LSB = 1 # little endian +ELFDATA2MSB = 2 # big endian + +# relevant values for e_machine +EM_386 = 3 +EM_PPC64 = 21 +EM_ARM = 40 +EM_AARCH64 = 183 +EM_X86_64 = 62 +EM_RISCV = 243 + +# relevant values for e_type +ET_DYN = 3 + +# relevant values for sh_type +SHT_PROGBITS = 1 +SHT_STRTAB = 3 +SHT_DYNAMIC = 6 +SHT_DYNSYM = 11 +SHT_GNU_verneed = 0x6ffffffe +SHT_GNU_versym = 0x6fffffff + +# relevant values for p_type +PT_LOAD = 1 +PT_GNU_STACK = 0x6474e551 +PT_GNU_RELRO = 0x6474e552 + +# relevant values for p_flags +PF_X = (1 << 0) +PF_W = (1 << 1) +PF_R = (1 << 2) + +# relevant values for d_tag +DT_NEEDED = 1 +DT_FLAGS = 30 + +# relevant values of `d_un.d_val' in the DT_FLAGS entry +DF_BIND_NOW = 0x00000008 + +# relevant d_tags with string payload +STRING_TAGS = {DT_NEEDED} + +# rrlevant values for ST_BIND subfield of st_info (symbol binding) +STB_LOCAL = 0 + +class ELFRecord(types.SimpleNamespace): + '''Unified parsing for ELF records.''' + def __init__(self, data: bytes, offset: int, eh: 'ELFHeader', total_size: Optional[int]) -> None: + hdr_struct = self.STRUCT[eh.ei_class][0][eh.ei_data] + if total_size is not None and hdr_struct.size > total_size: + raise ValueError(f'{self.__class__.__name__} header size too small ({total_size} < {hdr_struct.size})') + for field, value in zip(self.STRUCT[eh.ei_class][1], hdr_struct.unpack(data[offset:offset + hdr_struct.size])): + setattr(self, field, value) + +def BiStruct(chars: str) -> Dict[int, struct.Struct]: + '''Compile a struct parser for both endians.''' + return { + ELFDATA2LSB: struct.Struct('<' + chars), + ELFDATA2MSB: struct.Struct('>' + chars), + } + +class ELFHeader(ELFRecord): + FIELDS = ['e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff', 'e_shoff', 'e_flags', 'e_ehsize', 'e_phentsize', 'e_phnum', 'e_shentsize', 'e_shnum', 'e_shstrndx'] + STRUCT = { + ELFCLASS32: (BiStruct('HHIIIIIHHHHHH'), FIELDS), + ELFCLASS64: (BiStruct('HHIQQQIHHHHHH'), FIELDS), + } + + def __init__(self, data: bytes, offset: int) -> None: + self.e_ident = data[offset:offset + EI_NIDENT] + if self.e_ident[0:4] != b'\x7fELF': + raise ValueError('invalid ELF magic') + self.ei_class = self.e_ident[EI_CLASS] + self.ei_data = self.e_ident[EI_DATA] + + super().__init__(data, offset + EI_NIDENT, self, None) + + def __repr__(self) -> str: + return f'Header(e_ident={self.e_ident!r}, e_type={self.e_type}, e_machine={self.e_machine}, e_version={self.e_version}, e_entry={self.e_entry}, e_phoff={self.e_phoff}, e_shoff={self.e_shoff}, e_flags={self.e_flags}, e_ehsize={self.e_ehsize}, e_phentsize={self.e_phentsize}, e_phnum={self.e_phnum}, e_shentsize={self.e_shentsize}, e_shnum={self.e_shnum}, e_shstrndx={self.e_shstrndx})' + +class Section(ELFRecord): + name: Optional[bytes] = None + FIELDS = ['sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset', 'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize'] + STRUCT = { + ELFCLASS32: (BiStruct('IIIIIIIIII'), FIELDS), + ELFCLASS64: (BiStruct('IIQQQQIIQQ'), FIELDS), + } + + def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None: + super().__init__(data, offset, eh, eh.e_shentsize) + self._data = data + + def __repr__(self) -> str: + return f'Section(sh_name={self.sh_name}({self.name!r}), sh_type=0x{self.sh_type:x}, sh_flags={self.sh_flags}, sh_addr=0x{self.sh_addr:x}, sh_offset=0x{self.sh_offset:x}, sh_size={self.sh_size}, sh_link={self.sh_link}, sh_info={self.sh_info}, sh_addralign={self.sh_addralign}, sh_entsize={self.sh_entsize})' + + def contents(self) -> bytes: + '''Return section contents.''' + return self._data[self.sh_offset:self.sh_offset + self.sh_size] + +class ProgramHeader(ELFRecord): + STRUCT = { + # different ELF classes have the same fields, but in a different order to optimize space versus alignment + ELFCLASS32: (BiStruct('IIIIIIII'), ['p_type', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_flags', 'p_align']), + ELFCLASS64: (BiStruct('IIQQQQQQ'), ['p_type', 'p_flags', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_align']), + } + + def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None: + super().__init__(data, offset, eh, eh.e_phentsize) + + def __repr__(self) -> str: + return f'ProgramHeader(p_type={self.p_type}, p_offset={self.p_offset}, p_vaddr={self.p_vaddr}, p_paddr={self.p_paddr}, p_filesz={self.p_filesz}, p_memsz={self.p_memsz}, p_flags={self.p_flags}, p_align={self.p_align})' + +class Symbol(ELFRecord): + STRUCT = { + # different ELF classes have the same fields, but in a different order to optimize space versus alignment + ELFCLASS32: (BiStruct('IIIBBH'), ['st_name', 'st_value', 'st_size', 'st_info', 'st_other', 'st_shndx']), + ELFCLASS64: (BiStruct('IBBHQQ'), ['st_name', 'st_info', 'st_other', 'st_shndx', 'st_value', 'st_size']), + } + + def __init__(self, data: bytes, offset: int, eh: ELFHeader, symtab: Section, strings: bytes, version: Optional[bytes]) -> None: + super().__init__(data, offset, eh, symtab.sh_entsize) + self.name = _lookup_string(strings, self.st_name) + self.version = version + + def __repr__(self) -> str: + return f'Symbol(st_name={self.st_name}({self.name!r}), st_value={self.st_value}, st_size={self.st_size}, st_info={self.st_info}, st_other={self.st_other}, st_shndx={self.st_shndx}, version={self.version!r})' + + @property + def is_import(self) -> bool: + '''Returns whether the symbol is an imported symbol.''' + return self.st_bind != STB_LOCAL and self.st_shndx == 0 + + @property + def is_export(self) -> bool: + '''Returns whether the symbol is an exported symbol.''' + return self.st_bind != STB_LOCAL and self.st_shndx != 0 + + @property + def st_bind(self) -> int: + '''Returns STB_*.''' + return self.st_info >> 4 + +class Verneed(ELFRecord): + DEF = (BiStruct('HHIII'), ['vn_version', 'vn_cnt', 'vn_file', 'vn_aux', 'vn_next']) + STRUCT = { ELFCLASS32: DEF, ELFCLASS64: DEF } + + def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None: + super().__init__(data, offset, eh, None) + + def __repr__(self) -> str: + return f'Verneed(vn_version={self.vn_version}, vn_cnt={self.vn_cnt}, vn_file={self.vn_file}, vn_aux={self.vn_aux}, vn_next={self.vn_next})' + +class Vernaux(ELFRecord): + DEF = (BiStruct('IHHII'), ['vna_hash', 'vna_flags', 'vna_other', 'vna_name', 'vna_next']) + STRUCT = { ELFCLASS32: DEF, ELFCLASS64: DEF } + + def __init__(self, data: bytes, offset: int, eh: ELFHeader, strings: bytes) -> None: + super().__init__(data, offset, eh, None) + self.name = _lookup_string(strings, self.vna_name) + + def __repr__(self) -> str: + return f'Veraux(vna_hash={self.vna_hash}, vna_flags={self.vna_flags}, vna_other={self.vna_other}, vna_name={self.vna_name}({self.name!r}), vna_next={self.vna_next})' + +class DynTag(ELFRecord): + STRUCT = { + ELFCLASS32: (BiStruct('II'), ['d_tag', 'd_val']), + ELFCLASS64: (BiStruct('QQ'), ['d_tag', 'd_val']), + } + + def __init__(self, data: bytes, offset: int, eh: ELFHeader, section: Section) -> None: + super().__init__(data, offset, eh, section.sh_entsize) + + def __repr__(self) -> str: + return f'DynTag(d_tag={self.d_tag}, d_val={self.d_val})' + +def _lookup_string(data: bytes, index: int) -> bytes: + '''Look up string by offset in ELF string table.''' + endx = data.find(b'\x00', index) + assert endx != -1 + return data[index:endx] + +VERSYM_S = BiStruct('H') # .gnu_version section has a single 16-bit integer per symbol in the linked section +def _parse_symbol_table(section: Section, strings: bytes, eh: ELFHeader, versym: bytes, verneed: Dict[int, bytes]) -> List[Symbol]: + '''Parse symbol table, return a list of symbols.''' + data = section.contents() + symbols = [] + versym_iter = (verneed.get(v[0]) for v in VERSYM_S[eh.ei_data].iter_unpack(versym)) + for ofs, version in zip(range(0, len(data), section.sh_entsize), versym_iter): + symbols.append(Symbol(data, ofs, eh, section, strings, version)) + return symbols + +def _parse_verneed(section: Section, strings: bytes, eh: ELFHeader) -> Dict[int, bytes]: + '''Parse .gnu.version_r section, return a dictionary of {versym: 'GLIBC_...'}.''' + data = section.contents() + ofs = 0 + result = {} + while True: + verneed = Verneed(data, ofs, eh) + aofs = verneed.vn_aux + while True: + vernaux = Vernaux(data, aofs, eh, strings) + result[vernaux.vna_other] = vernaux.name + if not vernaux.vna_next: + break + aofs += vernaux.vna_next + + if not verneed.vn_next: + break + ofs += verneed.vn_next + + return result + +def _parse_dyn_tags(section: Section, strings: bytes, eh: ELFHeader) -> List[Tuple[int, Union[bytes, int]]]: + '''Parse dynamic tags. Return array of tuples.''' + data = section.contents() + ofs = 0 + result = [] + for ofs in range(0, len(data), section.sh_entsize): + tag = DynTag(data, ofs, eh, section) + val = _lookup_string(strings, tag.d_val) if tag.d_tag in STRING_TAGS else tag.d_val + result.append((tag.d_tag, val)) + + return result + +class ELFFile: + sections: List[Section] + program_headers: List[ProgramHeader] + dyn_symbols: List[Symbol] + dyn_tags: List[Tuple[int, Union[bytes, int]]] + + def __init__(self, data: bytes) -> None: + self.data = data + self.hdr = ELFHeader(self.data, 0) + self._load_sections() + self._load_program_headers() + self._load_dyn_symbols() + self._load_dyn_tags() + self._section_to_segment_mapping() + + def _load_sections(self) -> None: + self.sections = [] + for idx in range(self.hdr.e_shnum): + offset = self.hdr.e_shoff + idx * self.hdr.e_shentsize + self.sections.append(Section(self.data, offset, self.hdr)) + + shstr = self.sections[self.hdr.e_shstrndx].contents() + for section in self.sections: + section.name = _lookup_string(shstr, section.sh_name) + + def _load_program_headers(self) -> None: + self.program_headers = [] + for idx in range(self.hdr.e_phnum): + offset = self.hdr.e_phoff + idx * self.hdr.e_phentsize + self.program_headers.append(ProgramHeader(self.data, offset, self.hdr)) + + def _load_dyn_symbols(self) -> None: + # first, load 'verneed' section + verneed = None + for section in self.sections: + if section.sh_type == SHT_GNU_verneed: + strtab = self.sections[section.sh_link].contents() # associated string table + assert verneed is None # only one section of this kind please + verneed = _parse_verneed(section, strtab, self.hdr) + assert verneed is not None + + # then, correlate GNU versym sections with dynamic symbol sections + versym = {} + for section in self.sections: + if section.sh_type == SHT_GNU_versym: + versym[section.sh_link] = section + + # finally, load dynsym sections + self.dyn_symbols = [] + for idx, section in enumerate(self.sections): + if section.sh_type == SHT_DYNSYM: # find dynamic symbol tables + strtab_data = self.sections[section.sh_link].contents() # associated string table + versym_data = versym[idx].contents() # associated symbol version table + self.dyn_symbols += _parse_symbol_table(section, strtab_data, self.hdr, versym_data, verneed) + + def _load_dyn_tags(self) -> None: + self.dyn_tags = [] + for idx, section in enumerate(self.sections): + if section.sh_type == SHT_DYNAMIC: # find dynamic tag tables + strtab = self.sections[section.sh_link].contents() # associated string table + self.dyn_tags += _parse_dyn_tags(section, strtab, self.hdr) + + def _section_to_segment_mapping(self) -> None: + for ph in self.program_headers: + ph.sections = [] + for section in self.sections: + if ph.p_vaddr <= section.sh_addr < (ph.p_vaddr + ph.p_memsz): + ph.sections.append(section) + + def query_dyn_tags(self, tag_in: int) -> List[Union[int, bytes]]: + '''Return the values of all dyn tags with the specified tag.''' + return [val for (tag, val) in self.dyn_tags if tag == tag_in] + + +def load(filename: str) -> ELFFile: + with open(filename, 'rb') as f: + data = f.read() + return ELFFile(data) diff --git a/contrib/devtools/security-check.py b/contrib/devtools/security-check.py index 02615edb54..a7eb7f429b 100755 --- a/contrib/devtools/security-check.py +++ b/contrib/devtools/security-check.py @@ -6,15 +6,15 @@ Perform basic security checks on a series of executables. Exit status will be 0 if successful, and the program will be silent. Otherwise the exit status will be 1 and it will log which executables failed which checks. -Needs `readelf` (for ELF), `objdump` (for PE) and `otool` (for MACHO). +Needs `objdump` (for PE) and `otool` (for MACHO). ''' import subprocess import sys import os - from typing import List, Optional -READELF_CMD = os.getenv('READELF', '/usr/bin/readelf') +import pixie + OBJDUMP_CMD = os.getenv('OBJDUMP', '/usr/bin/objdump') OTOOL_CMD = os.getenv('OTOOL', '/usr/bin/otool') @@ -26,75 +26,20 @@ def check_ELF_PIE(executable) -> bool: ''' Check for position independent executable (PIE), allowing for address space randomization. ''' - stdout = run_command([READELF_CMD, '-h', '-W', executable]) - - ok = False - for line in stdout.splitlines(): - tokens = line.split() - if len(line)>=2 and tokens[0] == 'Type:' and tokens[1] == 'DYN': - ok = True - return ok - -def get_ELF_program_headers(executable): - '''Return type and flags for ELF program headers''' - stdout = run_command([READELF_CMD, '-l', '-W', executable]) - - in_headers = False - headers = [] - for line in stdout.splitlines(): - if line.startswith('Program Headers:'): - in_headers = True - count = 0 - if line == '': - in_headers = False - if in_headers: - if count == 1: # header line - header = [x.strip() for x in line.split()] - ofs_typ = header.index('Type') - ofs_flags = header.index('Flg') - # assert readelf output is what we expect - if ofs_typ == -1 or ofs_flags == -1: - raise ValueError('Cannot parse elfread -lW output') - elif count > 1: - splitline = [x.strip() for x in line.split()] - typ = splitline[ofs_typ] - if not typ.startswith('[R'): # skip [Requesting ...] - splitline = [x.strip() for x in line.split()] - flags = splitline[ofs_flags] - # check for 'R', ' E' - if splitline[ofs_flags + 1] == 'E': - flags += ' E' - headers.append((typ, flags, [])) - count += 1 - - if line.startswith(' Section to Segment mapping:'): - in_mapping = True - count = 0 - if line == '': - in_mapping = False - if in_mapping: - if count == 1: # header line - ofs_segment = line.find('Segment') - ofs_sections = line.find('Sections...') - if ofs_segment == -1 or ofs_sections == -1: - raise ValueError('Cannot parse elfread -lW output') - elif count > 1: - segment = int(line[ofs_segment:ofs_sections].strip()) - sections = line[ofs_sections:].strip().split() - headers[segment][2].extend(sections) - count += 1 - return headers + elf = pixie.load(executable) + return elf.hdr.e_type == pixie.ET_DYN def check_ELF_NX(executable) -> bool: ''' Check that no sections are writable and executable (including the stack) ''' + elf = pixie.load(executable) have_wx = False have_gnu_stack = False - for (typ, flags, _) in get_ELF_program_headers(executable): - if typ == 'GNU_STACK': + for ph in elf.program_headers: + if ph.p_type == pixie.PT_GNU_STACK: have_gnu_stack = True - if 'W' in flags and 'E' in flags: # section is both writable and executable + if (ph.p_flags & pixie.PF_W) != 0 and (ph.p_flags & pixie.PF_X) != 0: # section is both writable and executable have_wx = True return have_gnu_stack and not have_wx @@ -104,35 +49,34 @@ def check_ELF_RELRO(executable) -> bool: GNU_RELRO program header must exist Dynamic section must have BIND_NOW flag ''' + elf = pixie.load(executable) have_gnu_relro = False - for (typ, flags, _) in get_ELF_program_headers(executable): - # Note: not checking flags == 'R': here as linkers set the permission differently + for ph in elf.program_headers: + # Note: not checking p_flags == PF_R: here as linkers set the permission differently # This does not affect security: the permission flags of the GNU_RELRO program # header are ignored, the PT_LOAD header determines the effective permissions. # However, the dynamic linker need to write to this area so these are RW. # Glibc itself takes care of mprotecting this area R after relocations are finished. # See also https://marc.info/?l=binutils&m=1498883354122353 - if typ == 'GNU_RELRO': + if ph.p_type == pixie.PT_GNU_RELRO: have_gnu_relro = True have_bindnow = False - stdout = run_command([READELF_CMD, '-d', '-W', executable]) - - for line in stdout.splitlines(): - tokens = line.split() - if len(tokens)>1 and tokens[1] == '(BIND_NOW)' or (len(tokens)>2 and tokens[1] == '(FLAGS)' and 'BIND_NOW' in tokens[2:]): + for flags in elf.query_dyn_tags(pixie.DT_FLAGS): + assert isinstance(flags, int) + if flags & pixie.DF_BIND_NOW: have_bindnow = True + return have_gnu_relro and have_bindnow def check_ELF_Canary(executable) -> bool: ''' Check for use of stack canary ''' - stdout = run_command([READELF_CMD, '--dyn-syms', '-W', executable]) - + elf = pixie.load(executable) ok = False - for line in stdout.splitlines(): - if '__stack_chk_fail' in line: + for symbol in elf.dyn_symbols: + if symbol.name == b'__stack_chk_fail': ok = True return ok @@ -142,48 +86,52 @@ def check_ELF_separate_code(executable): based on their permissions. This checks for missing -Wl,-z,separate-code and potentially other problems. ''' + elf = pixie.load(executable) + R = pixie.PF_R + W = pixie.PF_W + E = pixie.PF_X EXPECTED_FLAGS = { # Read + execute - '.init': 'R E', - '.plt': 'R E', - '.plt.got': 'R E', - '.plt.sec': 'R E', - '.text': 'R E', - '.fini': 'R E', + b'.init': R | E, + b'.plt': R | E, + b'.plt.got': R | E, + b'.plt.sec': R | E, + b'.text': R | E, + b'.fini': R | E, # Read-only data - '.interp': 'R', - '.note.gnu.property': 'R', - '.note.gnu.build-id': 'R', - '.note.ABI-tag': 'R', - '.gnu.hash': 'R', - '.dynsym': 'R', - '.dynstr': 'R', - '.gnu.version': 'R', - '.gnu.version_r': 'R', - '.rela.dyn': 'R', - '.rela.plt': 'R', - '.rodata': 'R', - '.eh_frame_hdr': 'R', - '.eh_frame': 'R', - '.qtmetadata': 'R', - '.gcc_except_table': 'R', - '.stapsdt.base': 'R', + b'.interp': R, + b'.note.gnu.property': R, + b'.note.gnu.build-id': R, + b'.note.ABI-tag': R, + b'.gnu.hash': R, + b'.dynsym': R, + b'.dynstr': R, + b'.gnu.version': R, + b'.gnu.version_r': R, + b'.rela.dyn': R, + b'.rela.plt': R, + b'.rodata': R, + b'.eh_frame_hdr': R, + b'.eh_frame': R, + b'.qtmetadata': R, + b'.gcc_except_table': R, + b'.stapsdt.base': R, # Writable data - '.init_array': 'RW', - '.fini_array': 'RW', - '.dynamic': 'RW', - '.got': 'RW', - '.data': 'RW', - '.bss': 'RW', + b'.init_array': R | W, + b'.fini_array': R | W, + b'.dynamic': R | W, + b'.got': R | W, + b'.data': R | W, + b'.bss': R | W, } # For all LOAD program headers get mapping to the list of sections, # and for each section, remember the flags of the associated program header. flags_per_section = {} - for (typ, flags, sections) in get_ELF_program_headers(executable): - if typ == 'LOAD': - for section in sections: - assert(section not in flags_per_section) - flags_per_section[section] = flags + for ph in elf.program_headers: + if ph.p_type == pixie.PT_LOAD: + for section in ph.sections: + assert(section.name not in flags_per_section) + flags_per_section[section.name] = ph.p_flags # Spot-check ELF LOAD program header flags per section # If these sections exist, check them against the expected R/W/E flags for (section, flags) in flags_per_section.items(): @@ -236,7 +184,7 @@ def check_PE_NX(executable) -> bool: def get_MACHO_executable_flags(executable) -> List[str]: stdout = run_command([OTOOL_CMD, '-vh', executable]) - flags = [] + flags: List[str] = [] for line in stdout.splitlines(): tokens = line.split() # filter first two header lines diff --git a/contrib/devtools/symbol-check.py b/contrib/devtools/symbol-check.py index 6949cb7ced..6ca5076d6f 100755 --- a/contrib/devtools/symbol-check.py +++ b/contrib/devtools/symbol-check.py @@ -11,10 +11,11 @@ Example usage: find ../gitian-builder/build -type f -executable | xargs python3 contrib/devtools/symbol-check.py ''' import subprocess -import re import sys import os -from typing import List, Optional, Tuple +from typing import List, Optional + +import pixie # Debian 8 (Jessie) EOL: 2020. https://wiki.debian.org/DebianReleases#Production_Releases # @@ -50,7 +51,6 @@ IGNORE_EXPORTS = { '_edata', '_end', '__end__', '_init', '__bss_start', '__bss_start__', '_bss_end__', '__bss_end__', '_fini', '_IO_stdin_used', 'stdin', 'stdout', 'stderr', 'environ', '_environ', '__environ', } -READELF_CMD = os.getenv('READELF', '/usr/bin/readelf') CPPFILT_CMD = os.getenv('CPPFILT', '/usr/bin/c++filt') OBJDUMP_CMD = os.getenv('OBJDUMP', '/usr/bin/objdump') OTOOL_CMD = os.getenv('OTOOL', '/usr/bin/otool') @@ -76,11 +76,11 @@ ELF_ALLOWED_LIBRARIES = { 'libdl.so.2' # programming interface to dynamic linker } ARCH_MIN_GLIBC_VER = { -'80386': (2,1), -'X86-64': (2,2,5), -'ARM': (2,4), -'AArch64':(2,17), -'RISC-V': (2,27) +pixie.EM_386: (2,1), +pixie.EM_X86_64: (2,2,5), +pixie.EM_ARM: (2,4), +pixie.EM_AARCH64:(2,17), +pixie.EM_RISCV: (2,27) } MACHO_ALLOWED_LIBRARIES = { @@ -140,29 +140,6 @@ class CPPFilt(object): self.proc.stdout.close() self.proc.wait() -def read_symbols(executable, imports=True) -> List[Tuple[str, str, str]]: - ''' - Parse an ELF executable and return a list of (symbol,version, arch) tuples - for dynamic, imported symbols. - ''' - p = subprocess.Popen([READELF_CMD, '--dyn-syms', '-W', '-h', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) - (stdout, stderr) = p.communicate() - if p.returncode: - raise IOError('Could not read symbols for {}: {}'.format(executable, stderr.strip())) - syms = [] - for line in stdout.splitlines(): - line = line.split() - if 'Machine:' in line: - arch = line[-1] - if len(line)>7 and re.match('[0-9]+:$', line[0]): - (sym, _, version) = line[7].partition('@') - is_import = line[6] == 'UND' - if version.startswith('@'): - version = version[1:] - if is_import == imports: - syms.append((sym, version, arch)) - return syms - def check_version(max_versions, version, arch) -> bool: if '_' in version: (lib, _, ver) = version.rpartition('_') @@ -174,36 +151,30 @@ def check_version(max_versions, version, arch) -> bool: return False return ver <= max_versions[lib] or lib == 'GLIBC' and ver <= ARCH_MIN_GLIBC_VER[arch] -def elf_read_libraries(filename) -> List[str]: - p = subprocess.Popen([READELF_CMD, '-d', '-W', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True) - (stdout, stderr) = p.communicate() - if p.returncode: - raise IOError('Error opening file') - libraries = [] - for line in stdout.splitlines(): - tokens = line.split() - if len(tokens)>2 and tokens[1] == '(NEEDED)': - match = re.match(r'^Shared library: \[(.*)\]$', ' '.join(tokens[2:])) - if match: - libraries.append(match.group(1)) - else: - raise ValueError('Unparseable (NEEDED) specification') - return libraries - def check_imported_symbols(filename) -> bool: + elf = pixie.load(filename) cppfilt = CPPFilt() ok = True - for sym, version, arch in read_symbols(filename, True): - if version and not check_version(MAX_VERSIONS, version, arch): + + for symbol in elf.dyn_symbols: + if not symbol.is_import: + continue + sym = symbol.name.decode() + version = symbol.version.decode() if symbol.version is not None else None + if version and not check_version(MAX_VERSIONS, version, elf.hdr.e_machine): print('{}: symbol {} from unsupported version {}'.format(filename, cppfilt(sym), version)) ok = False return ok def check_exported_symbols(filename) -> bool: + elf = pixie.load(filename) cppfilt = CPPFilt() ok = True - for sym,version,arch in read_symbols(filename, False): - if arch == 'RISC-V' or sym in IGNORE_EXPORTS: + for symbol in elf.dyn_symbols: + if not symbol.is_export: + continue + sym = symbol.name.decode() + if elf.hdr.e_machine == pixie.EM_RISCV or sym in IGNORE_EXPORTS: continue print('{}: export of symbol {} not allowed'.format(filename, cppfilt(sym))) ok = False @@ -211,9 +182,11 @@ def check_exported_symbols(filename) -> bool: def check_ELF_libraries(filename) -> bool: ok = True - for library_name in elf_read_libraries(filename): - if library_name not in ELF_ALLOWED_LIBRARIES: - print('{}: NEEDED library {} is not allowed'.format(filename, library_name)) + elf = pixie.load(filename) + for library_name in elf.query_dyn_tags(pixie.DT_NEEDED): + assert(isinstance(library_name, bytes)) + if library_name.decode() not in ELF_ALLOWED_LIBRARIES: + print('{}: NEEDED library {} is not allowed'.format(filename, library_name.decode())) ok = False return ok diff --git a/src/Makefile.am b/src/Makefile.am index 67fd402603..8da754eee6 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -741,13 +741,13 @@ endif if GLIBC_BACK_COMPAT @echo "Checking glibc back compat..." - $(AM_V_at) READELF=$(READELF) CPPFILT=$(CPPFILT) $(PYTHON) $(top_srcdir)/contrib/devtools/symbol-check.py $(bin_PROGRAMS) + $(AM_V_at) CPPFILT=$(CPPFILT) $(PYTHON) $(top_srcdir)/contrib/devtools/symbol-check.py $(bin_PROGRAMS) endif check-security: $(bin_PROGRAMS) if HARDEN @echo "Checking binary security..." - $(AM_V_at) READELF=$(READELF) OBJDUMP=$(OBJDUMP) OTOOL=$(OTOOL) $(PYTHON) $(top_srcdir)/contrib/devtools/security-check.py $(bin_PROGRAMS) + $(AM_V_at) OBJDUMP=$(OBJDUMP) OTOOL=$(OTOOL) $(PYTHON) $(top_srcdir)/contrib/devtools/security-check.py $(bin_PROGRAMS) endif if EMBEDDED_LEVELDB |