aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@protonmail.com>2020-11-20 09:15:44 +0100
committerWladimir J. van der Laan <laanwj@protonmail.com>2020-11-22 11:11:32 +0100
commit634f6ec4eb9997d7bd0f8209fad49a4171d42384 (patch)
tree0c9e1de57c164790c4b6cc8397c6c8ce0117b983
parentfdd068507d2694137d72638d87ea961e6f16a753 (diff)
contrib: Parse ELF directly for symbol and security checks
Instead of the ever-messier text parsing of the output of the readelf tool (which is clearly meant for human consumption, not for machine parsing), parse the ELF binaries directly. Add a small dependency-less ELF parser specific to the checks. This is slightly more secure, too, because it removes potential ambiguity due to misparsing and changes in the output format of `readelf`. It also allows for stricter and more specific ELF format checks in the future. This removes the build-time dependency on `readelf`. It passes the test-security-check for me locally, though I haven't checked on all platforms.
-rw-r--r--Makefile.am3
-rw-r--r--configure.ac1
-rw-r--r--contrib/devtools/pixie.py323
-rwxr-xr-xcontrib/devtools/security-check.py170
-rwxr-xr-xcontrib/devtools/symbol-check.py81
-rw-r--r--src/Makefile.am4
6 files changed, 413 insertions, 169 deletions
diff --git a/Makefile.am b/Makefile.am
index c8af4228f3..76dc0dd10a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -52,7 +52,8 @@ DIST_SHARE = \
$(top_srcdir)/share/rpcauth
BIN_CHECKS=$(top_srcdir)/contrib/devtools/symbol-check.py \
- $(top_srcdir)/contrib/devtools/security-check.py
+ $(top_srcdir)/contrib/devtools/security-check.py \
+ $(top_srcdir)/contrib/devtools/pixie.py
WINDOWS_PACKAGING = $(top_srcdir)/share/pixmaps/bitcoin.ico \
$(top_srcdir)/share/pixmaps/nsis-header.bmp \
diff --git a/configure.ac b/configure.ac
index e1548e5c36..cc2801e97e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -104,7 +104,6 @@ AC_PATH_PROG([GIT], [git])
AC_PATH_PROG(CCACHE,ccache)
AC_PATH_PROG(XGETTEXT,xgettext)
AC_PATH_PROG(HEXDUMP,hexdump)
-AC_PATH_TOOL(READELF, readelf)
AC_PATH_TOOL(CPPFILT, c++filt)
AC_PATH_TOOL(OBJCOPY, objcopy)
AC_PATH_PROG(DOXYGEN, doxygen)
diff --git a/contrib/devtools/pixie.py b/contrib/devtools/pixie.py
new file mode 100644
index 0000000000..8cf06a799a
--- /dev/null
+++ b/contrib/devtools/pixie.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020 Wladimir J. van der Laan
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or http://www.opensource.org/licenses/mit-license.php.
+'''
+Compact, self-contained ELF implementation for bitcoin-core security checks.
+'''
+import struct
+import types
+from typing import Dict, List, Optional, Union, Tuple
+
+# you can find all these values in elf.h
+# size in bytes of the e_ident identification array at the start of the ELF header
+EI_NIDENT = 16
+
+# Byte indices in e_ident
+EI_CLASS = 4 # ELFCLASSxx
+EI_DATA = 5 # ELFDATAxxxx
+
+# values of e_ident[EI_CLASS]; used as the outer key of every STRUCT table
+ELFCLASS32 = 1 # 32-bit
+ELFCLASS64 = 2 # 64-bit
+
+# values of e_ident[EI_DATA]; used as the key of the dictionaries built by BiStruct
+ELFDATA2LSB = 1 # little endian
+ELFDATA2MSB = 2 # big endian
+
+# relevant values for e_machine
+EM_386 = 3
+EM_PPC64 = 21
+EM_ARM = 40
+EM_AARCH64 = 183
+EM_X86_64 = 62
+EM_RISCV = 243
+
+# relevant values for e_type
+ET_DYN = 3
+
+# relevant values for sh_type
+SHT_PROGBITS = 1
+SHT_STRTAB = 3
+SHT_DYNAMIC = 6
+SHT_DYNSYM = 11
+SHT_GNU_verneed = 0x6ffffffe
+SHT_GNU_versym = 0x6fffffff
+
+# relevant values for p_type
+PT_LOAD = 1
+PT_GNU_STACK = 0x6474e551
+PT_GNU_RELRO = 0x6474e552
+
+# relevant values for p_flags (bit mask)
+PF_X = (1 << 0)
+PF_W = (1 << 1)
+PF_R = (1 << 2)
+
+# relevant values for d_tag
+DT_NEEDED = 1
+DT_FLAGS = 30
+
+# relevant values of `d_un.d_val' in the DT_FLAGS entry
+DF_BIND_NOW = 0x00000008
+
+# relevant d_tags with string payload
+# (for these tags d_val is resolved as an offset into the associated string table)
+STRING_TAGS = {DT_NEEDED}
+
+# relevant values for ST_BIND subfield of st_info (symbol binding)
+STB_LOCAL = 0
+
+class ELFRecord(types.SimpleNamespace):
+    '''
+    Base class for fixed-layout ELF records.
+
+    Subclasses provide a class attribute STRUCT that maps the ELF class
+    (eh.ei_class) to a tuple (parsers, field_names), where parsers maps the
+    data encoding (eh.ei_data) to a compiled struct.Struct. Each unpacked
+    value is stored as an attribute named after its field.
+    '''
+    def __init__(self, data: bytes, offset: int, eh: 'ELFHeader', total_size: Optional[int]) -> None:
+        '''
+        Unpack one record from data, starting at offset.
+
+        total_size is the record size declared by the file (or None when the
+        file declares none); a ValueError is raised when it is smaller than
+        the structure that has to be read.
+        '''
+        parsers, field_names = self.STRUCT[eh.ei_class]
+        parser = parsers[eh.ei_data]
+        if total_size is not None and total_size < parser.size:
+            raise ValueError(f'{self.__class__.__name__} header size too small ({total_size} < {parser.size})')
+        raw = data[offset:offset + parser.size]
+        values = parser.unpack(raw)
+        for name, value in zip(field_names, values):
+            setattr(self, name, value)
+
+def BiStruct(chars: str) -> Dict[int, struct.Struct]:
+    '''
+    Compile a struct parser for both endians.
+
+    The result is keyed by the ELFDATA2LSB / ELFDATA2MSB constants, so it can
+    be indexed directly with the data encoding byte of an ELF header.
+    '''
+    byte_orders = ((ELFDATA2LSB, '<'), (ELFDATA2MSB, '>'))
+    return {encoding: struct.Struct(prefix + chars) for encoding, prefix in byte_orders}
+
+class ELFHeader(ELFRecord):
+    '''
+    The ELF file header.
+
+    Besides the unpacked fields listed in FIELDS, the instance carries the raw
+    identification bytes (e_ident) and the two values derived from them that
+    select the layout of every other record: ei_class and ei_data.
+    '''
+    FIELDS = [
+        'e_type', 'e_machine', 'e_version', 'e_entry', 'e_phoff', 'e_shoff', 'e_flags',
+        'e_ehsize', 'e_phentsize', 'e_phnum', 'e_shentsize', 'e_shnum', 'e_shstrndx',
+    ]
+    STRUCT = {
+        ELFCLASS32: (BiStruct('HHIIIIIHHHHHH'), FIELDS),
+        ELFCLASS64: (BiStruct('HHIQQQIHHHHHH'), FIELDS),
+    }
+
+    def __init__(self, data: bytes, offset: int) -> None:
+        '''
+        Parse the ELF header found in data at offset.
+
+        Raises ValueError when the magic number is wrong, when the
+        identification bytes are truncated, or when the ELF class or data
+        encoding is not one this parser has a layout for.
+        '''
+        self.e_ident = data[offset:offset + EI_NIDENT]
+        if self.e_ident[0:4] != b'\x7fELF':
+            raise ValueError('invalid ELF magic')
+        if len(self.e_ident) != EI_NIDENT:
+            raise ValueError(f'truncated ELF identification ({len(self.e_ident)} < {EI_NIDENT})')
+
+        # Validate both selector bytes up front: they are used as dictionary
+        # keys by every record parser and would otherwise surface as a bare KeyError.
+        self.ei_class = self.e_ident[EI_CLASS]
+        if self.ei_class not in self.STRUCT:
+            raise ValueError(f'unsupported ELF class {self.ei_class}')
+        self.ei_data = self.e_ident[EI_DATA]
+        if self.ei_data not in (ELFDATA2LSB, ELFDATA2MSB):
+            raise ValueError(f'unsupported ELF data encoding {self.ei_data}')
+
+        # The header describes its own layout, so it is passed as `eh` too.
+        # No declared record size is available before parsing, hence None.
+        super().__init__(data, offset + EI_NIDENT, self, None)
+
+    def __repr__(self) -> str:
+        return f'Header(e_ident={self.e_ident!r}, e_type={self.e_type}, e_machine={self.e_machine}, e_version={self.e_version}, e_entry={self.e_entry}, e_phoff={self.e_phoff}, e_shoff={self.e_shoff}, e_flags={self.e_flags}, e_ehsize={self.e_ehsize}, e_phentsize={self.e_phentsize}, e_phnum={self.e_phnum}, e_shentsize={self.e_shentsize}, e_shnum={self.e_shnum}, e_shstrndx={self.e_shstrndx})'
+
+class Section(ELFRecord):
+    '''One entry of the section header table, with access to the section's bytes.'''
+    # Resolved section name; stays None until ELFFile._load_sections fills it in.
+    name: Optional[bytes] = None
+    FIELDS = [
+        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
+        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize',
+    ]
+    STRUCT = {
+        ELFCLASS32: (BiStruct('IIIIIIIIII'), FIELDS),
+        ELFCLASS64: (BiStruct('IIQQQQIIQQ'), FIELDS),
+    }
+
+    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
+        '''Parse the section header at offset; data is the whole file image.'''
+        super().__init__(data, offset, eh, eh.e_shentsize)
+        # Keep a reference to the file image so contents() can slice it later.
+        self._data = data
+
+    def __repr__(self) -> str:
+        return f'Section(sh_name={self.sh_name}({self.name!r}), sh_type=0x{self.sh_type:x}, sh_flags={self.sh_flags}, sh_addr=0x{self.sh_addr:x}, sh_offset=0x{self.sh_offset:x}, sh_size={self.sh_size}, sh_link={self.sh_link}, sh_info={self.sh_info}, sh_addralign={self.sh_addralign}, sh_entsize={self.sh_entsize})'
+
+    def contents(self) -> bytes:
+        '''Return section contents.'''
+        start = self.sh_offset
+        end = start + self.sh_size
+        return self._data[start:end]
+
+class ProgramHeader(ELFRecord):
+    '''One entry of the program header table.'''
+    STRUCT = {
+        # different ELF classes have the same fields, but in a different order to optimize space versus alignment
+        ELFCLASS32: (
+            BiStruct('IIIIIIII'),
+            ['p_type', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_flags', 'p_align'],
+        ),
+        ELFCLASS64: (
+            BiStruct('IIQQQQQQ'),
+            ['p_type', 'p_flags', 'p_offset', 'p_vaddr', 'p_paddr', 'p_filesz', 'p_memsz', 'p_align'],
+        ),
+    }
+
+    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
+        '''Parse the program header at offset, bounded by the entry size the ELF header declares.'''
+        super().__init__(data, offset, eh, eh.e_phentsize)
+
+    def __repr__(self) -> str:
+        return f'ProgramHeader(p_type={self.p_type}, p_offset={self.p_offset}, p_vaddr={self.p_vaddr}, p_paddr={self.p_paddr}, p_filesz={self.p_filesz}, p_memsz={self.p_memsz}, p_flags={self.p_flags}, p_align={self.p_align})'
+
+class Symbol(ELFRecord):
+    '''A symbol table entry, together with its resolved name and version string.'''
+    STRUCT = {
+        # different ELF classes have the same fields, but in a different order to optimize space versus alignment
+        ELFCLASS32: (
+            BiStruct('IIIBBH'),
+            ['st_name', 'st_value', 'st_size', 'st_info', 'st_other', 'st_shndx'],
+        ),
+        ELFCLASS64: (
+            BiStruct('IBBHQQ'),
+            ['st_name', 'st_info', 'st_other', 'st_shndx', 'st_value', 'st_size'],
+        ),
+    }
+
+    def __init__(self, data: bytes, offset: int, eh: ELFHeader, symtab: Section, strings: bytes, version: Optional[bytes]) -> None:
+        '''
+        Parse the symbol at offset within data.
+
+        symtab supplies the declared entry size, strings is the string table
+        in which st_name is looked up, and version is stored unchanged.
+        '''
+        super().__init__(data, offset, eh, symtab.sh_entsize)
+        self.name = _lookup_string(strings, self.st_name)
+        self.version = version
+
+    def __repr__(self) -> str:
+        return f'Symbol(st_name={self.st_name}({self.name!r}), st_value={self.st_value}, st_size={self.st_size}, st_info={self.st_info}, st_other={self.st_other}, st_shndx={self.st_shndx}, version={self.version!r})'
+
+    @property
+    def st_bind(self) -> int:
+        '''Returns STB_*.'''
+        # the binding is stored in the bits of st_info above the low four
+        return self.st_info >> 4
+
+    @property
+    def is_import(self) -> bool:
+        '''Returns whether the symbol is an imported symbol.'''
+        # non-local symbol with section index 0
+        return self.st_shndx == 0 and self.st_bind != STB_LOCAL
+
+    @property
+    def is_export(self) -> bool:
+        '''Returns whether the symbol is an exported symbol.'''
+        # non-local symbol with a non-zero section index
+        return self.st_shndx != 0 and self.st_bind != STB_LOCAL
+
+class Verneed(ELFRecord):
+    '''A version requirement record, as read from the SHT_GNU_verneed section.'''
+    # The layout does not depend on the ELF class, so both classes share one definition.
+    DEF = (
+        BiStruct('HHIII'),
+        ['vn_version', 'vn_cnt', 'vn_file', 'vn_aux', 'vn_next'],
+    )
+    STRUCT = {ELFCLASS32: DEF, ELFCLASS64: DEF}
+
+    def __init__(self, data: bytes, offset: int, eh: ELFHeader) -> None:
+        '''Parse the record at offset; no declared entry size is checked.'''
+        super().__init__(data, offset, eh, None)
+
+    def __repr__(self) -> str:
+        return f'Verneed(vn_version={self.vn_version}, vn_cnt={self.vn_cnt}, vn_file={self.vn_file}, vn_aux={self.vn_aux}, vn_next={self.vn_next})'
+
+class Vernaux(ELFRecord):
+    '''An auxiliary version requirement record, with its version name resolved.'''
+    # The layout does not depend on the ELF class, so both classes share one definition.
+    DEF = (BiStruct('IHHII'), ['vna_hash', 'vna_flags', 'vna_other', 'vna_name', 'vna_next'])
+    STRUCT = { ELFCLASS32: DEF, ELFCLASS64: DEF }
+
+    def __init__(self, data: bytes, offset: int, eh: ELFHeader, strings: bytes) -> None:
+        '''
+        Parse the record at offset within data.
+
+        strings is the string table in which vna_name is looked up; the
+        resulting bytes are stored as the `name` attribute.
+        '''
+        super().__init__(data, offset, eh, None)
+        self.name = _lookup_string(strings, self.vna_name)
+
+    def __repr__(self) -> str:
+        # The prefix matches the class name (it used to read 'Veraux').
+        return f'Vernaux(vna_hash={self.vna_hash}, vna_flags={self.vna_flags}, vna_other={self.vna_other}, vna_name={self.vna_name}({self.name!r}), vna_next={self.vna_next})'
+
+class DynTag(ELFRecord):
+    '''One (d_tag, d_val) entry of a dynamic section.'''
+    _FIELDS = ['d_tag', 'd_val']
+    STRUCT = {
+        ELFCLASS32: (BiStruct('II'), _FIELDS),
+        ELFCLASS64: (BiStruct('QQ'), _FIELDS),
+    }
+    del _FIELDS  # helper only; keep the class namespace as it was
+
+    def __init__(self, data: bytes, offset: int, eh: ELFHeader, section: Section) -> None:
+        '''Parse the entry at offset within data, bounded by the section's declared entry size.'''
+        super().__init__(data, offset, eh, section.sh_entsize)
+
+    def __repr__(self) -> str:
+        return f'DynTag(d_tag={self.d_tag}, d_val={self.d_val})'
+
+def _lookup_string(data: bytes, index: int) -> bytes:
+    '''
+    Look up string by offset in ELF string table.
+
+    Returns the bytes from index up to, but not including, the next NUL byte.
+    Raises ValueError when no NUL terminator follows index (which includes
+    the case of index pointing past the end of the table).
+    '''
+    endx = data.find(b'\x00', index)
+    # An explicit check instead of `assert`: assertions are stripped under
+    # `python -O`, and a result of -1 would then silently yield data[index:-1].
+    if endx == -1:
+        raise ValueError(f'unterminated string at offset {index} in ELF string table')
+    return data[index:endx]
+
+VERSYM_S = BiStruct('H') # .gnu_version section has a single 16-bit integer per symbol in the linked section
+def _parse_symbol_table(section: Section, strings: bytes, eh: ELFHeader, versym: bytes, verneed: Dict[int, bytes]) -> List[Symbol]:
+    '''
+    Parse symbol table, return a list of symbols.
+
+    versym holds one 16-bit version index per symbol; each index is mapped
+    through verneed to a version name (None when the index is not in it).
+    Symbols and version indices are paired positionally.
+    '''
+    table = section.contents()
+    version_parser = VERSYM_S[eh.ei_data]
+    versions = (verneed.get(index) for (index,) in version_parser.iter_unpack(versym))
+    entry_offsets = range(0, len(table), section.sh_entsize)
+    return [
+        Symbol(table, entry_ofs, eh, section, strings, version)
+        for entry_ofs, version in zip(entry_offsets, versions)
+    ]
+
+def _parse_verneed(section: Section, strings: bytes, eh: ELFHeader) -> Dict[int, bytes]:
+    '''
+    Parse .gnu.version_r section, return a dictionary of {versym: 'GLIBC_...'}.
+
+    The section is a chain of Verneed records linked through vn_next, each of
+    which heads its own chain of Vernaux records linked through vna_next.
+    Every Vernaux contributes one entry: its vna_other value mapped to its
+    resolved name. strings is the string table used to resolve the names.
+    '''
+    data = section.contents()
+    ofs = 0
+    result = {}
+    while True:
+        verneed = Verneed(data, ofs, eh)
+        # vn_aux is a byte offset from the start of *this* Verneed record, not
+        # from the start of the section, so rebase it onto the current offset.
+        # Without this only the first record (ofs == 0) is parsed correctly.
+        aofs = ofs + verneed.vn_aux
+        while True:
+            vernaux = Vernaux(data, aofs, eh, strings)
+            result[vernaux.vna_other] = vernaux.name
+            if not vernaux.vna_next:
+                break
+            # vna_next is relative to the current Vernaux record
+            aofs += vernaux.vna_next
+
+        if not verneed.vn_next:
+            break
+        # vn_next is relative to the current Verneed record
+        ofs += verneed.vn_next
+
+    return result
+
+def _parse_dyn_tags(section: Section, strings: bytes, eh: ELFHeader) -> List[Tuple[int, Union[bytes, int]]]:
+    '''
+    Parse dynamic tags. Return array of tuples.
+
+    Each tuple is (d_tag, value). For tags listed in STRING_TAGS the value is
+    the string found at offset d_val in strings; otherwise it is d_val itself.
+    '''
+    table = section.contents()
+    tags = []
+    for entry_ofs in range(0, len(table), section.sh_entsize):
+        entry = DynTag(table, entry_ofs, eh, section)
+        if entry.d_tag in STRING_TAGS:
+            payload = _lookup_string(strings, entry.d_val)
+        else:
+            payload = entry.d_val
+        tags.append((entry.d_tag, payload))
+
+    return tags
+
+class ELFFile:
+    '''
+    Parsed view of an ELF image held in memory.
+
+    Construction parses the header, all section and program headers, the
+    dynamic symbols and the dynamic tags, and finally attaches to every
+    program header the list of sections whose address lies inside it.
+    '''
+    sections: List[Section]
+    program_headers: List[ProgramHeader]
+    dyn_symbols: List[Symbol]
+    dyn_tags: List[Tuple[int, Union[bytes, int]]]
+
+    def __init__(self, data: bytes) -> None:
+        '''Parse data, the complete contents of an ELF file.'''
+        self.data = data
+        self.hdr = ELFHeader(self.data, 0)
+        # Order matters: symbols, tags and the segment mapping all need the sections.
+        self._load_sections()
+        self._load_program_headers()
+        self._load_dyn_symbols()
+        self._load_dyn_tags()
+        self._section_to_segment_mapping()
+
+    def _load_sections(self) -> None:
+        '''Read the section header table and resolve every section name.'''
+        hdr = self.hdr
+        self.sections = [
+            Section(self.data, hdr.e_shoff + idx * hdr.e_shentsize, hdr)
+            for idx in range(hdr.e_shnum)
+        ]
+
+        # section names live in the section designated by e_shstrndx
+        shstr = self.sections[hdr.e_shstrndx].contents()
+        for section in self.sections:
+            section.name = _lookup_string(shstr, section.sh_name)
+
+    def _load_program_headers(self) -> None:
+        '''Read the program header table.'''
+        hdr = self.hdr
+        self.program_headers = [
+            ProgramHeader(self.data, hdr.e_phoff + idx * hdr.e_phentsize, hdr)
+            for idx in range(hdr.e_phnum)
+        ]
+
+    def _load_dyn_symbols(self) -> None:
+        '''Collect the symbols of all SHT_DYNSYM sections, with their version names.'''
+        # first, load 'verneed' section
+        verneed = None
+        for section in self.sections:
+            if section.sh_type != SHT_GNU_verneed:
+                continue
+            strtab = self.sections[section.sh_link].contents() # associated string table
+            assert verneed is None # only one section of this kind please
+            verneed = _parse_verneed(section, strtab, self.hdr)
+        assert verneed is not None
+
+        # then, correlate GNU versym sections with dynamic symbol sections
+        # (keyed by sh_link, the index of the symbol table they describe)
+        versym = {
+            section.sh_link: section
+            for section in self.sections
+            if section.sh_type == SHT_GNU_versym
+        }
+
+        # finally, load dynsym sections
+        self.dyn_symbols = []
+        for idx, section in enumerate(self.sections):
+            if section.sh_type != SHT_DYNSYM: # only dynamic symbol tables
+                continue
+            strtab_data = self.sections[section.sh_link].contents() # associated string table
+            versym_data = versym[idx].contents() # associated symbol version table
+            self.dyn_symbols += _parse_symbol_table(section, strtab_data, self.hdr, versym_data, verneed)
+
+    def _load_dyn_tags(self) -> None:
+        '''Collect the tags of all SHT_DYNAMIC sections.'''
+        self.dyn_tags = []
+        for section in self.sections:
+            if section.sh_type != SHT_DYNAMIC: # only dynamic tag tables
+                continue
+            strtab = self.sections[section.sh_link].contents() # associated string table
+            self.dyn_tags += _parse_dyn_tags(section, strtab, self.hdr)
+
+    def _section_to_segment_mapping(self) -> None:
+        '''Give each program header a `sections` list of the sections whose sh_addr falls within it.'''
+        for ph in self.program_headers:
+            seg_start = ph.p_vaddr
+            seg_end = seg_start + ph.p_memsz
+            ph.sections = [
+                section for section in self.sections
+                if seg_start <= section.sh_addr < seg_end
+            ]
+
+    def query_dyn_tags(self, tag_in: int) -> List[Union[int, bytes]]:
+        '''Return the values of all dyn tags with the specified tag.'''
+        return [value for (tag, value) in self.dyn_tags if tag == tag_in]
+
+
+def load(filename: str) -> ELFFile:
+    '''Read the file at filename in binary mode and parse it as an ELF image.'''
+    with open(filename, 'rb') as elf_file:
+        image = elf_file.read()
+    return ELFFile(image)
diff --git a/contrib/devtools/security-check.py b/contrib/devtools/security-check.py
index 02615edb54..a7eb7f429b 100755
--- a/contrib/devtools/security-check.py
+++ b/contrib/devtools/security-check.py
@@ -6,15 +6,15 @@
Perform basic security checks on a series of executables.
Exit status will be 0 if successful, and the program will be silent.
Otherwise the exit status will be 1 and it will log which executables failed which checks.
-Needs `readelf` (for ELF), `objdump` (for PE) and `otool` (for MACHO).
+Needs `objdump` (for PE) and `otool` (for MACHO).
'''
import subprocess
import sys
import os
-
from typing import List, Optional
-READELF_CMD = os.getenv('READELF', '/usr/bin/readelf')
+import pixie
+
OBJDUMP_CMD = os.getenv('OBJDUMP', '/usr/bin/objdump')
OTOOL_CMD = os.getenv('OTOOL', '/usr/bin/otool')
@@ -26,75 +26,20 @@ def check_ELF_PIE(executable) -> bool:
'''
Check for position independent executable (PIE), allowing for address space randomization.
'''
- stdout = run_command([READELF_CMD, '-h', '-W', executable])
-
- ok = False
- for line in stdout.splitlines():
- tokens = line.split()
- if len(line)>=2 and tokens[0] == 'Type:' and tokens[1] == 'DYN':
- ok = True
- return ok
-
-def get_ELF_program_headers(executable):
- '''Return type and flags for ELF program headers'''
- stdout = run_command([READELF_CMD, '-l', '-W', executable])
-
- in_headers = False
- headers = []
- for line in stdout.splitlines():
- if line.startswith('Program Headers:'):
- in_headers = True
- count = 0
- if line == '':
- in_headers = False
- if in_headers:
- if count == 1: # header line
- header = [x.strip() for x in line.split()]
- ofs_typ = header.index('Type')
- ofs_flags = header.index('Flg')
- # assert readelf output is what we expect
- if ofs_typ == -1 or ofs_flags == -1:
- raise ValueError('Cannot parse elfread -lW output')
- elif count > 1:
- splitline = [x.strip() for x in line.split()]
- typ = splitline[ofs_typ]
- if not typ.startswith('[R'): # skip [Requesting ...]
- splitline = [x.strip() for x in line.split()]
- flags = splitline[ofs_flags]
- # check for 'R', ' E'
- if splitline[ofs_flags + 1] == 'E':
- flags += ' E'
- headers.append((typ, flags, []))
- count += 1
-
- if line.startswith(' Section to Segment mapping:'):
- in_mapping = True
- count = 0
- if line == '':
- in_mapping = False
- if in_mapping:
- if count == 1: # header line
- ofs_segment = line.find('Segment')
- ofs_sections = line.find('Sections...')
- if ofs_segment == -1 or ofs_sections == -1:
- raise ValueError('Cannot parse elfread -lW output')
- elif count > 1:
- segment = int(line[ofs_segment:ofs_sections].strip())
- sections = line[ofs_sections:].strip().split()
- headers[segment][2].extend(sections)
- count += 1
- return headers
+ elf = pixie.load(executable)
+ return elf.hdr.e_type == pixie.ET_DYN
def check_ELF_NX(executable) -> bool:
'''
Check that no sections are writable and executable (including the stack)
'''
+ elf = pixie.load(executable)
have_wx = False
have_gnu_stack = False
- for (typ, flags, _) in get_ELF_program_headers(executable):
- if typ == 'GNU_STACK':
+ for ph in elf.program_headers:
+ if ph.p_type == pixie.PT_GNU_STACK:
have_gnu_stack = True
- if 'W' in flags and 'E' in flags: # section is both writable and executable
+ if (ph.p_flags & pixie.PF_W) != 0 and (ph.p_flags & pixie.PF_X) != 0: # section is both writable and executable
have_wx = True
return have_gnu_stack and not have_wx
@@ -104,35 +49,34 @@ def check_ELF_RELRO(executable) -> bool:
GNU_RELRO program header must exist
Dynamic section must have BIND_NOW flag
'''
+ elf = pixie.load(executable)
have_gnu_relro = False
- for (typ, flags, _) in get_ELF_program_headers(executable):
- # Note: not checking flags == 'R': here as linkers set the permission differently
+ for ph in elf.program_headers:
+ # Note: not checking p_flags == PF_R: here as linkers set the permission differently
# This does not affect security: the permission flags of the GNU_RELRO program
# header are ignored, the PT_LOAD header determines the effective permissions.
# However, the dynamic linker need to write to this area so these are RW.
# Glibc itself takes care of mprotecting this area R after relocations are finished.
# See also https://marc.info/?l=binutils&m=1498883354122353
- if typ == 'GNU_RELRO':
+ if ph.p_type == pixie.PT_GNU_RELRO:
have_gnu_relro = True
have_bindnow = False
- stdout = run_command([READELF_CMD, '-d', '-W', executable])
-
- for line in stdout.splitlines():
- tokens = line.split()
- if len(tokens)>1 and tokens[1] == '(BIND_NOW)' or (len(tokens)>2 and tokens[1] == '(FLAGS)' and 'BIND_NOW' in tokens[2:]):
+ for flags in elf.query_dyn_tags(pixie.DT_FLAGS):
+ assert isinstance(flags, int)
+ if flags & pixie.DF_BIND_NOW:
have_bindnow = True
+
return have_gnu_relro and have_bindnow
def check_ELF_Canary(executable) -> bool:
'''
Check for use of stack canary
'''
- stdout = run_command([READELF_CMD, '--dyn-syms', '-W', executable])
-
+ elf = pixie.load(executable)
ok = False
- for line in stdout.splitlines():
- if '__stack_chk_fail' in line:
+ for symbol in elf.dyn_symbols:
+ if symbol.name == b'__stack_chk_fail':
ok = True
return ok
@@ -142,48 +86,52 @@ def check_ELF_separate_code(executable):
based on their permissions. This checks for missing -Wl,-z,separate-code
and potentially other problems.
'''
+ elf = pixie.load(executable)
+ R = pixie.PF_R
+ W = pixie.PF_W
+ E = pixie.PF_X
EXPECTED_FLAGS = {
# Read + execute
- '.init': 'R E',
- '.plt': 'R E',
- '.plt.got': 'R E',
- '.plt.sec': 'R E',
- '.text': 'R E',
- '.fini': 'R E',
+ b'.init': R | E,
+ b'.plt': R | E,
+ b'.plt.got': R | E,
+ b'.plt.sec': R | E,
+ b'.text': R | E,
+ b'.fini': R | E,
# Read-only data
- '.interp': 'R',
- '.note.gnu.property': 'R',
- '.note.gnu.build-id': 'R',
- '.note.ABI-tag': 'R',
- '.gnu.hash': 'R',
- '.dynsym': 'R',
- '.dynstr': 'R',
- '.gnu.version': 'R',
- '.gnu.version_r': 'R',
- '.rela.dyn': 'R',
- '.rela.plt': 'R',
- '.rodata': 'R',
- '.eh_frame_hdr': 'R',
- '.eh_frame': 'R',
- '.qtmetadata': 'R',
- '.gcc_except_table': 'R',
- '.stapsdt.base': 'R',
+ b'.interp': R,
+ b'.note.gnu.property': R,
+ b'.note.gnu.build-id': R,
+ b'.note.ABI-tag': R,
+ b'.gnu.hash': R,
+ b'.dynsym': R,
+ b'.dynstr': R,
+ b'.gnu.version': R,
+ b'.gnu.version_r': R,
+ b'.rela.dyn': R,
+ b'.rela.plt': R,
+ b'.rodata': R,
+ b'.eh_frame_hdr': R,
+ b'.eh_frame': R,
+ b'.qtmetadata': R,
+ b'.gcc_except_table': R,
+ b'.stapsdt.base': R,
# Writable data
- '.init_array': 'RW',
- '.fini_array': 'RW',
- '.dynamic': 'RW',
- '.got': 'RW',
- '.data': 'RW',
- '.bss': 'RW',
+ b'.init_array': R | W,
+ b'.fini_array': R | W,
+ b'.dynamic': R | W,
+ b'.got': R | W,
+ b'.data': R | W,
+ b'.bss': R | W,
}
# For all LOAD program headers get mapping to the list of sections,
# and for each section, remember the flags of the associated program header.
flags_per_section = {}
- for (typ, flags, sections) in get_ELF_program_headers(executable):
- if typ == 'LOAD':
- for section in sections:
- assert(section not in flags_per_section)
- flags_per_section[section] = flags
+ for ph in elf.program_headers:
+ if ph.p_type == pixie.PT_LOAD:
+ for section in ph.sections:
+ assert(section.name not in flags_per_section)
+ flags_per_section[section.name] = ph.p_flags
# Spot-check ELF LOAD program header flags per section
# If these sections exist, check them against the expected R/W/E flags
for (section, flags) in flags_per_section.items():
@@ -236,7 +184,7 @@ def check_PE_NX(executable) -> bool:
def get_MACHO_executable_flags(executable) -> List[str]:
stdout = run_command([OTOOL_CMD, '-vh', executable])
- flags = []
+ flags: List[str] = []
for line in stdout.splitlines():
tokens = line.split()
# filter first two header lines
diff --git a/contrib/devtools/symbol-check.py b/contrib/devtools/symbol-check.py
index 6949cb7ced..6ca5076d6f 100755
--- a/contrib/devtools/symbol-check.py
+++ b/contrib/devtools/symbol-check.py
@@ -11,10 +11,11 @@ Example usage:
find ../gitian-builder/build -type f -executable | xargs python3 contrib/devtools/symbol-check.py
'''
import subprocess
-import re
import sys
import os
-from typing import List, Optional, Tuple
+from typing import List, Optional
+
+import pixie
# Debian 8 (Jessie) EOL: 2020. https://wiki.debian.org/DebianReleases#Production_Releases
#
@@ -50,7 +51,6 @@ IGNORE_EXPORTS = {
'_edata', '_end', '__end__', '_init', '__bss_start', '__bss_start__', '_bss_end__', '__bss_end__', '_fini', '_IO_stdin_used', 'stdin', 'stdout', 'stderr',
'environ', '_environ', '__environ',
}
-READELF_CMD = os.getenv('READELF', '/usr/bin/readelf')
CPPFILT_CMD = os.getenv('CPPFILT', '/usr/bin/c++filt')
OBJDUMP_CMD = os.getenv('OBJDUMP', '/usr/bin/objdump')
OTOOL_CMD = os.getenv('OTOOL', '/usr/bin/otool')
@@ -76,11 +76,11 @@ ELF_ALLOWED_LIBRARIES = {
'libdl.so.2' # programming interface to dynamic linker
}
ARCH_MIN_GLIBC_VER = {
-'80386': (2,1),
-'X86-64': (2,2,5),
-'ARM': (2,4),
-'AArch64':(2,17),
-'RISC-V': (2,27)
+pixie.EM_386: (2,1),
+pixie.EM_X86_64: (2,2,5),
+pixie.EM_ARM: (2,4),
+pixie.EM_AARCH64:(2,17),
+pixie.EM_RISCV: (2,27)
}
MACHO_ALLOWED_LIBRARIES = {
@@ -140,29 +140,6 @@ class CPPFilt(object):
self.proc.stdout.close()
self.proc.wait()
-def read_symbols(executable, imports=True) -> List[Tuple[str, str, str]]:
- '''
- Parse an ELF executable and return a list of (symbol,version, arch) tuples
- for dynamic, imported symbols.
- '''
- p = subprocess.Popen([READELF_CMD, '--dyn-syms', '-W', '-h', executable], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
- (stdout, stderr) = p.communicate()
- if p.returncode:
- raise IOError('Could not read symbols for {}: {}'.format(executable, stderr.strip()))
- syms = []
- for line in stdout.splitlines():
- line = line.split()
- if 'Machine:' in line:
- arch = line[-1]
- if len(line)>7 and re.match('[0-9]+:$', line[0]):
- (sym, _, version) = line[7].partition('@')
- is_import = line[6] == 'UND'
- if version.startswith('@'):
- version = version[1:]
- if is_import == imports:
- syms.append((sym, version, arch))
- return syms
-
def check_version(max_versions, version, arch) -> bool:
if '_' in version:
(lib, _, ver) = version.rpartition('_')
@@ -174,36 +151,30 @@ def check_version(max_versions, version, arch) -> bool:
return False
return ver <= max_versions[lib] or lib == 'GLIBC' and ver <= ARCH_MIN_GLIBC_VER[arch]
-def elf_read_libraries(filename) -> List[str]:
- p = subprocess.Popen([READELF_CMD, '-d', '-W', filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE, universal_newlines=True)
- (stdout, stderr) = p.communicate()
- if p.returncode:
- raise IOError('Error opening file')
- libraries = []
- for line in stdout.splitlines():
- tokens = line.split()
- if len(tokens)>2 and tokens[1] == '(NEEDED)':
- match = re.match(r'^Shared library: \[(.*)\]$', ' '.join(tokens[2:]))
- if match:
- libraries.append(match.group(1))
- else:
- raise ValueError('Unparseable (NEEDED) specification')
- return libraries
-
def check_imported_symbols(filename) -> bool:
+ elf = pixie.load(filename)
cppfilt = CPPFilt()
ok = True
- for sym, version, arch in read_symbols(filename, True):
- if version and not check_version(MAX_VERSIONS, version, arch):
+
+ for symbol in elf.dyn_symbols:
+ if not symbol.is_import:
+ continue
+ sym = symbol.name.decode()
+ version = symbol.version.decode() if symbol.version is not None else None
+ if version and not check_version(MAX_VERSIONS, version, elf.hdr.e_machine):
print('{}: symbol {} from unsupported version {}'.format(filename, cppfilt(sym), version))
ok = False
return ok
def check_exported_symbols(filename) -> bool:
+ elf = pixie.load(filename)
cppfilt = CPPFilt()
ok = True
- for sym,version,arch in read_symbols(filename, False):
- if arch == 'RISC-V' or sym in IGNORE_EXPORTS:
+ for symbol in elf.dyn_symbols:
+ if not symbol.is_export:
+ continue
+ sym = symbol.name.decode()
+ if elf.hdr.e_machine == pixie.EM_RISCV or sym in IGNORE_EXPORTS:
continue
print('{}: export of symbol {} not allowed'.format(filename, cppfilt(sym)))
ok = False
@@ -211,9 +182,11 @@ def check_exported_symbols(filename) -> bool:
def check_ELF_libraries(filename) -> bool:
ok = True
- for library_name in elf_read_libraries(filename):
- if library_name not in ELF_ALLOWED_LIBRARIES:
- print('{}: NEEDED library {} is not allowed'.format(filename, library_name))
+ elf = pixie.load(filename)
+ for library_name in elf.query_dyn_tags(pixie.DT_NEEDED):
+ assert(isinstance(library_name, bytes))
+ if library_name.decode() not in ELF_ALLOWED_LIBRARIES:
+ print('{}: NEEDED library {} is not allowed'.format(filename, library_name.decode()))
ok = False
return ok
diff --git a/src/Makefile.am b/src/Makefile.am
index 67fd402603..8da754eee6 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -741,13 +741,13 @@ endif
if GLIBC_BACK_COMPAT
@echo "Checking glibc back compat..."
- $(AM_V_at) READELF=$(READELF) CPPFILT=$(CPPFILT) $(PYTHON) $(top_srcdir)/contrib/devtools/symbol-check.py $(bin_PROGRAMS)
+ $(AM_V_at) CPPFILT=$(CPPFILT) $(PYTHON) $(top_srcdir)/contrib/devtools/symbol-check.py $(bin_PROGRAMS)
endif
check-security: $(bin_PROGRAMS)
if HARDEN
@echo "Checking binary security..."
- $(AM_V_at) READELF=$(READELF) OBJDUMP=$(OBJDUMP) OTOOL=$(OTOOL) $(PYTHON) $(top_srcdir)/contrib/devtools/security-check.py $(bin_PROGRAMS)
+ $(AM_V_at) OBJDUMP=$(OBJDUMP) OTOOL=$(OTOOL) $(PYTHON) $(top_srcdir)/contrib/devtools/security-check.py $(bin_PROGRAMS)
endif
if EMBEDDED_LEVELDB