diff options
author | fanquake <fanquake@gmail.com> | 2023-05-04 12:07:26 +0100 |
---|---|---|
committer | fanquake <fanquake@gmail.com> | 2023-06-16 10:38:19 +0100 |
commit | 32e2ffc39374f61bb2435da507f285459985df9e (patch) | |
tree | 44103a701bd14b0c77163db5d557215d40842210 /src/util | |
parent | b3db18a0126bc4181d2a0880c27f45d203d06179 (diff) |
Remove the syscall sandbox
After initially being merged in #20487, it's no-longer clear that an
internal syscall sandboxing mechanism is something that Bitcoin Core
should have/maintain, especially when compared to better
maintained/supported alterantives, i.e firejail.
Note that given where it's used, the sandbox also gets dragged into the
kernel.
There is some related discussion in #24771.
This should not require any sort of deprecation, as this was only ever
an opt-in, experimental feature.
Closes #24771.
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/syscall_sandbox.cpp | 927 | ||||
-rw-r--r-- | src/util/syscall_sandbox.h | 54 |
2 files changed, 0 insertions, 981 deletions
diff --git a/src/util/syscall_sandbox.cpp b/src/util/syscall_sandbox.cpp deleted file mode 100644 index b1579bdb9c..0000000000 --- a/src/util/syscall_sandbox.cpp +++ /dev/null @@ -1,927 +0,0 @@ -// Copyright (c) 2020-2022 The Bitcoin Core developers -// Distributed under the MIT software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#if defined(HAVE_CONFIG_H) -#include <config/bitcoin-config.h> -#endif // defined(HAVE_CONFIG_H) - -#include <util/syscall_sandbox.h> - -#if defined(USE_SYSCALL_SANDBOX) -#include <array> -#include <cassert> -#include <cstdint> -#include <exception> -#include <map> -#include <new> -#include <set> -#include <string> -#include <vector> - -#include <logging.h> -#include <tinyformat.h> -#include <util/threadnames.h> - -#include <linux/audit.h> -#include <linux/filter.h> -#include <linux/seccomp.h> -#include <linux/unistd.h> -#include <signal.h> -#include <sys/prctl.h> -#include <sys/types.h> -#include <unistd.h> - -namespace { -bool g_syscall_sandbox_enabled{false}; -bool g_syscall_sandbox_log_violation_before_terminating{false}; - -#if !defined(__x86_64__) -#error Syscall sandbox is an experimental feature currently available only under Linux x86-64. -#endif // defined(__x86_64__) - -#ifndef SECCOMP_RET_KILL_PROCESS -#define SECCOMP_RET_KILL_PROCESS 0x80000000U -#endif - -// Define system call numbers for x86_64 that are referenced in the system call profile -// but not provided by the kernel headers used in the GUIX build. -// Usually, they can be found via "grep name /usr/include/x86_64-linux-gnu/asm/unistd_64.h" - -#ifndef __NR_clone3 -#define __NR_clone3 435 -#endif - -#ifndef __NR_statx -#define __NR_statx 332 -#endif - -#ifndef __NR_getrandom -#define __NR_getrandom 318 -#endif - -#ifndef __NR_membarrier -#define __NR_membarrier 324 -#endif - -#ifndef __NR_copy_file_range -#define __NR_copy_file_range 326 -#endif - -#ifndef __NR_rseq -#define __NR_rseq 334 -#endif - -// This list of syscalls in LINUX_SYSCALLS is only used to map syscall numbers to syscall names in -// order to be able to print user friendly error messages which include the syscall name in addition -// to the syscall number. -// -// Example output in case of a syscall violation where the syscall is present in LINUX_SYSCALLS: -// -// ``` -// 2021-06-09T12:34:56Z ERROR: The syscall "execve" (syscall number 59) is not allowed by the syscall sandbox in thread "msghand". Please report. -// ``` -// -// Example output in case of a syscall violation where the syscall is not present in LINUX_SYSCALLS: -// -// ``` -// 2021-06-09T12:34:56Z ERROR: The syscall "*unknown*" (syscall number 314) is not allowed by the syscall sandbox in thread "msghand". Please report. -// `` -// -// LINUX_SYSCALLS contains two types of syscalls: -// 1.) Syscalls that are present under all architectures or relevant Linux kernel versions for which -// we support the syscall sandbox feature (currently only Linux x86-64). Examples include read, -// write, open, close, etc. -// 2.) Syscalls that are present under a subset of architectures or relevant Linux kernel versions -// for which we support the syscall sandbox feature. This type of syscalls should be added to -// LINUX_SYSCALLS conditional on availability like in the following example: -// ... -// #if defined(__NR_arch_dependent_syscall) -// {__NR_arch_dependent_syscall, "arch_dependent_syscall"}, -// #endif // defined(__NR_arch_dependent_syscall) -// ... -const std::map<uint32_t, std::string> LINUX_SYSCALLS{ - {__NR_accept, "accept"}, - {__NR_accept4, "accept4"}, - {__NR_access, "access"}, - {__NR_acct, "acct"}, - {__NR_add_key, "add_key"}, - {__NR_adjtimex, "adjtimex"}, - {__NR_afs_syscall, "afs_syscall"}, - {__NR_alarm, "alarm"}, - {__NR_arch_prctl, "arch_prctl"}, - {__NR_bind, "bind"}, - {__NR_bpf, "bpf"}, - {__NR_brk, "brk"}, - {__NR_capget, "capget"}, - {__NR_capset, "capset"}, - {__NR_chdir, "chdir"}, - {__NR_chmod, "chmod"}, - {__NR_chown, "chown"}, - {__NR_chroot, "chroot"}, - {__NR_clock_adjtime, "clock_adjtime"}, - {__NR_clock_getres, "clock_getres"}, - {__NR_clock_gettime, "clock_gettime"}, - {__NR_clock_nanosleep, "clock_nanosleep"}, - {__NR_clock_settime, "clock_settime"}, - {__NR_clone, "clone"}, - {__NR_clone3, "clone3"}, - {__NR_close, "close"}, - {__NR_connect, "connect"}, - {__NR_copy_file_range, "copy_file_range"}, - {__NR_creat, "creat"}, - {__NR_create_module, "create_module"}, - {__NR_delete_module, "delete_module"}, - {__NR_dup, "dup"}, - {__NR_dup2, "dup2"}, - {__NR_dup3, "dup3"}, - {__NR_epoll_create, "epoll_create"}, - {__NR_epoll_create1, "epoll_create1"}, - {__NR_epoll_ctl, "epoll_ctl"}, - {__NR_epoll_ctl_old, "epoll_ctl_old"}, - {__NR_epoll_pwait, "epoll_pwait"}, - {__NR_epoll_wait, "epoll_wait"}, - {__NR_epoll_wait_old, "epoll_wait_old"}, - {__NR_eventfd, "eventfd"}, - {__NR_eventfd2, "eventfd2"}, - {__NR_execve, "execve"}, - {__NR_execveat, "execveat"}, - {__NR_exit, "exit"}, - {__NR_exit_group, "exit_group"}, - {__NR_faccessat, "faccessat"}, - {__NR_fadvise64, "fadvise64"}, - {__NR_fallocate, "fallocate"}, - {__NR_fanotify_init, "fanotify_init"}, - {__NR_fanotify_mark, "fanotify_mark"}, - {__NR_fchdir, "fchdir"}, - {__NR_fchmod, "fchmod"}, - {__NR_fchmodat, "fchmodat"}, - {__NR_fchown, "fchown"}, - {__NR_fchownat, "fchownat"}, - {__NR_fcntl, "fcntl"}, - {__NR_fdatasync, "fdatasync"}, - {__NR_fgetxattr, "fgetxattr"}, - {__NR_finit_module, "finit_module"}, - {__NR_flistxattr, "flistxattr"}, - {__NR_flock, "flock"}, - {__NR_fork, "fork"}, - {__NR_fremovexattr, "fremovexattr"}, - {__NR_fsetxattr, "fsetxattr"}, - {__NR_fstat, "fstat"}, - {__NR_fstatfs, "fstatfs"}, - {__NR_fsync, "fsync"}, - {__NR_ftruncate, "ftruncate"}, - {__NR_futex, "futex"}, - {__NR_futimesat, "futimesat"}, - {__NR_get_kernel_syms, "get_kernel_syms"}, - {__NR_get_mempolicy, "get_mempolicy"}, - {__NR_get_robust_list, "get_robust_list"}, - {__NR_get_thread_area, "get_thread_area"}, - {__NR_getcpu, "getcpu"}, - {__NR_getcwd, "getcwd"}, - {__NR_getdents, "getdents"}, - {__NR_getdents64, "getdents64"}, - {__NR_getegid, "getegid"}, - {__NR_geteuid, "geteuid"}, - {__NR_getgid, "getgid"}, - {__NR_getgroups, "getgroups"}, - {__NR_getitimer, "getitimer"}, - {__NR_getpeername, "getpeername"}, - {__NR_getpgid, "getpgid"}, - {__NR_getpgrp, "getpgrp"}, - {__NR_getpid, "getpid"}, - {__NR_getpmsg, "getpmsg"}, - {__NR_getppid, "getppid"}, - {__NR_getpriority, "getpriority"}, - {__NR_getrandom, "getrandom"}, - {__NR_getresgid, "getresgid"}, - {__NR_getresuid, "getresuid"}, - {__NR_getrlimit, "getrlimit"}, - {__NR_getrusage, "getrusage"}, - {__NR_getsid, "getsid"}, - {__NR_getsockname, "getsockname"}, - {__NR_getsockopt, "getsockopt"}, - {__NR_gettid, "gettid"}, - {__NR_gettimeofday, "gettimeofday"}, - {__NR_getuid, "getuid"}, - {__NR_getxattr, "getxattr"}, - {__NR_init_module, "init_module"}, - {__NR_inotify_add_watch, "inotify_add_watch"}, - {__NR_inotify_init, "inotify_init"}, - {__NR_inotify_init1, "inotify_init1"}, - {__NR_inotify_rm_watch, "inotify_rm_watch"}, - {__NR_io_cancel, "io_cancel"}, - {__NR_io_destroy, "io_destroy"}, - {__NR_io_getevents, "io_getevents"}, - {__NR_io_setup, "io_setup"}, - {__NR_io_submit, "io_submit"}, - {__NR_ioctl, "ioctl"}, - {__NR_ioperm, "ioperm"}, - {__NR_iopl, "iopl"}, - {__NR_ioprio_get, "ioprio_get"}, - {__NR_ioprio_set, "ioprio_set"}, - {__NR_kcmp, "kcmp"}, - {__NR_kexec_file_load, "kexec_file_load"}, - {__NR_kexec_load, "kexec_load"}, - {__NR_keyctl, "keyctl"}, - {__NR_kill, "kill"}, - {__NR_lchown, "lchown"}, - {__NR_lgetxattr, "lgetxattr"}, - {__NR_link, "link"}, - {__NR_linkat, "linkat"}, - {__NR_listen, "listen"}, - {__NR_listxattr, "listxattr"}, - {__NR_llistxattr, "llistxattr"}, - {__NR_lookup_dcookie, "lookup_dcookie"}, - {__NR_lremovexattr, "lremovexattr"}, - {__NR_lseek, "lseek"}, - {__NR_lsetxattr, "lsetxattr"}, - {__NR_lstat, "lstat"}, - {__NR_madvise, "madvise"}, - {__NR_mbind, "mbind"}, - {__NR_membarrier, "membarrier"}, - {__NR_memfd_create, "memfd_create"}, - {__NR_migrate_pages, "migrate_pages"}, - {__NR_mincore, "mincore"}, - {__NR_mkdir, "mkdir"}, - {__NR_mkdirat, "mkdirat"}, - {__NR_mknod, "mknod"}, - {__NR_mknodat, "mknodat"}, - {__NR_mlock, "mlock"}, - {__NR_mlock2, "mlock2"}, - {__NR_mlockall, "mlockall"}, - {__NR_mmap, "mmap"}, - {__NR_modify_ldt, "modify_ldt"}, - {__NR_mount, "mount"}, - {__NR_move_pages, "move_pages"}, - {__NR_mprotect, "mprotect"}, - {__NR_mq_getsetattr, "mq_getsetattr"}, - {__NR_mq_notify, "mq_notify"}, - {__NR_mq_open, "mq_open"}, - {__NR_mq_timedreceive, "mq_timedreceive"}, - {__NR_mq_timedsend, "mq_timedsend"}, - {__NR_mq_unlink, "mq_unlink"}, - {__NR_mremap, "mremap"}, - {__NR_msgctl, "msgctl"}, - {__NR_msgget, "msgget"}, - {__NR_msgrcv, "msgrcv"}, - {__NR_msgsnd, "msgsnd"}, - {__NR_msync, "msync"}, - {__NR_munlock, "munlock"}, - {__NR_munlockall, "munlockall"}, - {__NR_munmap, "munmap"}, - {__NR_name_to_handle_at, "name_to_handle_at"}, - {__NR_nanosleep, "nanosleep"}, - {__NR_newfstatat, "newfstatat"}, - {__NR_nfsservctl, "nfsservctl"}, - {__NR_open, "open"}, - {__NR_open_by_handle_at, "open_by_handle_at"}, - {__NR_openat, "openat"}, - {__NR_pause, "pause"}, - {__NR_perf_event_open, "perf_event_open"}, - {__NR_personality, "personality"}, - {__NR_pipe, "pipe"}, - {__NR_pipe2, "pipe2"}, - {__NR_pivot_root, "pivot_root"}, -#ifdef __NR_pkey_alloc - {__NR_pkey_alloc, "pkey_alloc"}, -#endif -#ifdef __NR_pkey_free - {__NR_pkey_free, "pkey_free"}, -#endif -#ifdef __NR_pkey_mprotect - {__NR_pkey_mprotect, "pkey_mprotect"}, -#endif - {__NR_poll, "poll"}, - {__NR_ppoll, "ppoll"}, - {__NR_prctl, "prctl"}, - {__NR_pread64, "pread64"}, - {__NR_preadv, "preadv"}, -#ifdef __NR_preadv2 - {__NR_preadv2, "preadv2"}, -#endif - {__NR_prlimit64, "prlimit64"}, - {__NR_process_vm_readv, "process_vm_readv"}, - {__NR_process_vm_writev, "process_vm_writev"}, - {__NR_pselect6, "pselect6"}, - {__NR_ptrace, "ptrace"}, - {__NR_putpmsg, "putpmsg"}, - {__NR_pwrite64, "pwrite64"}, - {__NR_pwritev, "pwritev"}, -#ifdef __NR_pwritev2 - {__NR_pwritev2, "pwritev2"}, -#endif - {__NR__sysctl, "_sysctl"}, - {__NR_query_module, "query_module"}, - {__NR_quotactl, "quotactl"}, - {__NR_read, "read"}, - {__NR_readahead, "readahead"}, - {__NR_readlink, "readlink"}, - {__NR_readlinkat, "readlinkat"}, - {__NR_readv, "readv"}, - {__NR_reboot, "reboot"}, - {__NR_recvfrom, "recvfrom"}, - {__NR_recvmmsg, "recvmmsg"}, - {__NR_recvmsg, "recvmsg"}, - {__NR_remap_file_pages, "remap_file_pages"}, - {__NR_removexattr, "removexattr"}, - {__NR_rename, "rename"}, - {__NR_renameat, "renameat"}, - {__NR_renameat2, "renameat2"}, - {__NR_request_key, "request_key"}, - {__NR_restart_syscall, "restart_syscall"}, - {__NR_rmdir, "rmdir"}, - {__NR_rseq, "rseq"}, - {__NR_rt_sigaction, "rt_sigaction"}, - {__NR_rt_sigpending, "rt_sigpending"}, - {__NR_rt_sigprocmask, "rt_sigprocmask"}, - {__NR_rt_sigqueueinfo, "rt_sigqueueinfo"}, - {__NR_rt_sigreturn, "rt_sigreturn"}, - {__NR_rt_sigsuspend, "rt_sigsuspend"}, - {__NR_rt_sigtimedwait, "rt_sigtimedwait"}, - {__NR_rt_tgsigqueueinfo, "rt_tgsigqueueinfo"}, - {__NR_sched_get_priority_max, "sched_get_priority_max"}, - {__NR_sched_get_priority_min, "sched_get_priority_min"}, - {__NR_sched_getaffinity, "sched_getaffinity"}, - {__NR_sched_getattr, "sched_getattr"}, - {__NR_sched_getparam, "sched_getparam"}, - {__NR_sched_getscheduler, "sched_getscheduler"}, - {__NR_sched_rr_get_interval, "sched_rr_get_interval"}, - {__NR_sched_setaffinity, "sched_setaffinity"}, - {__NR_sched_setattr, "sched_setattr"}, - {__NR_sched_setparam, "sched_setparam"}, - {__NR_sched_setscheduler, "sched_setscheduler"}, - {__NR_sched_yield, "sched_yield"}, - {__NR_seccomp, "seccomp"}, - {__NR_security, "security"}, - {__NR_select, "select"}, - {__NR_semctl, "semctl"}, - {__NR_semget, "semget"}, - {__NR_semop, "semop"}, - {__NR_semtimedop, "semtimedop"}, - {__NR_sendfile, "sendfile"}, - {__NR_sendmmsg, "sendmmsg"}, - {__NR_sendmsg, "sendmsg"}, - {__NR_sendto, "sendto"}, - {__NR_set_mempolicy, "set_mempolicy"}, - {__NR_set_robust_list, "set_robust_list"}, - {__NR_set_thread_area, "set_thread_area"}, - {__NR_set_tid_address, "set_tid_address"}, - {__NR_setdomainname, "setdomainname"}, - {__NR_setfsgid, "setfsgid"}, - {__NR_setfsuid, "setfsuid"}, - {__NR_setgid, "setgid"}, - {__NR_setgroups, "setgroups"}, - {__NR_sethostname, "sethostname"}, - {__NR_setitimer, "setitimer"}, - {__NR_setns, "setns"}, - {__NR_setpgid, "setpgid"}, - {__NR_setpriority, "setpriority"}, - {__NR_setregid, "setregid"}, - {__NR_setresgid, "setresgid"}, - {__NR_setresuid, "setresuid"}, - {__NR_setreuid, "setreuid"}, - {__NR_setrlimit, "setrlimit"}, - {__NR_setsid, "setsid"}, - {__NR_setsockopt, "setsockopt"}, - {__NR_settimeofday, "settimeofday"}, - {__NR_setuid, "setuid"}, - {__NR_setxattr, "setxattr"}, - {__NR_shmat, "shmat"}, - {__NR_shmctl, "shmctl"}, - {__NR_shmdt, "shmdt"}, - {__NR_shmget, "shmget"}, - {__NR_shutdown, "shutdown"}, - {__NR_sigaltstack, "sigaltstack"}, - {__NR_signalfd, "signalfd"}, - {__NR_signalfd4, "signalfd4"}, - {__NR_socket, "socket"}, - {__NR_socketpair, "socketpair"}, - {__NR_splice, "splice"}, - {__NR_stat, "stat"}, - {__NR_statfs, "statfs"}, - {__NR_statx, "statx"}, - {__NR_swapoff, "swapoff"}, - {__NR_swapon, "swapon"}, - {__NR_symlink, "symlink"}, - {__NR_symlinkat, "symlinkat"}, - {__NR_sync, "sync"}, - {__NR_sync_file_range, "sync_file_range"}, - {__NR_syncfs, "syncfs"}, - {__NR_sysfs, "sysfs"}, - {__NR_sysinfo, "sysinfo"}, - {__NR_syslog, "syslog"}, - {__NR_tee, "tee"}, - {__NR_tgkill, "tgkill"}, - {__NR_time, "time"}, - {__NR_timer_create, "timer_create"}, - {__NR_timer_delete, "timer_delete"}, - {__NR_timer_getoverrun, "timer_getoverrun"}, - {__NR_timer_gettime, "timer_gettime"}, - {__NR_timer_settime, "timer_settime"}, - {__NR_timerfd_create, "timerfd_create"}, - {__NR_timerfd_gettime, "timerfd_gettime"}, - {__NR_timerfd_settime, "timerfd_settime"}, - {__NR_times, "times"}, - {__NR_tkill, "tkill"}, - {__NR_truncate, "truncate"}, - {__NR_tuxcall, "tuxcall"}, - {__NR_umask, "umask"}, - {__NR_umount2, "umount2"}, - {__NR_uname, "uname"}, - {__NR_unlink, "unlink"}, - {__NR_unlinkat, "unlinkat"}, - {__NR_unshare, "unshare"}, - {__NR_uselib, "uselib"}, - {__NR_userfaultfd, "userfaultfd"}, - {__NR_ustat, "ustat"}, - {__NR_utime, "utime"}, - {__NR_utimensat, "utimensat"}, - {__NR_utimes, "utimes"}, - {__NR_vfork, "vfork"}, - {__NR_vhangup, "vhangup"}, - {__NR_vmsplice, "vmsplice"}, - {__NR_vserver, "vserver"}, - {__NR_wait4, "wait4"}, - {__NR_waitid, "waitid"}, - {__NR_write, "write"}, - {__NR_writev, "writev"}, -}; - -std::string GetLinuxSyscallName(uint32_t syscall_number) -{ - const auto element = LINUX_SYSCALLS.find(syscall_number); - if (element != LINUX_SYSCALLS.end()) { - return element->second; - } - return "*unknown*"; -} - -// See Linux kernel developer Kees Cook's seccomp guide at <https://outflux.net/teach-seccomp/> for -// an accessible introduction to using seccomp. -// -// This function largely follows <https://outflux.net/teach-seccomp/step-3/syscall-reporter.c> and -// <https://outflux.net/teach-seccomp/step-3/seccomp-bpf.h>. -// -// Seccomp BPF resources: -// * Seccomp BPF documentation: <https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html> -// * seccomp(2) manual page: <https://www.kernel.org/doc/man-pages/online/pages/man2/seccomp.2.html> -// * Seccomp BPF demo code samples: <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/samples/seccomp> -void SyscallSandboxDebugSignalHandler(int, siginfo_t* signal_info, void* void_signal_context) -{ - // The si_code field inside the siginfo_t argument that is passed to a SA_SIGINFO signal handler - // is a value indicating why the signal was sent. - // - // The following value can be placed in si_code for a SIGSYS signal: - // * SYS_SECCOMP (since Linux 3.5): Triggered by a seccomp(2) filter rule. - constexpr int32_t SYS_SECCOMP_SI_CODE{1}; - assert(signal_info->si_code == SYS_SECCOMP_SI_CODE); - - // The ucontext_t structure contains signal context information that was saved on the user-space - // stack by the kernel. - const ucontext_t* signal_context = static_cast<ucontext_t*>(void_signal_context); - assert(signal_context != nullptr); - - std::set_new_handler(std::terminate); - // Portability note: REG_RAX is Linux x86_64 specific. - const uint32_t syscall_number = static_cast<uint32_t>(signal_context->uc_mcontext.gregs[REG_RAX]); - const std::string syscall_name = GetLinuxSyscallName(syscall_number); - const std::string thread_name = !util::ThreadGetInternalName().empty() ? util::ThreadGetInternalName() : "*unnamed*"; - const std::string error_message = strprintf("ERROR: The syscall \"%s\" (syscall number %d) is not allowed by the syscall sandbox in thread \"%s\". Please report.", syscall_name, syscall_number, thread_name); - tfm::format(std::cerr, "%s\n", error_message); - LogPrintf("%s\n", error_message); - std::terminate(); -} - -// This function largely follows install_syscall_reporter from Kees Cook's seccomp guide: -// <https://outflux.net/teach-seccomp/step-3/syscall-reporter.c> -bool SetupSyscallSandboxDebugHandler() -{ - struct sigaction action = {}; - sigset_t mask; - sigemptyset(&mask); - sigaddset(&mask, SIGSYS); - action.sa_sigaction = &SyscallSandboxDebugSignalHandler; - action.sa_flags = SA_SIGINFO; - if (sigaction(SIGSYS, &action, nullptr) < 0) { - return false; - } - if (sigprocmask(SIG_UNBLOCK, &mask, nullptr)) { - return false; - } - return true; -} - -enum class SyscallSandboxAction { - KILL_PROCESS, - INVOKE_SIGNAL_HANDLER, -}; - -class SeccompPolicyBuilder -{ - std::set<uint32_t> allowed_syscalls; - -public: - SeccompPolicyBuilder() - { - // Allowed by default. - AllowAddressSpaceAccess(); - AllowEpoll(); - AllowEventFd(); - AllowFutex(); - AllowGeneralIo(); - AllowGetRandom(); - AllowGetSimpleId(); - AllowGetTime(); - AllowGlobalProcessEnvironment(); - AllowGlobalSystemStatus(); - AllowKernelInternalApi(); - AllowNetworkSocketInformation(); - AllowOperationOnExistingFileDescriptor(); - AllowPipe(); - AllowPrctl(); - AllowProcessStartOrDeath(); - AllowScheduling(); - AllowSignalHandling(); - AllowSleep(); - AllowUmask(); - } - - void AllowAddressSpaceAccess() - { - allowed_syscalls.insert(__NR_brk); // change data segment size - allowed_syscalls.insert(__NR_madvise); // give advice about use of memory - allowed_syscalls.insert(__NR_membarrier); // issue memory barriers on a set of threads - allowed_syscalls.insert(__NR_mincore); // check if virtual memory is in RAM - allowed_syscalls.insert(__NR_mlock); // lock memory - allowed_syscalls.insert(__NR_mmap); // map files or devices into memory - allowed_syscalls.insert(__NR_mprotect); // set protection on a region of memory - allowed_syscalls.insert(__NR_mremap); // remap a file in memory - allowed_syscalls.insert(__NR_munlock); // unlock memory - allowed_syscalls.insert(__NR_munmap); // unmap files or devices into memory - } - - void AllowEpoll() - { - allowed_syscalls.insert(__NR_epoll_create1); // open an epoll file descriptor - allowed_syscalls.insert(__NR_epoll_ctl); // control interface for an epoll file descriptor - allowed_syscalls.insert(__NR_epoll_pwait); // wait for an I/O event on an epoll file descriptor - allowed_syscalls.insert(__NR_epoll_wait); // wait for an I/O event on an epoll file descriptor - } - - void AllowEventFd() - { - allowed_syscalls.insert(__NR_eventfd2); // create a file descriptor for event notification - } - - void AllowFileSystem() - { - allowed_syscalls.insert(__NR_access); // check user's permissions for a file - allowed_syscalls.insert(__NR_chdir); // change working directory - allowed_syscalls.insert(__NR_chmod); // change permissions of a file - allowed_syscalls.insert(__NR_copy_file_range); // copy a range of data from one file to another - allowed_syscalls.insert(__NR_fallocate); // manipulate file space - allowed_syscalls.insert(__NR_fchmod); // change permissions of a file - allowed_syscalls.insert(__NR_fchown); // change ownership of a file - allowed_syscalls.insert(__NR_fdatasync); // synchronize a file's in-core state with storage device - allowed_syscalls.insert(__NR_flock); // apply or remove an advisory lock on an open file - allowed_syscalls.insert(__NR_fstat); // get file status - allowed_syscalls.insert(__NR_fstatfs); // get file system status - allowed_syscalls.insert(__NR_fsync); // synchronize a file's in-core state with storage device - allowed_syscalls.insert(__NR_ftruncate); // truncate a file to a specified length - allowed_syscalls.insert(__NR_getcwd); // get current working directory - allowed_syscalls.insert(__NR_getdents); // get directory entries - allowed_syscalls.insert(__NR_getdents64); // get directory entries - allowed_syscalls.insert(__NR_lstat); // get file status - allowed_syscalls.insert(__NR_mkdir); // create a directory - allowed_syscalls.insert(__NR_newfstatat); // get file status - allowed_syscalls.insert(__NR_open); // open and possibly create a file - allowed_syscalls.insert(__NR_openat); // open and possibly create a file - allowed_syscalls.insert(__NR_readlink); // read value of a symbolic link - allowed_syscalls.insert(__NR_rename); // change the name or location of a file - allowed_syscalls.insert(__NR_rmdir); // delete a directory - allowed_syscalls.insert(__NR_sendfile); // transfer data between file descriptors - allowed_syscalls.insert(__NR_stat); // get file status - allowed_syscalls.insert(__NR_statfs); // get filesystem statistics - allowed_syscalls.insert(__NR_statx); // get file status (extended) - allowed_syscalls.insert(__NR_unlink); // delete a name and possibly the file it refers to - allowed_syscalls.insert(__NR_unlinkat); // delete relative to a directory file descriptor - } - - void AllowFutex() - { - allowed_syscalls.insert(__NR_futex); // fast user-space locking - allowed_syscalls.insert(__NR_set_robust_list); // set list of robust futexes - } - - void AllowGeneralIo() - { - allowed_syscalls.insert(__NR_ioctl); // control device - allowed_syscalls.insert(__NR_lseek); // reposition read/write file offset - allowed_syscalls.insert(__NR_poll); // wait for some event on a file descriptor - allowed_syscalls.insert(__NR_ppoll); // wait for some event on a file descriptor - allowed_syscalls.insert(__NR_pread64); // read from a file descriptor at a given offset - allowed_syscalls.insert(__NR_pwrite64); // write to a file descriptor at a given offset - allowed_syscalls.insert(__NR_read); // read from a file descriptor - allowed_syscalls.insert(__NR_readv); // read data into multiple buffers - allowed_syscalls.insert(__NR_recvfrom); // receive a message from a socket - allowed_syscalls.insert(__NR_recvmsg); // receive a message from a socket - allowed_syscalls.insert(__NR_select); // synchronous I/O multiplexing - allowed_syscalls.insert(__NR_sendmmsg); // send multiple messages on a socket - allowed_syscalls.insert(__NR_sendmsg); // send a message on a socket - allowed_syscalls.insert(__NR_sendto); // send a message on a socket - allowed_syscalls.insert(__NR_write); // write to a file descriptor - allowed_syscalls.insert(__NR_writev); // write data into multiple buffers - } - - void AllowGetRandom() - { - allowed_syscalls.insert(__NR_getrandom); // obtain a series of random bytes - } - - void AllowGetSimpleId() - { - allowed_syscalls.insert(__NR_getegid); // get group identity - allowed_syscalls.insert(__NR_geteuid); // get user identity - allowed_syscalls.insert(__NR_getgid); // get group identity - allowed_syscalls.insert(__NR_getpgid); // get process group - allowed_syscalls.insert(__NR_getpid); // get process identification - allowed_syscalls.insert(__NR_getppid); // get process identification - allowed_syscalls.insert(__NR_getresgid); // get real, effective and saved group IDs - allowed_syscalls.insert(__NR_getresuid); // get real, effective and saved user IDs - allowed_syscalls.insert(__NR_getsid); // get session ID - allowed_syscalls.insert(__NR_gettid); // get thread identification - allowed_syscalls.insert(__NR_getuid); // get user identity - } - - void AllowGetTime() - { - allowed_syscalls.insert(__NR_clock_getres); // find the resolution (precision) of the specified clock - allowed_syscalls.insert(__NR_clock_gettime); // retrieve the time of the specified clock - allowed_syscalls.insert(__NR_gettimeofday); // get timeval - } - - void AllowGlobalProcessEnvironment() - { - allowed_syscalls.insert(__NR_getrlimit); // get resource limits - allowed_syscalls.insert(__NR_getrusage); // get resource usage - allowed_syscalls.insert(__NR_prlimit64); // get/set resource limits - } - - void AllowGlobalSystemStatus() - { - allowed_syscalls.insert(__NR_sysinfo); // return system information - allowed_syscalls.insert(__NR_uname); // get name and information about current kernel - } - - void AllowKernelInternalApi() - { - allowed_syscalls.insert(__NR_restart_syscall); // restart a system call after interruption by a stop signal - } - - void AllowNetwork() - { - allowed_syscalls.insert(__NR_accept); // accept a connection on a socket - allowed_syscalls.insert(__NR_accept4); // accept a connection on a socket - allowed_syscalls.insert(__NR_bind); // bind a name to a socket - allowed_syscalls.insert(__NR_connect); // initiate a connection on a socket - allowed_syscalls.insert(__NR_listen); // listen for connections on a socket - allowed_syscalls.insert(__NR_setsockopt); // set options on sockets - allowed_syscalls.insert(__NR_socket); // create an endpoint for communication - allowed_syscalls.insert(__NR_socketpair); // create a pair of connected sockets - } - - void AllowNetworkSocketInformation() - { - allowed_syscalls.insert(__NR_getpeername); // get name of connected peer socket - allowed_syscalls.insert(__NR_getsockname); // get socket name - allowed_syscalls.insert(__NR_getsockopt); // get options on sockets - } - - void AllowOperationOnExistingFileDescriptor() - { - allowed_syscalls.insert(__NR_close); // close a file descriptor - allowed_syscalls.insert(__NR_dup); // duplicate a file descriptor - allowed_syscalls.insert(__NR_dup2); // duplicate a file descriptor - allowed_syscalls.insert(__NR_fcntl); // manipulate file descriptor - allowed_syscalls.insert(__NR_shutdown); // shut down part of a full-duplex connection - } - - void AllowPipe() - { - allowed_syscalls.insert(__NR_pipe); // create pipe - allowed_syscalls.insert(__NR_pipe2); // create pipe - } - - void AllowPrctl() - { - allowed_syscalls.insert(__NR_arch_prctl); // set architecture-specific thread state - allowed_syscalls.insert(__NR_prctl); // operations on a process - } - - void AllowProcessStartOrDeath() - { - allowed_syscalls.insert(__NR_clone); // create a child process - allowed_syscalls.insert(__NR_clone3); // create a child process - allowed_syscalls.insert(__NR_exit); // terminate the calling process - allowed_syscalls.insert(__NR_exit_group); // exit all threads in a process - allowed_syscalls.insert(__NR_fork); // create a child process - allowed_syscalls.insert(__NR_tgkill); // send a signal to a thread - allowed_syscalls.insert(__NR_wait4); // wait for process to change state, BSD style - allowed_syscalls.insert(__NR_rseq); // register restartable sequence for thread - } - - void AllowScheduling() - { - allowed_syscalls.insert(__NR_sched_getaffinity); // set a thread's CPU affinity mask - allowed_syscalls.insert(__NR_sched_getparam); // get scheduling parameters - allowed_syscalls.insert(__NR_sched_getscheduler); // get scheduling policy/parameters - allowed_syscalls.insert(__NR_sched_setscheduler); // set scheduling policy/parameters - allowed_syscalls.insert(__NR_sched_yield); // yield the processor - } - - void AllowSignalHandling() - { - allowed_syscalls.insert(__NR_rt_sigaction); // examine and change a signal action - allowed_syscalls.insert(__NR_rt_sigprocmask); // examine and change blocked signals - allowed_syscalls.insert(__NR_rt_sigreturn); // return from signal handler and cleanup stack frame - allowed_syscalls.insert(__NR_sigaltstack); // set and/or get signal stack context - } - - void AllowSleep() - { - allowed_syscalls.insert(__NR_clock_nanosleep); // high-resolution sleep with specifiable clock - allowed_syscalls.insert(__NR_nanosleep); // high-resolution sleep - } - - void AllowUmask() - { - allowed_syscalls.insert(__NR_umask); // set file mode creation mask - } - - // See Linux kernel developer Kees Cook's seccomp guide at <https://outflux.net/teach-seccomp/> - // for an accessible introduction to using seccomp. - // - // This function largely follows <https://outflux.net/teach-seccomp/step-3/seccomp-bpf.h>. - std::vector<sock_filter> BuildFilter(SyscallSandboxAction default_action) - { - std::vector<sock_filter> bpf_policy; - // See VALIDATE_ARCHITECTURE in seccomp-bpf.h referenced above. - bpf_policy.push_back(BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct seccomp_data, arch))); - // Portability note: AUDIT_ARCH_X86_64 is Linux x86_64 specific. - bpf_policy.push_back(BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 1, 0)); - bpf_policy.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS)); - // See EXAMINE_SYSCALL in seccomp-bpf.h referenced above. - bpf_policy.push_back(BPF_STMT(BPF_LD + BPF_W + BPF_ABS, offsetof(struct seccomp_data, nr))); - for (const uint32_t allowed_syscall : allowed_syscalls) { - // See ALLOW_SYSCALL in seccomp-bpf.h referenced above. - bpf_policy.push_back(BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, allowed_syscall, 0, 1)); - bpf_policy.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)); - } - switch (default_action) { - case SyscallSandboxAction::KILL_PROCESS: - // Disallow syscall and kill the process. - // - // See KILL_PROCESS in seccomp-bpf.h referenced above. - // - // Note that we're using SECCOMP_RET_KILL_PROCESS (kill the process) instead - // of SECCOMP_RET_KILL_THREAD (kill the thread). The SECCOMP_RET_KILL_PROCESS - // action was introduced in Linux 4.14. - // - // SECCOMP_RET_KILL_PROCESS: Results in the entire process exiting immediately without - // executing the system call. - // - // SECCOMP_RET_KILL_PROCESS documentation: - // <https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html> - bpf_policy.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS)); - break; - case SyscallSandboxAction::INVOKE_SIGNAL_HANDLER: - // Disallow syscall and force a SIGSYS to trigger syscall debug reporter. - // - // SECCOMP_RET_TRAP: Results in the kernel sending a SIGSYS signal to the triggering - // task without executing the system call. - // - // SECCOMP_RET_TRAP documentation: - // <https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html> - bpf_policy.push_back(BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_TRAP)); - break; - } - return bpf_policy; - } -}; -} // namespace - -bool SetupSyscallSandbox(bool log_syscall_violation_before_terminating) -{ - assert(!g_syscall_sandbox_enabled && "SetupSyscallSandbox(...) should only be called once."); - g_syscall_sandbox_enabled = true; - g_syscall_sandbox_log_violation_before_terminating = log_syscall_violation_before_terminating; - if (log_syscall_violation_before_terminating) { - if (!SetupSyscallSandboxDebugHandler()) { - return false; - } - } - return true; -} - -void TestDisallowedSandboxCall() -{ - // The getgroups syscall is assumed NOT to be allowed by the syscall sandbox policy. - std::array<gid_t, 1> groups; - [[maybe_unused]] int32_t ignored = getgroups(groups.size(), groups.data()); -} -#endif // defined(USE_SYSCALL_SANDBOX) - -void SetSyscallSandboxPolicy(SyscallSandboxPolicy syscall_policy) -{ -#if defined(USE_SYSCALL_SANDBOX) - if (!g_syscall_sandbox_enabled) { - return; - } - SeccompPolicyBuilder seccomp_policy_builder; - switch (syscall_policy) { - case SyscallSandboxPolicy::INITIALIZATION: // Thread: main thread (state: init) - // SyscallSandboxPolicy::INITIALIZATION is the first policy loaded. - // - // Subsequently loaded policies can reduce the abilities further, but - // abilities can never be regained. - // - // SyscallSandboxPolicy::INITIALIZATION must thus be a superset of all - // other policies. - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::INITIALIZATION_DNS_SEED: // Thread: dnsseed - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::INITIALIZATION_LOAD_BLOCKS: // Thread: loadblk - seccomp_policy_builder.AllowFileSystem(); - break; - case SyscallSandboxPolicy::INITIALIZATION_MAP_PORT: // Thread: mapport - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::MESSAGE_HANDLER: // Thread: msghand - seccomp_policy_builder.AllowFileSystem(); - break; - case SyscallSandboxPolicy::NET: // Thread: net - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::NET_ADD_CONNECTION: // Thread: addcon - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::NET_HTTP_SERVER: // Thread: http - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::NET_HTTP_SERVER_WORKER: // Thread: httpworker.<N> - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::NET_OPEN_CONNECTION: // Thread: opencon - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::SCHEDULER: // Thread: scheduler - seccomp_policy_builder.AllowFileSystem(); - break; - case SyscallSandboxPolicy::TOR_CONTROL: // Thread: torcontrol - seccomp_policy_builder.AllowFileSystem(); - seccomp_policy_builder.AllowNetwork(); - break; - case SyscallSandboxPolicy::TX_INDEX: // Thread: txindex - seccomp_policy_builder.AllowFileSystem(); - break; - case SyscallSandboxPolicy::VALIDATION_SCRIPT_CHECK: // Thread: scriptch.<N> - break; - case SyscallSandboxPolicy::SHUTOFF: // Thread: main thread (state: shutoff) - seccomp_policy_builder.AllowFileSystem(); - break; - } - - const SyscallSandboxAction default_action = g_syscall_sandbox_log_violation_before_terminating ? SyscallSandboxAction::INVOKE_SIGNAL_HANDLER : SyscallSandboxAction::KILL_PROCESS; - std::vector<sock_filter> filter = seccomp_policy_builder.BuildFilter(default_action); - const sock_fprog prog = { - .len = static_cast<uint16_t>(filter.size()), - .filter = filter.data(), - }; - // Do not allow abilities to be regained after being dropped. - // - // PR_SET_NO_NEW_PRIVS documentation: <https://www.kernel.org/doc/html/latest/userspace-api/no_new_privs.html> - if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) { - throw std::runtime_error("Syscall sandbox enforcement failed: prctl(PR_SET_NO_NEW_PRIVS)"); - } - // Install seccomp-bpf syscall filter. - // - // PR_SET_SECCOMP documentation: <https://www.kernel.org/doc/html/latest/userspace-api/seccomp_filter.html> - if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) != 0) { - throw std::runtime_error("Syscall sandbox enforcement failed: prctl(PR_SET_SECCOMP)"); - } - - const std::string thread_name = !util::ThreadGetInternalName().empty() ? util::ThreadGetInternalName() : "*unnamed*"; - LogPrint(BCLog::UTIL, "Syscall filter installed for thread \"%s\"\n", thread_name); -#endif // defined(USE_SYSCALL_SANDBOX) -} diff --git a/src/util/syscall_sandbox.h b/src/util/syscall_sandbox.h deleted file mode 100644 index 3e56ebe937..0000000000 --- a/src/util/syscall_sandbox.h +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2020-2022 The Bitcoin Core developers -// Distributed under the MIT software license, see the accompanying -// file COPYING or http://www.opensource.org/licenses/mit-license.php. - -#ifndef BITCOIN_UTIL_SYSCALL_SANDBOX_H -#define BITCOIN_UTIL_SYSCALL_SANDBOX_H - -enum class SyscallSandboxPolicy { - // 1. Initialization - INITIALIZATION, - INITIALIZATION_DNS_SEED, - INITIALIZATION_LOAD_BLOCKS, - INITIALIZATION_MAP_PORT, - - // 2. Steady state (non-initialization, non-shutdown) - MESSAGE_HANDLER, - NET, - NET_ADD_CONNECTION, - NET_HTTP_SERVER, - NET_HTTP_SERVER_WORKER, - NET_OPEN_CONNECTION, - SCHEDULER, - TOR_CONTROL, - TX_INDEX, - VALIDATION_SCRIPT_CHECK, - - // 3. Shutdown - SHUTOFF, -}; - -//! Force the current thread (and threads created from the current thread) into a restricted-service -//! operating mode where only a subset of all syscalls are available. -//! -//! Subsequent calls to this function can reduce the abilities further, but abilities can never be -//! regained. -//! -//! This function is a no-op unless SetupSyscallSandbox(...) has been called. -//! -//! SetupSyscallSandbox(...) is called during bitcoind initialization if Bitcoin Core was compiled -//! with seccomp-bpf support (--with-seccomp) *and* the parameter -sandbox=<mode> was passed to -//! bitcoind. -//! -//! This experimental feature is available under Linux x86_64 only. -void SetSyscallSandboxPolicy(SyscallSandboxPolicy syscall_policy); - -#if defined(USE_SYSCALL_SANDBOX) -//! Setup and enable the experimental syscall sandbox for the running process. -[[nodiscard]] bool SetupSyscallSandbox(bool log_syscall_violation_before_terminating); - -//! Invoke a disallowed syscall. Use for testing purposes. -void TestDisallowedSandboxCall(); -#endif // defined(USE_SYSCALL_SANDBOX) - -#endif // BITCOIN_UTIL_SYSCALL_SANDBOX_H |