aboutsummaryrefslogtreecommitdiff
path: root/src/libstore/platform/linux.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/libstore/platform/linux.cc')
-rw-r--r--src/libstore/platform/linux.cc633
1 files changed, 633 insertions, 0 deletions
diff --git a/src/libstore/platform/linux.cc b/src/libstore/platform/linux.cc
index 204f62b71..03b8bc0be 100644
--- a/src/libstore/platform/linux.cc
+++ b/src/libstore/platform/linux.cc
@@ -8,8 +8,23 @@
#include <grp.h>
#include <regex>
+#include <sys/prctl.h>
+
+#if HAVE_SECCOMP
+#include <linux/filter.h>
+#include <seccomp.h>
+#endif
namespace nix {
+
+namespace {
+/**
+ * The system for which Nix is compiled.
+ */
+[[gnu::unused]]
+constexpr const std::string_view nativeSystem = SYSTEM;
+}
+
static RegisterStoreImplementation<LinuxLocalStore, LocalStoreConfig> regLocalStore;
static void readProcLink(const std::string & file, UncheckedRoots & roots)
@@ -119,6 +134,617 @@ void LinuxLocalStore::findPlatformRoots(UncheckedRoots & unchecked)
readFileRoots("/proc/sys/kernel/poweroff_cmd", unchecked);
}
+#if HAVE_SECCOMP
+
+static void allowSyscall(scmp_filter_ctx ctx, int syscall) {
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 0) != 0)
+ throw SysError("unable to add seccomp rule");
+}
+
+#define ALLOW_CHMOD_IF_SAFE(ctx, syscall, modePos) \
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID | S_ISGID, 0)) != 0 || \
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) != 0 || \
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) != 0) \
+ throw SysError("unable to add seccomp rule");
+
+static std::vector<struct sock_filter> compileSyscallFilter()
+{
+ scmp_filter_ctx ctx;
+
+ // Pretend that syscalls we don't yet know about don't exist.
+ // This is the best option for compatibility: after all, they did in fact not exist not too long ago.
+ if (!(ctx = seccomp_init(SCMP_ACT_ERRNO(ENOSYS))))
+ throw SysError("unable to initialize seccomp mode 2");
+
+ Finally cleanup([&]() {
+ seccomp_release(ctx);
+ });
+
+ if (nativeSystem == "x86_64-linux" &&
+ seccomp_arch_add(ctx, SCMP_ARCH_X86) != 0)
+ throw SysError("unable to add 32-bit seccomp architecture");
+
+ if (nativeSystem == "x86_64-linux" &&
+ seccomp_arch_add(ctx, SCMP_ARCH_X32) != 0)
+ throw SysError("unable to add X32 seccomp architecture");
+
+ if (nativeSystem == "aarch64-linux" &&
+ seccomp_arch_add(ctx, SCMP_ARCH_ARM) != 0)
+ printError("unable to add ARM seccomp architecture; this may result in spurious build failures if running 32-bit ARM processes");
+
+ if (nativeSystem == "mips64-linux" &&
+ seccomp_arch_add(ctx, SCMP_ARCH_MIPS) != 0)
+ printError("unable to add mips seccomp architecture");
+
+ if (nativeSystem == "mips64-linux" &&
+ seccomp_arch_add(ctx, SCMP_ARCH_MIPS64N32) != 0)
+ printError("unable to add mips64-*abin32 seccomp architecture");
+
+ if (nativeSystem == "mips64el-linux" &&
+ seccomp_arch_add(ctx, SCMP_ARCH_MIPSEL) != 0)
+ printError("unable to add mipsel seccomp architecture");
+
+ if (nativeSystem == "mips64el-linux" &&
+ seccomp_arch_add(ctx, SCMP_ARCH_MIPSEL64N32) != 0)
+ printError("unable to add mips64el-*abin32 seccomp architecture");
+
+ // This list is intended for machine consumption.
+ // Please keep its format, order and BEGIN/END markers.
+ //
+ // Currently, it is up to date with libseccomp 2.5.5 and glibc 2.39.
+ // Run check-syscalls to determine which new syscalls should be added.
+ // New syscalls must be audited and handled in a way that blocks the following dangerous operations:
+ // * Creation of non-empty setuid/setgid files
+ // * Creation of extended attributes (including ACLs)
+ //
+ // BEGIN extract-syscalls
+ allowSyscall(ctx, SCMP_SYS(accept));
+ allowSyscall(ctx, SCMP_SYS(accept4));
+ allowSyscall(ctx, SCMP_SYS(access));
+ allowSyscall(ctx, SCMP_SYS(acct));
+ allowSyscall(ctx, SCMP_SYS(add_key));
+ allowSyscall(ctx, SCMP_SYS(adjtimex));
+ allowSyscall(ctx, SCMP_SYS(afs_syscall));
+ allowSyscall(ctx, SCMP_SYS(alarm));
+ allowSyscall(ctx, SCMP_SYS(arch_prctl));
+ allowSyscall(ctx, SCMP_SYS(arm_fadvise64_64));
+ allowSyscall(ctx, SCMP_SYS(arm_sync_file_range));
+ allowSyscall(ctx, SCMP_SYS(bdflush));
+ allowSyscall(ctx, SCMP_SYS(bind));
+ allowSyscall(ctx, SCMP_SYS(bpf));
+ allowSyscall(ctx, SCMP_SYS(break));
+ allowSyscall(ctx, SCMP_SYS(breakpoint));
+ allowSyscall(ctx, SCMP_SYS(brk));
+ allowSyscall(ctx, SCMP_SYS(cachectl));
+ allowSyscall(ctx, SCMP_SYS(cacheflush));
+ allowSyscall(ctx, SCMP_SYS(cachestat));
+ allowSyscall(ctx, SCMP_SYS(capget));
+ allowSyscall(ctx, SCMP_SYS(capset));
+ allowSyscall(ctx, SCMP_SYS(chdir));
+ // skip chmod (dangerous)
+ allowSyscall(ctx, SCMP_SYS(chown));
+ allowSyscall(ctx, SCMP_SYS(chown32));
+ allowSyscall(ctx, SCMP_SYS(chroot));
+ allowSyscall(ctx, SCMP_SYS(clock_adjtime));
+ allowSyscall(ctx, SCMP_SYS(clock_adjtime64));
+ allowSyscall(ctx, SCMP_SYS(clock_getres));
+ allowSyscall(ctx, SCMP_SYS(clock_getres_time64));
+ allowSyscall(ctx, SCMP_SYS(clock_gettime));
+ allowSyscall(ctx, SCMP_SYS(clock_gettime64));
+ allowSyscall(ctx, SCMP_SYS(clock_nanosleep));
+ allowSyscall(ctx, SCMP_SYS(clock_nanosleep_time64));
+ allowSyscall(ctx, SCMP_SYS(clock_settime));
+ allowSyscall(ctx, SCMP_SYS(clock_settime64));
+ allowSyscall(ctx, SCMP_SYS(clone));
+ allowSyscall(ctx, SCMP_SYS(clone3));
+ allowSyscall(ctx, SCMP_SYS(close));
+ allowSyscall(ctx, SCMP_SYS(close_range));
+ allowSyscall(ctx, SCMP_SYS(connect));
+ allowSyscall(ctx, SCMP_SYS(copy_file_range));
+ allowSyscall(ctx, SCMP_SYS(creat));
+ allowSyscall(ctx, SCMP_SYS(create_module));
+ allowSyscall(ctx, SCMP_SYS(delete_module));
+ allowSyscall(ctx, SCMP_SYS(dup));
+ allowSyscall(ctx, SCMP_SYS(dup2));
+ allowSyscall(ctx, SCMP_SYS(dup3));
+ allowSyscall(ctx, SCMP_SYS(epoll_create));
+ allowSyscall(ctx, SCMP_SYS(epoll_create1));
+ allowSyscall(ctx, SCMP_SYS(epoll_ctl));
+ allowSyscall(ctx, SCMP_SYS(epoll_ctl_old));
+ allowSyscall(ctx, SCMP_SYS(epoll_pwait));
+ allowSyscall(ctx, SCMP_SYS(epoll_pwait2));
+ allowSyscall(ctx, SCMP_SYS(epoll_wait));
+ allowSyscall(ctx, SCMP_SYS(epoll_wait_old));
+ allowSyscall(ctx, SCMP_SYS(eventfd));
+ allowSyscall(ctx, SCMP_SYS(eventfd2));
+ allowSyscall(ctx, SCMP_SYS(execve));
+ allowSyscall(ctx, SCMP_SYS(execveat));
+ allowSyscall(ctx, SCMP_SYS(exit));
+ allowSyscall(ctx, SCMP_SYS(exit_group));
+ allowSyscall(ctx, SCMP_SYS(faccessat));
+ allowSyscall(ctx, SCMP_SYS(faccessat2));
+ allowSyscall(ctx, SCMP_SYS(fadvise64));
+ allowSyscall(ctx, SCMP_SYS(fadvise64_64));
+ allowSyscall(ctx, SCMP_SYS(fallocate));
+ allowSyscall(ctx, SCMP_SYS(fanotify_init));
+ allowSyscall(ctx, SCMP_SYS(fanotify_mark));
+ allowSyscall(ctx, SCMP_SYS(fchdir));
+ // skip fchmod (dangerous)
+ // skip fchmodat (dangerous)
+ // skip fchmodat2 (dangerous)
+ allowSyscall(ctx, SCMP_SYS(fchown));
+ allowSyscall(ctx, SCMP_SYS(fchown32));
+ allowSyscall(ctx, SCMP_SYS(fchownat));
+ allowSyscall(ctx, SCMP_SYS(fcntl));
+ allowSyscall(ctx, SCMP_SYS(fcntl64));
+ allowSyscall(ctx, SCMP_SYS(fdatasync));
+ allowSyscall(ctx, SCMP_SYS(fgetxattr));
+ allowSyscall(ctx, SCMP_SYS(finit_module));
+ allowSyscall(ctx, SCMP_SYS(flistxattr));
+ allowSyscall(ctx, SCMP_SYS(flock));
+ allowSyscall(ctx, SCMP_SYS(fork));
+ allowSyscall(ctx, SCMP_SYS(fremovexattr));
+ allowSyscall(ctx, SCMP_SYS(fsconfig));
+ // skip fsetxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(fsmount));
+ allowSyscall(ctx, SCMP_SYS(fsopen));
+ allowSyscall(ctx, SCMP_SYS(fspick));
+ allowSyscall(ctx, SCMP_SYS(fstat));
+ allowSyscall(ctx, SCMP_SYS(fstat64));
+ allowSyscall(ctx, SCMP_SYS(fstatat64));
+ allowSyscall(ctx, SCMP_SYS(fstatfs));
+ allowSyscall(ctx, SCMP_SYS(fstatfs64));
+ allowSyscall(ctx, SCMP_SYS(fsync));
+ allowSyscall(ctx, SCMP_SYS(ftime));
+ allowSyscall(ctx, SCMP_SYS(ftruncate));
+ allowSyscall(ctx, SCMP_SYS(ftruncate64));
+ allowSyscall(ctx, SCMP_SYS(futex));
+ allowSyscall(ctx, SCMP_SYS(futex_requeue));
+ allowSyscall(ctx, SCMP_SYS(futex_time64));
+ allowSyscall(ctx, SCMP_SYS(futex_wait));
+ allowSyscall(ctx, SCMP_SYS(futex_waitv));
+ allowSyscall(ctx, SCMP_SYS(futex_wake));
+ allowSyscall(ctx, SCMP_SYS(futimesat));
+ allowSyscall(ctx, SCMP_SYS(getcpu));
+ allowSyscall(ctx, SCMP_SYS(getcwd));
+ allowSyscall(ctx, SCMP_SYS(getdents));
+ allowSyscall(ctx, SCMP_SYS(getdents64));
+ allowSyscall(ctx, SCMP_SYS(getegid));
+ allowSyscall(ctx, SCMP_SYS(getegid32));
+ allowSyscall(ctx, SCMP_SYS(geteuid));
+ allowSyscall(ctx, SCMP_SYS(geteuid32));
+ allowSyscall(ctx, SCMP_SYS(getgid));
+ allowSyscall(ctx, SCMP_SYS(getgid32));
+ allowSyscall(ctx, SCMP_SYS(getgroups));
+ allowSyscall(ctx, SCMP_SYS(getgroups32));
+ allowSyscall(ctx, SCMP_SYS(getitimer));
+ allowSyscall(ctx, SCMP_SYS(get_kernel_syms));
+ allowSyscall(ctx, SCMP_SYS(get_mempolicy));
+ allowSyscall(ctx, SCMP_SYS(getpeername));
+ allowSyscall(ctx, SCMP_SYS(getpgid));
+ allowSyscall(ctx, SCMP_SYS(getpgrp));
+ allowSyscall(ctx, SCMP_SYS(getpid));
+ allowSyscall(ctx, SCMP_SYS(getpmsg));
+ allowSyscall(ctx, SCMP_SYS(getppid));
+ allowSyscall(ctx, SCMP_SYS(getpriority));
+ allowSyscall(ctx, SCMP_SYS(getrandom));
+ allowSyscall(ctx, SCMP_SYS(getresgid));
+ allowSyscall(ctx, SCMP_SYS(getresgid32));
+ allowSyscall(ctx, SCMP_SYS(getresuid));
+ allowSyscall(ctx, SCMP_SYS(getresuid32));
+ allowSyscall(ctx, SCMP_SYS(getrlimit));
+ allowSyscall(ctx, SCMP_SYS(get_robust_list));
+ allowSyscall(ctx, SCMP_SYS(getrusage));
+ allowSyscall(ctx, SCMP_SYS(getsid));
+ allowSyscall(ctx, SCMP_SYS(getsockname));
+ allowSyscall(ctx, SCMP_SYS(getsockopt));
+ allowSyscall(ctx, SCMP_SYS(get_thread_area));
+ allowSyscall(ctx, SCMP_SYS(gettid));
+ allowSyscall(ctx, SCMP_SYS(gettimeofday));
+ allowSyscall(ctx, SCMP_SYS(get_tls));
+ allowSyscall(ctx, SCMP_SYS(getuid));
+ allowSyscall(ctx, SCMP_SYS(getuid32));
+ allowSyscall(ctx, SCMP_SYS(getxattr));
+ allowSyscall(ctx, SCMP_SYS(gtty));
+ allowSyscall(ctx, SCMP_SYS(idle));
+ allowSyscall(ctx, SCMP_SYS(init_module));
+ allowSyscall(ctx, SCMP_SYS(inotify_add_watch));
+ allowSyscall(ctx, SCMP_SYS(inotify_init));
+ allowSyscall(ctx, SCMP_SYS(inotify_init1));
+ allowSyscall(ctx, SCMP_SYS(inotify_rm_watch));
+ allowSyscall(ctx, SCMP_SYS(io_cancel));
+ allowSyscall(ctx, SCMP_SYS(ioctl));
+ allowSyscall(ctx, SCMP_SYS(io_destroy));
+ allowSyscall(ctx, SCMP_SYS(io_getevents));
+ allowSyscall(ctx, SCMP_SYS(ioperm));
+ allowSyscall(ctx, SCMP_SYS(io_pgetevents));
+ allowSyscall(ctx, SCMP_SYS(io_pgetevents_time64));
+ allowSyscall(ctx, SCMP_SYS(iopl));
+ allowSyscall(ctx, SCMP_SYS(ioprio_get));
+ allowSyscall(ctx, SCMP_SYS(ioprio_set));
+ allowSyscall(ctx, SCMP_SYS(io_setup));
+ allowSyscall(ctx, SCMP_SYS(io_submit));
+ // skip io_uring_enter (may become dangerous)
+ // skip io_uring_register (may become dangerous)
+ // skip io_uring_setup (may become dangerous)
+ allowSyscall(ctx, SCMP_SYS(ipc));
+ allowSyscall(ctx, SCMP_SYS(kcmp));
+ allowSyscall(ctx, SCMP_SYS(kexec_file_load));
+ allowSyscall(ctx, SCMP_SYS(kexec_load));
+ allowSyscall(ctx, SCMP_SYS(keyctl));
+ allowSyscall(ctx, SCMP_SYS(kill));
+ allowSyscall(ctx, SCMP_SYS(landlock_add_rule));
+ allowSyscall(ctx, SCMP_SYS(landlock_create_ruleset));
+ allowSyscall(ctx, SCMP_SYS(landlock_restrict_self));
+ allowSyscall(ctx, SCMP_SYS(lchown));
+ allowSyscall(ctx, SCMP_SYS(lchown32));
+ allowSyscall(ctx, SCMP_SYS(lgetxattr));
+ allowSyscall(ctx, SCMP_SYS(link));
+ allowSyscall(ctx, SCMP_SYS(linkat));
+ allowSyscall(ctx, SCMP_SYS(listen));
+ allowSyscall(ctx, SCMP_SYS(listxattr));
+ allowSyscall(ctx, SCMP_SYS(llistxattr));
+ allowSyscall(ctx, SCMP_SYS(_llseek));
+ allowSyscall(ctx, SCMP_SYS(lock));
+ allowSyscall(ctx, SCMP_SYS(lookup_dcookie));
+ allowSyscall(ctx, SCMP_SYS(lremovexattr));
+ allowSyscall(ctx, SCMP_SYS(lseek));
+ // skip lsetxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(lstat));
+ allowSyscall(ctx, SCMP_SYS(lstat64));
+ allowSyscall(ctx, SCMP_SYS(madvise));
+ allowSyscall(ctx, SCMP_SYS(map_shadow_stack));
+ allowSyscall(ctx, SCMP_SYS(mbind));
+ allowSyscall(ctx, SCMP_SYS(membarrier));
+ allowSyscall(ctx, SCMP_SYS(memfd_create));
+ allowSyscall(ctx, SCMP_SYS(memfd_secret));
+ allowSyscall(ctx, SCMP_SYS(migrate_pages));
+ allowSyscall(ctx, SCMP_SYS(mincore));
+ allowSyscall(ctx, SCMP_SYS(mkdir));
+ allowSyscall(ctx, SCMP_SYS(mkdirat));
+ allowSyscall(ctx, SCMP_SYS(mknod));
+ allowSyscall(ctx, SCMP_SYS(mknodat));
+ allowSyscall(ctx, SCMP_SYS(mlock));
+ allowSyscall(ctx, SCMP_SYS(mlock2));
+ allowSyscall(ctx, SCMP_SYS(mlockall));
+ allowSyscall(ctx, SCMP_SYS(mmap));
+ allowSyscall(ctx, SCMP_SYS(mmap2));
+ allowSyscall(ctx, SCMP_SYS(modify_ldt));
+ allowSyscall(ctx, SCMP_SYS(mount));
+ allowSyscall(ctx, SCMP_SYS(mount_setattr));
+ allowSyscall(ctx, SCMP_SYS(move_mount));
+ allowSyscall(ctx, SCMP_SYS(move_pages));
+ allowSyscall(ctx, SCMP_SYS(mprotect));
+ allowSyscall(ctx, SCMP_SYS(mpx));
+ allowSyscall(ctx, SCMP_SYS(mq_getsetattr));
+ allowSyscall(ctx, SCMP_SYS(mq_notify));
+ allowSyscall(ctx, SCMP_SYS(mq_open));
+ allowSyscall(ctx, SCMP_SYS(mq_timedreceive));
+ allowSyscall(ctx, SCMP_SYS(mq_timedreceive_time64));
+ allowSyscall(ctx, SCMP_SYS(mq_timedsend));
+ allowSyscall(ctx, SCMP_SYS(mq_timedsend_time64));
+ allowSyscall(ctx, SCMP_SYS(mq_unlink));
+ allowSyscall(ctx, SCMP_SYS(mremap));
+ allowSyscall(ctx, SCMP_SYS(msgctl));
+ allowSyscall(ctx, SCMP_SYS(msgget));
+ allowSyscall(ctx, SCMP_SYS(msgrcv));
+ allowSyscall(ctx, SCMP_SYS(msgsnd));
+ allowSyscall(ctx, SCMP_SYS(msync));
+ allowSyscall(ctx, SCMP_SYS(multiplexer));
+ allowSyscall(ctx, SCMP_SYS(munlock));
+ allowSyscall(ctx, SCMP_SYS(munlockall));
+ allowSyscall(ctx, SCMP_SYS(munmap));
+ allowSyscall(ctx, SCMP_SYS(name_to_handle_at));
+ allowSyscall(ctx, SCMP_SYS(nanosleep));
+ allowSyscall(ctx, SCMP_SYS(newfstatat));
+ allowSyscall(ctx, SCMP_SYS(_newselect));
+ allowSyscall(ctx, SCMP_SYS(nfsservctl));
+ allowSyscall(ctx, SCMP_SYS(nice));
+ allowSyscall(ctx, SCMP_SYS(oldfstat));
+ allowSyscall(ctx, SCMP_SYS(oldlstat));
+ allowSyscall(ctx, SCMP_SYS(oldolduname));
+ allowSyscall(ctx, SCMP_SYS(oldstat));
+ allowSyscall(ctx, SCMP_SYS(olduname));
+ allowSyscall(ctx, SCMP_SYS(open));
+ allowSyscall(ctx, SCMP_SYS(openat));
+ allowSyscall(ctx, SCMP_SYS(openat2));
+ allowSyscall(ctx, SCMP_SYS(open_by_handle_at));
+ allowSyscall(ctx, SCMP_SYS(open_tree));
+ allowSyscall(ctx, SCMP_SYS(pause));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_iobase));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_read));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_write));
+ allowSyscall(ctx, SCMP_SYS(perf_event_open));
+ allowSyscall(ctx, SCMP_SYS(personality));
+ allowSyscall(ctx, SCMP_SYS(pidfd_getfd));
+ allowSyscall(ctx, SCMP_SYS(pidfd_open));
+ allowSyscall(ctx, SCMP_SYS(pidfd_send_signal));
+ allowSyscall(ctx, SCMP_SYS(pipe));
+ allowSyscall(ctx, SCMP_SYS(pipe2));
+ allowSyscall(ctx, SCMP_SYS(pivot_root));
+ allowSyscall(ctx, SCMP_SYS(pkey_alloc));
+ allowSyscall(ctx, SCMP_SYS(pkey_free));
+ allowSyscall(ctx, SCMP_SYS(pkey_mprotect));
+ allowSyscall(ctx, SCMP_SYS(poll));
+ allowSyscall(ctx, SCMP_SYS(ppoll));
+ allowSyscall(ctx, SCMP_SYS(ppoll_time64));
+ allowSyscall(ctx, SCMP_SYS(prctl));
+ allowSyscall(ctx, SCMP_SYS(pread64));
+ allowSyscall(ctx, SCMP_SYS(preadv));
+ allowSyscall(ctx, SCMP_SYS(preadv2));
+ allowSyscall(ctx, SCMP_SYS(prlimit64));
+ allowSyscall(ctx, SCMP_SYS(process_madvise));
+ allowSyscall(ctx, SCMP_SYS(process_mrelease));
+ allowSyscall(ctx, SCMP_SYS(process_vm_readv));
+ allowSyscall(ctx, SCMP_SYS(process_vm_writev));
+ allowSyscall(ctx, SCMP_SYS(prof));
+ allowSyscall(ctx, SCMP_SYS(profil));
+ allowSyscall(ctx, SCMP_SYS(pselect6));
+ allowSyscall(ctx, SCMP_SYS(pselect6_time64));
+ allowSyscall(ctx, SCMP_SYS(ptrace));
+ allowSyscall(ctx, SCMP_SYS(putpmsg));
+ allowSyscall(ctx, SCMP_SYS(pwrite64));
+ allowSyscall(ctx, SCMP_SYS(pwritev));
+ allowSyscall(ctx, SCMP_SYS(pwritev2));
+ allowSyscall(ctx, SCMP_SYS(query_module));
+ allowSyscall(ctx, SCMP_SYS(quotactl));
+ allowSyscall(ctx, SCMP_SYS(quotactl_fd));
+ allowSyscall(ctx, SCMP_SYS(read));
+ allowSyscall(ctx, SCMP_SYS(readahead));
+ allowSyscall(ctx, SCMP_SYS(readdir));
+ allowSyscall(ctx, SCMP_SYS(readlink));
+ allowSyscall(ctx, SCMP_SYS(readlinkat));
+ allowSyscall(ctx, SCMP_SYS(readv));
+ allowSyscall(ctx, SCMP_SYS(reboot));
+ allowSyscall(ctx, SCMP_SYS(recv));
+ allowSyscall(ctx, SCMP_SYS(recvfrom));
+ allowSyscall(ctx, SCMP_SYS(recvmmsg));
+ allowSyscall(ctx, SCMP_SYS(recvmmsg_time64));
+ allowSyscall(ctx, SCMP_SYS(recvmsg));
+ allowSyscall(ctx, SCMP_SYS(remap_file_pages));
+ allowSyscall(ctx, SCMP_SYS(removexattr));
+ allowSyscall(ctx, SCMP_SYS(rename));
+ allowSyscall(ctx, SCMP_SYS(renameat));
+ allowSyscall(ctx, SCMP_SYS(renameat2));
+ allowSyscall(ctx, SCMP_SYS(request_key));
+ allowSyscall(ctx, SCMP_SYS(restart_syscall));
+ allowSyscall(ctx, SCMP_SYS(riscv_flush_icache));
+ allowSyscall(ctx, SCMP_SYS(rmdir));
+ allowSyscall(ctx, SCMP_SYS(rseq));
+ allowSyscall(ctx, SCMP_SYS(rtas));
+ allowSyscall(ctx, SCMP_SYS(rt_sigaction));
+ allowSyscall(ctx, SCMP_SYS(rt_sigpending));
+ allowSyscall(ctx, SCMP_SYS(rt_sigprocmask));
+ allowSyscall(ctx, SCMP_SYS(rt_sigqueueinfo));
+ allowSyscall(ctx, SCMP_SYS(rt_sigreturn));
+ allowSyscall(ctx, SCMP_SYS(rt_sigsuspend));
+ allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait));
+ allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait_time64));
+ allowSyscall(ctx, SCMP_SYS(rt_tgsigqueueinfo));
+ allowSyscall(ctx, SCMP_SYS(s390_guarded_storage));
+ allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_read));
+ allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_write));
+ allowSyscall(ctx, SCMP_SYS(s390_runtime_instr));
+ allowSyscall(ctx, SCMP_SYS(s390_sthyi));
+ allowSyscall(ctx, SCMP_SYS(sched_getaffinity));
+ allowSyscall(ctx, SCMP_SYS(sched_getattr));
+ allowSyscall(ctx, SCMP_SYS(sched_getparam));
+ allowSyscall(ctx, SCMP_SYS(sched_get_priority_max));
+ allowSyscall(ctx, SCMP_SYS(sched_get_priority_min));
+ allowSyscall(ctx, SCMP_SYS(sched_getscheduler));
+ allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval));
+ allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval_time64));
+ allowSyscall(ctx, SCMP_SYS(sched_setaffinity));
+ allowSyscall(ctx, SCMP_SYS(sched_setattr));
+ allowSyscall(ctx, SCMP_SYS(sched_setparam));
+ allowSyscall(ctx, SCMP_SYS(sched_setscheduler));
+ allowSyscall(ctx, SCMP_SYS(sched_yield));
+ allowSyscall(ctx, SCMP_SYS(seccomp));
+ allowSyscall(ctx, SCMP_SYS(security));
+ allowSyscall(ctx, SCMP_SYS(select));
+ allowSyscall(ctx, SCMP_SYS(semctl));
+ allowSyscall(ctx, SCMP_SYS(semget));
+ allowSyscall(ctx, SCMP_SYS(semop));
+ allowSyscall(ctx, SCMP_SYS(semtimedop));
+ allowSyscall(ctx, SCMP_SYS(semtimedop_time64));
+ allowSyscall(ctx, SCMP_SYS(send));
+ allowSyscall(ctx, SCMP_SYS(sendfile));
+ allowSyscall(ctx, SCMP_SYS(sendfile64));
+ allowSyscall(ctx, SCMP_SYS(sendmmsg));
+ allowSyscall(ctx, SCMP_SYS(sendmsg));
+ allowSyscall(ctx, SCMP_SYS(sendto));
+ allowSyscall(ctx, SCMP_SYS(setdomainname));
+ allowSyscall(ctx, SCMP_SYS(setfsgid));
+ allowSyscall(ctx, SCMP_SYS(setfsgid32));
+ allowSyscall(ctx, SCMP_SYS(setfsuid));
+ allowSyscall(ctx, SCMP_SYS(setfsuid32));
+ allowSyscall(ctx, SCMP_SYS(setgid));
+ allowSyscall(ctx, SCMP_SYS(setgid32));
+ allowSyscall(ctx, SCMP_SYS(setgroups));
+ allowSyscall(ctx, SCMP_SYS(setgroups32));
+ allowSyscall(ctx, SCMP_SYS(sethostname));
+ allowSyscall(ctx, SCMP_SYS(setitimer));
+ allowSyscall(ctx, SCMP_SYS(set_mempolicy));
+ allowSyscall(ctx, SCMP_SYS(set_mempolicy_home_node));
+ allowSyscall(ctx, SCMP_SYS(setns));
+ allowSyscall(ctx, SCMP_SYS(setpgid));
+ allowSyscall(ctx, SCMP_SYS(setpriority));
+ allowSyscall(ctx, SCMP_SYS(setregid));
+ allowSyscall(ctx, SCMP_SYS(setregid32));
+ allowSyscall(ctx, SCMP_SYS(setresgid));
+ allowSyscall(ctx, SCMP_SYS(setresgid32));
+ allowSyscall(ctx, SCMP_SYS(setresuid));
+ allowSyscall(ctx, SCMP_SYS(setresuid32));
+ allowSyscall(ctx, SCMP_SYS(setreuid));
+ allowSyscall(ctx, SCMP_SYS(setreuid32));
+ allowSyscall(ctx, SCMP_SYS(setrlimit));
+ allowSyscall(ctx, SCMP_SYS(set_robust_list));
+ allowSyscall(ctx, SCMP_SYS(setsid));
+ allowSyscall(ctx, SCMP_SYS(setsockopt));
+ allowSyscall(ctx, SCMP_SYS(set_thread_area));
+ allowSyscall(ctx, SCMP_SYS(set_tid_address));
+ allowSyscall(ctx, SCMP_SYS(settimeofday));
+ allowSyscall(ctx, SCMP_SYS(set_tls));
+ allowSyscall(ctx, SCMP_SYS(setuid));
+ allowSyscall(ctx, SCMP_SYS(setuid32));
+ // skip setxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(sgetmask));
+ allowSyscall(ctx, SCMP_SYS(shmat));
+ allowSyscall(ctx, SCMP_SYS(shmctl));
+ allowSyscall(ctx, SCMP_SYS(shmdt));
+ allowSyscall(ctx, SCMP_SYS(shmget));
+ allowSyscall(ctx, SCMP_SYS(shutdown));
+ allowSyscall(ctx, SCMP_SYS(sigaction));
+ allowSyscall(ctx, SCMP_SYS(sigaltstack));
+ allowSyscall(ctx, SCMP_SYS(signal));
+ allowSyscall(ctx, SCMP_SYS(signalfd));
+ allowSyscall(ctx, SCMP_SYS(signalfd4));
+ allowSyscall(ctx, SCMP_SYS(sigpending));
+ allowSyscall(ctx, SCMP_SYS(sigprocmask));
+ allowSyscall(ctx, SCMP_SYS(sigreturn));
+ allowSyscall(ctx, SCMP_SYS(sigsuspend));
+ allowSyscall(ctx, SCMP_SYS(socket));
+ allowSyscall(ctx, SCMP_SYS(socketcall));
+ allowSyscall(ctx, SCMP_SYS(socketpair));
+ allowSyscall(ctx, SCMP_SYS(splice));
+ allowSyscall(ctx, SCMP_SYS(spu_create));
+ allowSyscall(ctx, SCMP_SYS(spu_run));
+ allowSyscall(ctx, SCMP_SYS(ssetmask));
+ allowSyscall(ctx, SCMP_SYS(stat));
+ allowSyscall(ctx, SCMP_SYS(stat64));
+ allowSyscall(ctx, SCMP_SYS(statfs));
+ allowSyscall(ctx, SCMP_SYS(statfs64));
+ allowSyscall(ctx, SCMP_SYS(statx));
+ allowSyscall(ctx, SCMP_SYS(stime));
+ allowSyscall(ctx, SCMP_SYS(stty));
+ allowSyscall(ctx, SCMP_SYS(subpage_prot));
+ allowSyscall(ctx, SCMP_SYS(swapcontext));
+ allowSyscall(ctx, SCMP_SYS(swapoff));
+ allowSyscall(ctx, SCMP_SYS(swapon));
+ allowSyscall(ctx, SCMP_SYS(switch_endian));
+ allowSyscall(ctx, SCMP_SYS(symlink));
+ allowSyscall(ctx, SCMP_SYS(symlinkat));
+ allowSyscall(ctx, SCMP_SYS(sync));
+ allowSyscall(ctx, SCMP_SYS(sync_file_range));
+ allowSyscall(ctx, SCMP_SYS(sync_file_range2));
+ allowSyscall(ctx, SCMP_SYS(syncfs));
+ allowSyscall(ctx, SCMP_SYS(syscall));
+ allowSyscall(ctx, SCMP_SYS(_sysctl));
+ allowSyscall(ctx, SCMP_SYS(sys_debug_setcontext));
+ allowSyscall(ctx, SCMP_SYS(sysfs));
+ allowSyscall(ctx, SCMP_SYS(sysinfo));
+ allowSyscall(ctx, SCMP_SYS(syslog));
+ allowSyscall(ctx, SCMP_SYS(sysmips));
+ allowSyscall(ctx, SCMP_SYS(tee));
+ allowSyscall(ctx, SCMP_SYS(tgkill));
+ allowSyscall(ctx, SCMP_SYS(time));
+ allowSyscall(ctx, SCMP_SYS(timer_create));
+ allowSyscall(ctx, SCMP_SYS(timer_delete));
+ allowSyscall(ctx, SCMP_SYS(timerfd));
+ allowSyscall(ctx, SCMP_SYS(timerfd_create));
+ allowSyscall(ctx, SCMP_SYS(timerfd_gettime));
+ allowSyscall(ctx, SCMP_SYS(timerfd_gettime64));
+ allowSyscall(ctx, SCMP_SYS(timerfd_settime));
+ allowSyscall(ctx, SCMP_SYS(timerfd_settime64));
+ allowSyscall(ctx, SCMP_SYS(timer_getoverrun));
+ allowSyscall(ctx, SCMP_SYS(timer_gettime));
+ allowSyscall(ctx, SCMP_SYS(timer_gettime64));
+ allowSyscall(ctx, SCMP_SYS(timer_settime));
+ allowSyscall(ctx, SCMP_SYS(timer_settime64));
+ allowSyscall(ctx, SCMP_SYS(times));
+ allowSyscall(ctx, SCMP_SYS(tkill));
+ allowSyscall(ctx, SCMP_SYS(truncate));
+ allowSyscall(ctx, SCMP_SYS(truncate64));
+ allowSyscall(ctx, SCMP_SYS(tuxcall));
+ allowSyscall(ctx, SCMP_SYS(ugetrlimit));
+ allowSyscall(ctx, SCMP_SYS(ulimit));
+ allowSyscall(ctx, SCMP_SYS(umask));
+ allowSyscall(ctx, SCMP_SYS(umount));
+ allowSyscall(ctx, SCMP_SYS(umount2));
+ allowSyscall(ctx, SCMP_SYS(uname));
+ allowSyscall(ctx, SCMP_SYS(unlink));
+ allowSyscall(ctx, SCMP_SYS(unlinkat));
+ allowSyscall(ctx, SCMP_SYS(unshare));
+ allowSyscall(ctx, SCMP_SYS(uselib));
+ allowSyscall(ctx, SCMP_SYS(userfaultfd));
+ allowSyscall(ctx, SCMP_SYS(usr26));
+ allowSyscall(ctx, SCMP_SYS(usr32));
+ allowSyscall(ctx, SCMP_SYS(ustat));
+ allowSyscall(ctx, SCMP_SYS(utime));
+ allowSyscall(ctx, SCMP_SYS(utimensat));
+ allowSyscall(ctx, SCMP_SYS(utimensat_time64));
+ allowSyscall(ctx, SCMP_SYS(utimes));
+ allowSyscall(ctx, SCMP_SYS(vfork));
+ allowSyscall(ctx, SCMP_SYS(vhangup));
+ allowSyscall(ctx, SCMP_SYS(vm86));
+ allowSyscall(ctx, SCMP_SYS(vm86old));
+ allowSyscall(ctx, SCMP_SYS(vmsplice));
+ allowSyscall(ctx, SCMP_SYS(vserver));
+ allowSyscall(ctx, SCMP_SYS(wait4));
+ allowSyscall(ctx, SCMP_SYS(waitid));
+ allowSyscall(ctx, SCMP_SYS(waitpid));
+ allowSyscall(ctx, SCMP_SYS(write));
+ allowSyscall(ctx, SCMP_SYS(writev));
+ // END extract-syscalls
+
+ // chmod family: prevent adding setuid/setgid bits to existing files.
+ // The Nix store does not support setuid/setgid, and even their temporary creation can weaken the security of the sandbox.
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(chmod), 1);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmod), 1);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat), 2);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat2), 2);
+
+ // setxattr family: prevent creation of extended attributes or ACLs.
+ // Not all filesystems support them, and they're incompatible with the NAR format.
+ if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != 0 ||
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != 0 ||
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != 0)
+ throw SysError("unable to add seccomp rule");
+
+ Pipe filterPipe;
+ filterPipe.create();
+ auto filterBytes_ = std::async([&]() {
+ return drainFD(filterPipe.readSide.get());
+ });
+ if (seccomp_export_bpf(ctx, filterPipe.writeSide.get()) != 0)
+ throw SysError("unable to compile seccomp BPF program");
+ filterPipe.writeSide.close();
+ auto filterBytes = filterBytes_.get();
+
+ assert(filterBytes.size() % sizeof(struct sock_filter) == 0);
+ std::vector<struct sock_filter> filter(filterBytes.size() / sizeof(struct sock_filter));
+ std::memcpy(filter.data(), filterBytes.data(), filterBytes.size());
+ return filter;
+}
+
+static const std::vector<struct sock_filter> &getSyscallFilter()
+{
+ static auto filter = compileSyscallFilter();
+ return filter;
+}
+
+#endif
+
+void LinuxLocalDerivationGoal::setupSyscallFilter()
+{
+ // Set the NO_NEW_PRIVS prctl flag.
+ // This both makes loading seccomp filters work for unprivileged users,
+ // and is an additional security measure in its own right.
+ if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1)
+ throw SysError("PR_SET_NO_NEW_PRIVS failed");
+#if HAVE_SECCOMP
+ const auto &seccompBPF = getSyscallFilter();
+ assert(seccompBPF.size() <= std::numeric_limits<unsigned short>::max());
+ struct sock_fprog fprog = {
+ .len = static_cast<unsigned short>(seccompBPF.size()),
+ // the kernel does not actually write to the filter
+ .filter = const_cast<struct sock_filter *>(seccompBPF.data()),
+ };
+ if (syscall(SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &fprog) != 0)
+ throw SysError("unable to load seccomp BPF program");
+#endif
+}
+
void LinuxLocalDerivationGoal::prepareSandbox()
{
/* Create a temporary directory in which we set up the chroot
@@ -208,6 +834,13 @@ void LinuxLocalDerivationGoal::prepareSandbox()
Pid LinuxLocalDerivationGoal::startChild(std::function<void()> openSlave)
{
+#if HAVE_SECCOMP
+ // Our seccomp filter program is surprisingly expensive to compile (~10ms).
+ // For this reason, we precompile it once and then cache it.
+ // This has to be done in the parent so that all builds get to use the same cache.
+ getSyscallFilter();
+#endif
+
// If we're not sandboxing no need to faff about, use the fallback
if (!useChroot) {
return LocalDerivationGoal::startChild(openSlave);