aboutsummaryrefslogtreecommitdiff
path: root/src/libstore/build/local-derivation-goal.cc
diff options
context:
space:
mode:
authorAlois Wohlschlager <alois1@gmx-topmail.de>2024-05-26 10:35:03 +0200
committerAlois Wohlschlager <alois1@gmx-topmail.de>2024-07-25 18:24:40 +0200
commit127ee1a101e3f5ebab39ad98cbe58fefcd52eca5 (patch)
treebe6a52c93f39712e9f01d8077280b2b6149386eb /src/libstore/build/local-derivation-goal.cc
parent233408f677f8cd310d9760ff6c1bd9c14282cc57 (diff)
libstore/build: use an allowlist approach to syscall filtering
Previously, system call filtering (to prevent builders from storing files with setuid/setgid permission bits or extended attributes) was performed using a blocklist. While this looks simple at first, it actually carries significant security and maintainability risks: after all, the kernel may add new syscalls to achieve the same functionality one is trying to block, and it can even be hard to actually add the syscall to the blocklist when building against a C library that doesn't know about it yet. For a recent demonstration of this happening in practice to Nix, see the introduction of fchmodat2 [0] [1]. The allowlist approach does not share the same drawback. While it does require a rather large list of harmless syscalls to be maintained in the codebase, failing to update this list (and roll out the update to all users) in time has rather benign effects; at worst, very recent programs that already rely on new syscalls will fail with an error the same way they would on a slightly older kernel that doesn't support them yet. Most importantly, no unintended new ways of performing dangerous operations will be silently allowed. Another possible drawback is reduced system call performance due to the larger filter created by the allowlist requiring more computation [2]. However, this issue has not convincingly been demonstrated yet in practice, for example in systemd or various browsers. To the contrary, it has been measured that the the actual filter constructed here has approximately the same overhead as a very simple filter blocking only one system call. This commit tries to keep the behavior as close to unchanged as possible. The system call list is in line with libseccomp 2.5.5 and glibc 2.39, which are the latest versions at the point of writing. Since libseccomp 2.5.5 is already a requirement and the distributions shipping this together with older versions of glibc are mostly not a thing any more, this should not lead to more build failures any more. [0] https://github.com/NixOS/nixpkgs/issues/300635 [1] https://github.com/NixOS/nix/issues/10424 [2] https://github.com/flatpak/flatpak/pull/4462#issuecomment-1061690607 Change-Id: I541be3ea9b249bcceddfed6a5a13ac10b11e16ad
Diffstat (limited to 'src/libstore/build/local-derivation-goal.cc')
-rw-r--r--src/libstore/build/local-derivation-goal.cc549
1 files changed, 525 insertions, 24 deletions
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc
index 8e1f4e849..33cbfb29d 100644
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@@ -45,7 +45,6 @@
#include <sys/prctl.h>
#include <sys/syscall.h>
#if HAVE_SECCOMP
-#include "linux/fchmodat2-compat.hh"
#include <seccomp.h>
#endif
#define pivot_root(new_root, put_old) (syscall(SYS_pivot_root, new_root, put_old))
@@ -1363,6 +1362,20 @@ void LocalDerivationGoal::chownToBuilder(const Path & path)
throw SysError("cannot change ownership of '%1%'", path);
}
+#if HAVE_SECCOMP
+
+static void allowSyscall(scmp_filter_ctx ctx, int syscall) {
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 0) != 0)
+ throw SysError("unable to add seccomp rule");
+}
+
+#define ALLOW_CHMOD_IF_SAFE(ctx, syscall, modePos) \
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID | S_ISGID, 0)) != 0 || \
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) != 0 || \
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) != 0) \
+ throw SysError("unable to add seccomp rule");
+
+#endif
void setupSeccomp()
{
@@ -1370,7 +1383,9 @@ void setupSeccomp()
#if HAVE_SECCOMP
scmp_filter_ctx ctx;
- if (!(ctx = seccomp_init(SCMP_ACT_ALLOW)))
+ // Pretend that syscalls we don't yet know about don't exist.
+ // This is the best option for compatibility: after all, they did in fact not exist not too long ago.
+ if (!(ctx = seccomp_init(SCMP_ACT_ERRNO(ENOSYS))))
throw SysError("unable to initialize seccomp mode 2");
Finally cleanup([&]() {
@@ -1405,28 +1420,514 @@ void setupSeccomp()
seccomp_arch_add(ctx, SCMP_ARCH_MIPSEL64N32) != 0)
printError("unable to add mips64el-*abin32 seccomp architecture");
- /* Prevent builders from creating setuid/setgid binaries. */
- for (int perm : { S_ISUID, S_ISGID }) {
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1,
- SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
-
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1,
- SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
-
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
-
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), NIX_SYSCALL_FCHMODAT2, 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
- }
-
- /* Prevent builders from creating EAs or ACLs. Not all filesystems
- support these, and they're not allowed in the Nix store because
- they're not representable in the NAR serialisation. */
+ // This list is intended for machine consumption.
+ // Please keep its format, order and BEGIN/END markers.
+ //
+ // Currently, it is up to date with libseccomp 2.5.5 and glibc 2.39.
+ // Run check-syscalls to determine which new syscalls should be added.
+ // New syscalls must be audited and handled in a way that blocks the following dangerous operations:
+ // * Creation of non-empty setuid/setgid files
+ // * Creation of extended attributes (including ACLs)
+ //
+ // BEGIN extract-syscalls
+ allowSyscall(ctx, SCMP_SYS(accept));
+ allowSyscall(ctx, SCMP_SYS(accept4));
+ allowSyscall(ctx, SCMP_SYS(access));
+ allowSyscall(ctx, SCMP_SYS(acct));
+ allowSyscall(ctx, SCMP_SYS(add_key));
+ allowSyscall(ctx, SCMP_SYS(adjtimex));
+ allowSyscall(ctx, SCMP_SYS(afs_syscall));
+ allowSyscall(ctx, SCMP_SYS(alarm));
+ allowSyscall(ctx, SCMP_SYS(arch_prctl));
+ allowSyscall(ctx, SCMP_SYS(arm_fadvise64_64));
+ allowSyscall(ctx, SCMP_SYS(arm_sync_file_range));
+ allowSyscall(ctx, SCMP_SYS(bdflush));
+ allowSyscall(ctx, SCMP_SYS(bind));
+ allowSyscall(ctx, SCMP_SYS(bpf));
+ allowSyscall(ctx, SCMP_SYS(break));
+ allowSyscall(ctx, SCMP_SYS(breakpoint));
+ allowSyscall(ctx, SCMP_SYS(brk));
+ allowSyscall(ctx, SCMP_SYS(cachectl));
+ allowSyscall(ctx, SCMP_SYS(cacheflush));
+ allowSyscall(ctx, SCMP_SYS(cachestat));
+ allowSyscall(ctx, SCMP_SYS(capget));
+ allowSyscall(ctx, SCMP_SYS(capset));
+ allowSyscall(ctx, SCMP_SYS(chdir));
+ // skip chmod (dangerous)
+ allowSyscall(ctx, SCMP_SYS(chown));
+ allowSyscall(ctx, SCMP_SYS(chown32));
+ allowSyscall(ctx, SCMP_SYS(chroot));
+ allowSyscall(ctx, SCMP_SYS(clock_adjtime));
+ allowSyscall(ctx, SCMP_SYS(clock_adjtime64));
+ allowSyscall(ctx, SCMP_SYS(clock_getres));
+ allowSyscall(ctx, SCMP_SYS(clock_getres_time64));
+ allowSyscall(ctx, SCMP_SYS(clock_gettime));
+ allowSyscall(ctx, SCMP_SYS(clock_gettime64));
+ allowSyscall(ctx, SCMP_SYS(clock_nanosleep));
+ allowSyscall(ctx, SCMP_SYS(clock_nanosleep_time64));
+ allowSyscall(ctx, SCMP_SYS(clock_settime));
+ allowSyscall(ctx, SCMP_SYS(clock_settime64));
+ allowSyscall(ctx, SCMP_SYS(clone));
+ allowSyscall(ctx, SCMP_SYS(clone3));
+ allowSyscall(ctx, SCMP_SYS(close));
+ allowSyscall(ctx, SCMP_SYS(close_range));
+ allowSyscall(ctx, SCMP_SYS(connect));
+ allowSyscall(ctx, SCMP_SYS(copy_file_range));
+ allowSyscall(ctx, SCMP_SYS(creat));
+ allowSyscall(ctx, SCMP_SYS(create_module));
+ allowSyscall(ctx, SCMP_SYS(delete_module));
+ allowSyscall(ctx, SCMP_SYS(dup));
+ allowSyscall(ctx, SCMP_SYS(dup2));
+ allowSyscall(ctx, SCMP_SYS(dup3));
+ allowSyscall(ctx, SCMP_SYS(epoll_create));
+ allowSyscall(ctx, SCMP_SYS(epoll_create1));
+ allowSyscall(ctx, SCMP_SYS(epoll_ctl));
+ allowSyscall(ctx, SCMP_SYS(epoll_ctl_old));
+ allowSyscall(ctx, SCMP_SYS(epoll_pwait));
+ allowSyscall(ctx, SCMP_SYS(epoll_pwait2));
+ allowSyscall(ctx, SCMP_SYS(epoll_wait));
+ allowSyscall(ctx, SCMP_SYS(epoll_wait_old));
+ allowSyscall(ctx, SCMP_SYS(eventfd));
+ allowSyscall(ctx, SCMP_SYS(eventfd2));
+ allowSyscall(ctx, SCMP_SYS(execve));
+ allowSyscall(ctx, SCMP_SYS(execveat));
+ allowSyscall(ctx, SCMP_SYS(exit));
+ allowSyscall(ctx, SCMP_SYS(exit_group));
+ allowSyscall(ctx, SCMP_SYS(faccessat));
+ allowSyscall(ctx, SCMP_SYS(faccessat2));
+ allowSyscall(ctx, SCMP_SYS(fadvise64));
+ allowSyscall(ctx, SCMP_SYS(fadvise64_64));
+ allowSyscall(ctx, SCMP_SYS(fallocate));
+ allowSyscall(ctx, SCMP_SYS(fanotify_init));
+ allowSyscall(ctx, SCMP_SYS(fanotify_mark));
+ allowSyscall(ctx, SCMP_SYS(fchdir));
+ // skip fchmod (dangerous)
+ // skip fchmodat (dangerous)
+ // skip fchmodat2 (dangerous)
+ allowSyscall(ctx, SCMP_SYS(fchown));
+ allowSyscall(ctx, SCMP_SYS(fchown32));
+ allowSyscall(ctx, SCMP_SYS(fchownat));
+ allowSyscall(ctx, SCMP_SYS(fcntl));
+ allowSyscall(ctx, SCMP_SYS(fcntl64));
+ allowSyscall(ctx, SCMP_SYS(fdatasync));
+ allowSyscall(ctx, SCMP_SYS(fgetxattr));
+ allowSyscall(ctx, SCMP_SYS(finit_module));
+ allowSyscall(ctx, SCMP_SYS(flistxattr));
+ allowSyscall(ctx, SCMP_SYS(flock));
+ allowSyscall(ctx, SCMP_SYS(fork));
+ allowSyscall(ctx, SCMP_SYS(fremovexattr));
+ allowSyscall(ctx, SCMP_SYS(fsconfig));
+ // skip fsetxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(fsmount));
+ allowSyscall(ctx, SCMP_SYS(fsopen));
+ allowSyscall(ctx, SCMP_SYS(fspick));
+ allowSyscall(ctx, SCMP_SYS(fstat));
+ allowSyscall(ctx, SCMP_SYS(fstat64));
+ allowSyscall(ctx, SCMP_SYS(fstatat64));
+ allowSyscall(ctx, SCMP_SYS(fstatfs));
+ allowSyscall(ctx, SCMP_SYS(fstatfs64));
+ allowSyscall(ctx, SCMP_SYS(fsync));
+ allowSyscall(ctx, SCMP_SYS(ftime));
+ allowSyscall(ctx, SCMP_SYS(ftruncate));
+ allowSyscall(ctx, SCMP_SYS(ftruncate64));
+ allowSyscall(ctx, SCMP_SYS(futex));
+ allowSyscall(ctx, SCMP_SYS(futex_requeue));
+ allowSyscall(ctx, SCMP_SYS(futex_time64));
+ allowSyscall(ctx, SCMP_SYS(futex_wait));
+ allowSyscall(ctx, SCMP_SYS(futex_waitv));
+ allowSyscall(ctx, SCMP_SYS(futex_wake));
+ allowSyscall(ctx, SCMP_SYS(futimesat));
+ allowSyscall(ctx, SCMP_SYS(getcpu));
+ allowSyscall(ctx, SCMP_SYS(getcwd));
+ allowSyscall(ctx, SCMP_SYS(getdents));
+ allowSyscall(ctx, SCMP_SYS(getdents64));
+ allowSyscall(ctx, SCMP_SYS(getegid));
+ allowSyscall(ctx, SCMP_SYS(getegid32));
+ allowSyscall(ctx, SCMP_SYS(geteuid));
+ allowSyscall(ctx, SCMP_SYS(geteuid32));
+ allowSyscall(ctx, SCMP_SYS(getgid));
+ allowSyscall(ctx, SCMP_SYS(getgid32));
+ allowSyscall(ctx, SCMP_SYS(getgroups));
+ allowSyscall(ctx, SCMP_SYS(getgroups32));
+ allowSyscall(ctx, SCMP_SYS(getitimer));
+ allowSyscall(ctx, SCMP_SYS(get_kernel_syms));
+ allowSyscall(ctx, SCMP_SYS(get_mempolicy));
+ allowSyscall(ctx, SCMP_SYS(getpeername));
+ allowSyscall(ctx, SCMP_SYS(getpgid));
+ allowSyscall(ctx, SCMP_SYS(getpgrp));
+ allowSyscall(ctx, SCMP_SYS(getpid));
+ allowSyscall(ctx, SCMP_SYS(getpmsg));
+ allowSyscall(ctx, SCMP_SYS(getppid));
+ allowSyscall(ctx, SCMP_SYS(getpriority));
+ allowSyscall(ctx, SCMP_SYS(getrandom));
+ allowSyscall(ctx, SCMP_SYS(getresgid));
+ allowSyscall(ctx, SCMP_SYS(getresgid32));
+ allowSyscall(ctx, SCMP_SYS(getresuid));
+ allowSyscall(ctx, SCMP_SYS(getresuid32));
+ allowSyscall(ctx, SCMP_SYS(getrlimit));
+ allowSyscall(ctx, SCMP_SYS(get_robust_list));
+ allowSyscall(ctx, SCMP_SYS(getrusage));
+ allowSyscall(ctx, SCMP_SYS(getsid));
+ allowSyscall(ctx, SCMP_SYS(getsockname));
+ allowSyscall(ctx, SCMP_SYS(getsockopt));
+ allowSyscall(ctx, SCMP_SYS(get_thread_area));
+ allowSyscall(ctx, SCMP_SYS(gettid));
+ allowSyscall(ctx, SCMP_SYS(gettimeofday));
+ allowSyscall(ctx, SCMP_SYS(get_tls));
+ allowSyscall(ctx, SCMP_SYS(getuid));
+ allowSyscall(ctx, SCMP_SYS(getuid32));
+ allowSyscall(ctx, SCMP_SYS(getxattr));
+ allowSyscall(ctx, SCMP_SYS(gtty));
+ allowSyscall(ctx, SCMP_SYS(idle));
+ allowSyscall(ctx, SCMP_SYS(init_module));
+ allowSyscall(ctx, SCMP_SYS(inotify_add_watch));
+ allowSyscall(ctx, SCMP_SYS(inotify_init));
+ allowSyscall(ctx, SCMP_SYS(inotify_init1));
+ allowSyscall(ctx, SCMP_SYS(inotify_rm_watch));
+ allowSyscall(ctx, SCMP_SYS(io_cancel));
+ allowSyscall(ctx, SCMP_SYS(ioctl));
+ allowSyscall(ctx, SCMP_SYS(io_destroy));
+ allowSyscall(ctx, SCMP_SYS(io_getevents));
+ allowSyscall(ctx, SCMP_SYS(ioperm));
+ allowSyscall(ctx, SCMP_SYS(io_pgetevents));
+ allowSyscall(ctx, SCMP_SYS(io_pgetevents_time64));
+ allowSyscall(ctx, SCMP_SYS(iopl));
+ allowSyscall(ctx, SCMP_SYS(ioprio_get));
+ allowSyscall(ctx, SCMP_SYS(ioprio_set));
+ allowSyscall(ctx, SCMP_SYS(io_setup));
+ allowSyscall(ctx, SCMP_SYS(io_submit));
+ allowSyscall(ctx, SCMP_SYS(io_uring_enter));
+ allowSyscall(ctx, SCMP_SYS(io_uring_register));
+ allowSyscall(ctx, SCMP_SYS(io_uring_setup));
+ allowSyscall(ctx, SCMP_SYS(ipc));
+ allowSyscall(ctx, SCMP_SYS(kcmp));
+ allowSyscall(ctx, SCMP_SYS(kexec_file_load));
+ allowSyscall(ctx, SCMP_SYS(kexec_load));
+ allowSyscall(ctx, SCMP_SYS(keyctl));
+ allowSyscall(ctx, SCMP_SYS(kill));
+ allowSyscall(ctx, SCMP_SYS(landlock_add_rule));
+ allowSyscall(ctx, SCMP_SYS(landlock_create_ruleset));
+ allowSyscall(ctx, SCMP_SYS(landlock_restrict_self));
+ allowSyscall(ctx, SCMP_SYS(lchown));
+ allowSyscall(ctx, SCMP_SYS(lchown32));
+ allowSyscall(ctx, SCMP_SYS(lgetxattr));
+ allowSyscall(ctx, SCMP_SYS(link));
+ allowSyscall(ctx, SCMP_SYS(linkat));
+ allowSyscall(ctx, SCMP_SYS(listen));
+ allowSyscall(ctx, SCMP_SYS(listxattr));
+ allowSyscall(ctx, SCMP_SYS(llistxattr));
+ allowSyscall(ctx, SCMP_SYS(_llseek));
+ allowSyscall(ctx, SCMP_SYS(lock));
+ allowSyscall(ctx, SCMP_SYS(lookup_dcookie));
+ allowSyscall(ctx, SCMP_SYS(lremovexattr));
+ allowSyscall(ctx, SCMP_SYS(lseek));
+ // skip lsetxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(lstat));
+ allowSyscall(ctx, SCMP_SYS(lstat64));
+ allowSyscall(ctx, SCMP_SYS(madvise));
+ allowSyscall(ctx, SCMP_SYS(map_shadow_stack));
+ allowSyscall(ctx, SCMP_SYS(mbind));
+ allowSyscall(ctx, SCMP_SYS(membarrier));
+ allowSyscall(ctx, SCMP_SYS(memfd_create));
+ allowSyscall(ctx, SCMP_SYS(memfd_secret));
+ allowSyscall(ctx, SCMP_SYS(migrate_pages));
+ allowSyscall(ctx, SCMP_SYS(mincore));
+ allowSyscall(ctx, SCMP_SYS(mkdir));
+ allowSyscall(ctx, SCMP_SYS(mkdirat));
+ allowSyscall(ctx, SCMP_SYS(mknod));
+ allowSyscall(ctx, SCMP_SYS(mknodat));
+ allowSyscall(ctx, SCMP_SYS(mlock));
+ allowSyscall(ctx, SCMP_SYS(mlock2));
+ allowSyscall(ctx, SCMP_SYS(mlockall));
+ allowSyscall(ctx, SCMP_SYS(mmap));
+ allowSyscall(ctx, SCMP_SYS(mmap2));
+ allowSyscall(ctx, SCMP_SYS(modify_ldt));
+ allowSyscall(ctx, SCMP_SYS(mount));
+ allowSyscall(ctx, SCMP_SYS(mount_setattr));
+ allowSyscall(ctx, SCMP_SYS(move_mount));
+ allowSyscall(ctx, SCMP_SYS(move_pages));
+ allowSyscall(ctx, SCMP_SYS(mprotect));
+ allowSyscall(ctx, SCMP_SYS(mpx));
+ allowSyscall(ctx, SCMP_SYS(mq_getsetattr));
+ allowSyscall(ctx, SCMP_SYS(mq_notify));
+ allowSyscall(ctx, SCMP_SYS(mq_open));
+ allowSyscall(ctx, SCMP_SYS(mq_timedreceive));
+ allowSyscall(ctx, SCMP_SYS(mq_timedreceive_time64));
+ allowSyscall(ctx, SCMP_SYS(mq_timedsend));
+ allowSyscall(ctx, SCMP_SYS(mq_timedsend_time64));
+ allowSyscall(ctx, SCMP_SYS(mq_unlink));
+ allowSyscall(ctx, SCMP_SYS(mremap));
+ allowSyscall(ctx, SCMP_SYS(msgctl));
+ allowSyscall(ctx, SCMP_SYS(msgget));
+ allowSyscall(ctx, SCMP_SYS(msgrcv));
+ allowSyscall(ctx, SCMP_SYS(msgsnd));
+ allowSyscall(ctx, SCMP_SYS(msync));
+ allowSyscall(ctx, SCMP_SYS(multiplexer));
+ allowSyscall(ctx, SCMP_SYS(munlock));
+ allowSyscall(ctx, SCMP_SYS(munlockall));
+ allowSyscall(ctx, SCMP_SYS(munmap));
+ allowSyscall(ctx, SCMP_SYS(name_to_handle_at));
+ allowSyscall(ctx, SCMP_SYS(nanosleep));
+ allowSyscall(ctx, SCMP_SYS(newfstatat));
+ allowSyscall(ctx, SCMP_SYS(_newselect));
+ allowSyscall(ctx, SCMP_SYS(nfsservctl));
+ allowSyscall(ctx, SCMP_SYS(nice));
+ allowSyscall(ctx, SCMP_SYS(oldfstat));
+ allowSyscall(ctx, SCMP_SYS(oldlstat));
+ allowSyscall(ctx, SCMP_SYS(oldolduname));
+ allowSyscall(ctx, SCMP_SYS(oldstat));
+ allowSyscall(ctx, SCMP_SYS(olduname));
+ allowSyscall(ctx, SCMP_SYS(open));
+ allowSyscall(ctx, SCMP_SYS(openat));
+ allowSyscall(ctx, SCMP_SYS(openat2));
+ allowSyscall(ctx, SCMP_SYS(open_by_handle_at));
+ allowSyscall(ctx, SCMP_SYS(open_tree));
+ allowSyscall(ctx, SCMP_SYS(pause));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_iobase));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_read));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_write));
+ allowSyscall(ctx, SCMP_SYS(perf_event_open));
+ allowSyscall(ctx, SCMP_SYS(personality));
+ allowSyscall(ctx, SCMP_SYS(pidfd_getfd));
+ allowSyscall(ctx, SCMP_SYS(pidfd_open));
+ allowSyscall(ctx, SCMP_SYS(pidfd_send_signal));
+ allowSyscall(ctx, SCMP_SYS(pipe));
+ allowSyscall(ctx, SCMP_SYS(pipe2));
+ allowSyscall(ctx, SCMP_SYS(pivot_root));
+ allowSyscall(ctx, SCMP_SYS(pkey_alloc));
+ allowSyscall(ctx, SCMP_SYS(pkey_free));
+ allowSyscall(ctx, SCMP_SYS(pkey_mprotect));
+ allowSyscall(ctx, SCMP_SYS(poll));
+ allowSyscall(ctx, SCMP_SYS(ppoll));
+ allowSyscall(ctx, SCMP_SYS(ppoll_time64));
+ allowSyscall(ctx, SCMP_SYS(prctl));
+ allowSyscall(ctx, SCMP_SYS(pread64));
+ allowSyscall(ctx, SCMP_SYS(preadv));
+ allowSyscall(ctx, SCMP_SYS(preadv2));
+ allowSyscall(ctx, SCMP_SYS(prlimit64));
+ allowSyscall(ctx, SCMP_SYS(process_madvise));
+ allowSyscall(ctx, SCMP_SYS(process_mrelease));
+ allowSyscall(ctx, SCMP_SYS(process_vm_readv));
+ allowSyscall(ctx, SCMP_SYS(process_vm_writev));
+ allowSyscall(ctx, SCMP_SYS(prof));
+ allowSyscall(ctx, SCMP_SYS(profil));
+ allowSyscall(ctx, SCMP_SYS(pselect6));
+ allowSyscall(ctx, SCMP_SYS(pselect6_time64));
+ allowSyscall(ctx, SCMP_SYS(ptrace));
+ allowSyscall(ctx, SCMP_SYS(putpmsg));
+ allowSyscall(ctx, SCMP_SYS(pwrite64));
+ allowSyscall(ctx, SCMP_SYS(pwritev));
+ allowSyscall(ctx, SCMP_SYS(pwritev2));
+ allowSyscall(ctx, SCMP_SYS(query_module));
+ allowSyscall(ctx, SCMP_SYS(quotactl));
+ allowSyscall(ctx, SCMP_SYS(quotactl_fd));
+ allowSyscall(ctx, SCMP_SYS(read));
+ allowSyscall(ctx, SCMP_SYS(readahead));
+ allowSyscall(ctx, SCMP_SYS(readdir));
+ allowSyscall(ctx, SCMP_SYS(readlink));
+ allowSyscall(ctx, SCMP_SYS(readlinkat));
+ allowSyscall(ctx, SCMP_SYS(readv));
+ allowSyscall(ctx, SCMP_SYS(reboot));
+ allowSyscall(ctx, SCMP_SYS(recv));
+ allowSyscall(ctx, SCMP_SYS(recvfrom));
+ allowSyscall(ctx, SCMP_SYS(recvmmsg));
+ allowSyscall(ctx, SCMP_SYS(recvmmsg_time64));
+ allowSyscall(ctx, SCMP_SYS(recvmsg));
+ allowSyscall(ctx, SCMP_SYS(remap_file_pages));
+ allowSyscall(ctx, SCMP_SYS(removexattr));
+ allowSyscall(ctx, SCMP_SYS(rename));
+ allowSyscall(ctx, SCMP_SYS(renameat));
+ allowSyscall(ctx, SCMP_SYS(renameat2));
+ allowSyscall(ctx, SCMP_SYS(request_key));
+ allowSyscall(ctx, SCMP_SYS(restart_syscall));
+ allowSyscall(ctx, SCMP_SYS(riscv_flush_icache));
+ allowSyscall(ctx, SCMP_SYS(rmdir));
+ allowSyscall(ctx, SCMP_SYS(rseq));
+ allowSyscall(ctx, SCMP_SYS(rtas));
+ allowSyscall(ctx, SCMP_SYS(rt_sigaction));
+ allowSyscall(ctx, SCMP_SYS(rt_sigpending));
+ allowSyscall(ctx, SCMP_SYS(rt_sigprocmask));
+ allowSyscall(ctx, SCMP_SYS(rt_sigqueueinfo));
+ allowSyscall(ctx, SCMP_SYS(rt_sigreturn));
+ allowSyscall(ctx, SCMP_SYS(rt_sigsuspend));
+ allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait));
+ allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait_time64));
+ allowSyscall(ctx, SCMP_SYS(rt_tgsigqueueinfo));
+ allowSyscall(ctx, SCMP_SYS(s390_guarded_storage));
+ allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_read));
+ allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_write));
+ allowSyscall(ctx, SCMP_SYS(s390_runtime_instr));
+ allowSyscall(ctx, SCMP_SYS(s390_sthyi));
+ allowSyscall(ctx, SCMP_SYS(sched_getaffinity));
+ allowSyscall(ctx, SCMP_SYS(sched_getattr));
+ allowSyscall(ctx, SCMP_SYS(sched_getparam));
+ allowSyscall(ctx, SCMP_SYS(sched_get_priority_max));
+ allowSyscall(ctx, SCMP_SYS(sched_get_priority_min));
+ allowSyscall(ctx, SCMP_SYS(sched_getscheduler));
+ allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval));
+ allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval_time64));
+ allowSyscall(ctx, SCMP_SYS(sched_setaffinity));
+ allowSyscall(ctx, SCMP_SYS(sched_setattr));
+ allowSyscall(ctx, SCMP_SYS(sched_setparam));
+ allowSyscall(ctx, SCMP_SYS(sched_setscheduler));
+ allowSyscall(ctx, SCMP_SYS(sched_yield));
+ allowSyscall(ctx, SCMP_SYS(seccomp));
+ allowSyscall(ctx, SCMP_SYS(security));
+ allowSyscall(ctx, SCMP_SYS(select));
+ allowSyscall(ctx, SCMP_SYS(semctl));
+ allowSyscall(ctx, SCMP_SYS(semget));
+ allowSyscall(ctx, SCMP_SYS(semop));
+ allowSyscall(ctx, SCMP_SYS(semtimedop));
+ allowSyscall(ctx, SCMP_SYS(semtimedop_time64));
+ allowSyscall(ctx, SCMP_SYS(send));
+ allowSyscall(ctx, SCMP_SYS(sendfile));
+ allowSyscall(ctx, SCMP_SYS(sendfile64));
+ allowSyscall(ctx, SCMP_SYS(sendmmsg));
+ allowSyscall(ctx, SCMP_SYS(sendmsg));
+ allowSyscall(ctx, SCMP_SYS(sendto));
+ allowSyscall(ctx, SCMP_SYS(setdomainname));
+ allowSyscall(ctx, SCMP_SYS(setfsgid));
+ allowSyscall(ctx, SCMP_SYS(setfsgid32));
+ allowSyscall(ctx, SCMP_SYS(setfsuid));
+ allowSyscall(ctx, SCMP_SYS(setfsuid32));
+ allowSyscall(ctx, SCMP_SYS(setgid));
+ allowSyscall(ctx, SCMP_SYS(setgid32));
+ allowSyscall(ctx, SCMP_SYS(setgroups));
+ allowSyscall(ctx, SCMP_SYS(setgroups32));
+ allowSyscall(ctx, SCMP_SYS(sethostname));
+ allowSyscall(ctx, SCMP_SYS(setitimer));
+ allowSyscall(ctx, SCMP_SYS(set_mempolicy));
+ allowSyscall(ctx, SCMP_SYS(set_mempolicy_home_node));
+ allowSyscall(ctx, SCMP_SYS(setns));
+ allowSyscall(ctx, SCMP_SYS(setpgid));
+ allowSyscall(ctx, SCMP_SYS(setpriority));
+ allowSyscall(ctx, SCMP_SYS(setregid));
+ allowSyscall(ctx, SCMP_SYS(setregid32));
+ allowSyscall(ctx, SCMP_SYS(setresgid));
+ allowSyscall(ctx, SCMP_SYS(setresgid32));
+ allowSyscall(ctx, SCMP_SYS(setresuid));
+ allowSyscall(ctx, SCMP_SYS(setresuid32));
+ allowSyscall(ctx, SCMP_SYS(setreuid));
+ allowSyscall(ctx, SCMP_SYS(setreuid32));
+ allowSyscall(ctx, SCMP_SYS(setrlimit));
+ allowSyscall(ctx, SCMP_SYS(set_robust_list));
+ allowSyscall(ctx, SCMP_SYS(setsid));
+ allowSyscall(ctx, SCMP_SYS(setsockopt));
+ allowSyscall(ctx, SCMP_SYS(set_thread_area));
+ allowSyscall(ctx, SCMP_SYS(set_tid_address));
+ allowSyscall(ctx, SCMP_SYS(settimeofday));
+ allowSyscall(ctx, SCMP_SYS(set_tls));
+ allowSyscall(ctx, SCMP_SYS(setuid));
+ allowSyscall(ctx, SCMP_SYS(setuid32));
+ // skip setxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(sgetmask));
+ allowSyscall(ctx, SCMP_SYS(shmat));
+ allowSyscall(ctx, SCMP_SYS(shmctl));
+ allowSyscall(ctx, SCMP_SYS(shmdt));
+ allowSyscall(ctx, SCMP_SYS(shmget));
+ allowSyscall(ctx, SCMP_SYS(shutdown));
+ allowSyscall(ctx, SCMP_SYS(sigaction));
+ allowSyscall(ctx, SCMP_SYS(sigaltstack));
+ allowSyscall(ctx, SCMP_SYS(signal));
+ allowSyscall(ctx, SCMP_SYS(signalfd));
+ allowSyscall(ctx, SCMP_SYS(signalfd4));
+ allowSyscall(ctx, SCMP_SYS(sigpending));
+ allowSyscall(ctx, SCMP_SYS(sigprocmask));
+ allowSyscall(ctx, SCMP_SYS(sigreturn));
+ allowSyscall(ctx, SCMP_SYS(sigsuspend));
+ allowSyscall(ctx, SCMP_SYS(socket));
+ allowSyscall(ctx, SCMP_SYS(socketcall));
+ allowSyscall(ctx, SCMP_SYS(socketpair));
+ allowSyscall(ctx, SCMP_SYS(splice));
+ allowSyscall(ctx, SCMP_SYS(spu_create));
+ allowSyscall(ctx, SCMP_SYS(spu_run));
+ allowSyscall(ctx, SCMP_SYS(ssetmask));
+ allowSyscall(ctx, SCMP_SYS(stat));
+ allowSyscall(ctx, SCMP_SYS(stat64));
+ allowSyscall(ctx, SCMP_SYS(statfs));
+ allowSyscall(ctx, SCMP_SYS(statfs64));
+ allowSyscall(ctx, SCMP_SYS(statx));
+ allowSyscall(ctx, SCMP_SYS(stime));
+ allowSyscall(ctx, SCMP_SYS(stty));
+ allowSyscall(ctx, SCMP_SYS(subpage_prot));
+ allowSyscall(ctx, SCMP_SYS(swapcontext));
+ allowSyscall(ctx, SCMP_SYS(swapoff));
+ allowSyscall(ctx, SCMP_SYS(swapon));
+ allowSyscall(ctx, SCMP_SYS(switch_endian));
+ allowSyscall(ctx, SCMP_SYS(symlink));
+ allowSyscall(ctx, SCMP_SYS(symlinkat));
+ allowSyscall(ctx, SCMP_SYS(sync));
+ allowSyscall(ctx, SCMP_SYS(sync_file_range));
+ allowSyscall(ctx, SCMP_SYS(sync_file_range2));
+ allowSyscall(ctx, SCMP_SYS(syncfs));
+ allowSyscall(ctx, SCMP_SYS(syscall));
+ allowSyscall(ctx, SCMP_SYS(_sysctl));
+ allowSyscall(ctx, SCMP_SYS(sys_debug_setcontext));
+ allowSyscall(ctx, SCMP_SYS(sysfs));
+ allowSyscall(ctx, SCMP_SYS(sysinfo));
+ allowSyscall(ctx, SCMP_SYS(syslog));
+ allowSyscall(ctx, SCMP_SYS(sysmips));
+ allowSyscall(ctx, SCMP_SYS(tee));
+ allowSyscall(ctx, SCMP_SYS(tgkill));
+ allowSyscall(ctx, SCMP_SYS(time));
+ allowSyscall(ctx, SCMP_SYS(timer_create));
+ allowSyscall(ctx, SCMP_SYS(timer_delete));
+ allowSyscall(ctx, SCMP_SYS(timerfd));
+ allowSyscall(ctx, SCMP_SYS(timerfd_create));
+ allowSyscall(ctx, SCMP_SYS(timerfd_gettime));
+ allowSyscall(ctx, SCMP_SYS(timerfd_gettime64));
+ allowSyscall(ctx, SCMP_SYS(timerfd_settime));
+ allowSyscall(ctx, SCMP_SYS(timerfd_settime64));
+ allowSyscall(ctx, SCMP_SYS(timer_getoverrun));
+ allowSyscall(ctx, SCMP_SYS(timer_gettime));
+ allowSyscall(ctx, SCMP_SYS(timer_gettime64));
+ allowSyscall(ctx, SCMP_SYS(timer_settime));
+ allowSyscall(ctx, SCMP_SYS(timer_settime64));
+ allowSyscall(ctx, SCMP_SYS(times));
+ allowSyscall(ctx, SCMP_SYS(tkill));
+ allowSyscall(ctx, SCMP_SYS(truncate));
+ allowSyscall(ctx, SCMP_SYS(truncate64));
+ allowSyscall(ctx, SCMP_SYS(tuxcall));
+ allowSyscall(ctx, SCMP_SYS(ugetrlimit));
+ allowSyscall(ctx, SCMP_SYS(ulimit));
+ allowSyscall(ctx, SCMP_SYS(umask));
+ allowSyscall(ctx, SCMP_SYS(umount));
+ allowSyscall(ctx, SCMP_SYS(umount2));
+ allowSyscall(ctx, SCMP_SYS(uname));
+ allowSyscall(ctx, SCMP_SYS(unlink));
+ allowSyscall(ctx, SCMP_SYS(unlinkat));
+ allowSyscall(ctx, SCMP_SYS(unshare));
+ allowSyscall(ctx, SCMP_SYS(uselib));
+ allowSyscall(ctx, SCMP_SYS(userfaultfd));
+ allowSyscall(ctx, SCMP_SYS(usr26));
+ allowSyscall(ctx, SCMP_SYS(usr32));
+ allowSyscall(ctx, SCMP_SYS(ustat));
+ allowSyscall(ctx, SCMP_SYS(utime));
+ allowSyscall(ctx, SCMP_SYS(utimensat));
+ allowSyscall(ctx, SCMP_SYS(utimensat_time64));
+ allowSyscall(ctx, SCMP_SYS(utimes));
+ allowSyscall(ctx, SCMP_SYS(vfork));
+ allowSyscall(ctx, SCMP_SYS(vhangup));
+ allowSyscall(ctx, SCMP_SYS(vm86));
+ allowSyscall(ctx, SCMP_SYS(vm86old));
+ allowSyscall(ctx, SCMP_SYS(vmsplice));
+ allowSyscall(ctx, SCMP_SYS(vserver));
+ allowSyscall(ctx, SCMP_SYS(wait4));
+ allowSyscall(ctx, SCMP_SYS(waitid));
+ allowSyscall(ctx, SCMP_SYS(waitpid));
+ allowSyscall(ctx, SCMP_SYS(write));
+ allowSyscall(ctx, SCMP_SYS(writev));
+ // END extract-syscalls
+
+ // chmod family: prevent adding setuid/setgid bits to existing files.
+ // The Nix store does not support setuid/setgid, and even their temporary creation can weaken the security of the sandbox.
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(chmod), 1);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmod), 1);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat), 2);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat2), 2);
+
+ // setxattr family: prevent creation of extended attributes or ACLs.
+ // Not all filesystems support them, and they're incompatible with the NAR format.
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != 0)