aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--flake.nix1
-rw-r--r--maintainers/check-syscalls.nix16
-rwxr-xr-xmaintainers/check-syscalls.sh7
-rw-r--r--package.nix2
-rw-r--r--src/libstore/build/local-derivation-goal.cc549
-rw-r--r--src/libstore/linux/fchmodat2-compat.hh35
-rw-r--r--tests/nixos/setuid/fchmodat2-suid.c5
7 files changed, 552 insertions, 63 deletions
diff --git a/flake.nix b/flake.nix
index 0609a8681..1fc02a590 100644
--- a/flake.nix
+++ b/flake.nix
@@ -164,6 +164,7 @@
nixUnstable = prev.nixUnstable;
check-headers = final.buildPackages.callPackage ./maintainers/check-headers.nix { };
+ check-syscalls = final.buildPackages.callPackage ./maintainers/check-syscalls.nix { };
default-busybox-sandbox-shell = final.busybox.override {
useMusl = true;
diff --git a/maintainers/check-syscalls.nix b/maintainers/check-syscalls.nix
new file mode 100644
index 000000000..1a3de5c6d
--- /dev/null
+++ b/maintainers/check-syscalls.nix
@@ -0,0 +1,16 @@
+{
+ runCommandNoCC,
+ lib,
+ libseccomp,
+ writeShellScriptBin,
+}:
+let
+ syscalls-csv = runCommandNoCC "syscalls.csv" { } ''
+ echo ${lib.escapeShellArg libseccomp.src}
+ tar -xf ${lib.escapeShellArg libseccomp.src} --strip-components=2 ${libseccomp.name}/src/syscalls.csv
+ mv syscalls.csv "$out"
+ '';
+in
+writeShellScriptBin "check-syscalls" ''
+ ${./check-syscalls.sh} ${syscalls-csv}
+''
diff --git a/maintainers/check-syscalls.sh b/maintainers/check-syscalls.sh
new file mode 100755
index 000000000..cd72ac23b
--- /dev/null
+++ b/maintainers/check-syscalls.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+set -e
+
+diff -u <(awk < src/libstore/build/local-derivation-goal.cc '/BEGIN extract-syscalls/ { extracting = 1; next }
+match($0, /allowSyscall\(ctx, SCMP_SYS\(([^)]*)\)\);|\/\/ skip ([^ ]*)/, result) { print result[1] result[2] }
+/END extract-syscalls/ { extracting = 0; next }') <(tail -n+2 "$1" | cut -d, -f 1)
diff --git a/package.nix b/package.nix
index 16477a6a2..c6b2e4451 100644
--- a/package.nix
+++ b/package.nix
@@ -403,6 +403,7 @@ stdenv.mkDerivation (finalAttrs: {
# Lix specific packages
pre-commit-checks,
contribNotice,
+ check-syscalls,
}:
let
glibcFix = lib.optionalAttrs (buildPlatform.isLinux && glibcLocales != null) {
@@ -453,6 +454,7 @@ stdenv.mkDerivation (finalAttrs: {
# `bash` from inside `nix develop`, say, because you are using it
# via direnv, you will by default get bash (unusable edition).
bashInteractive
+ check-syscalls
pythonEnv
# docker image tool
skopeo
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc
index 8e1f4e849..33cbfb29d 100644
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@@ -45,7 +45,6 @@
#include <sys/prctl.h>
#include <sys/syscall.h>
#if HAVE_SECCOMP
-#include "linux/fchmodat2-compat.hh"
#include <seccomp.h>
#endif
#define pivot_root(new_root, put_old) (syscall(SYS_pivot_root, new_root, put_old))
@@ -1363,6 +1362,20 @@ void LocalDerivationGoal::chownToBuilder(const Path & path)
throw SysError("cannot change ownership of '%1%'", path);
}
+#if HAVE_SECCOMP
+
+static void allowSyscall(scmp_filter_ctx ctx, int syscall) {
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 0) != 0)
+ throw SysError("unable to add seccomp rule");
+}
+
+#define ALLOW_CHMOD_IF_SAFE(ctx, syscall, modePos) \
+ if (seccomp_rule_add(ctx, SCMP_ACT_ALLOW, syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID | S_ISGID, 0)) != 0 || \
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISUID, S_ISUID)) != 0 || \
+ seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), syscall, 1, SCMP_A##modePos(SCMP_CMP_MASKED_EQ, S_ISGID, S_ISGID)) != 0) \
+ throw SysError("unable to add seccomp rule");
+
+#endif
void setupSeccomp()
{
@@ -1370,7 +1383,9 @@ void setupSeccomp()
#if HAVE_SECCOMP
scmp_filter_ctx ctx;
- if (!(ctx = seccomp_init(SCMP_ACT_ALLOW)))
+ // Pretend that syscalls we don't yet know about don't exist.
+ // This is the best option for compatibility: after all, they did in fact not exist not too long ago.
+ if (!(ctx = seccomp_init(SCMP_ACT_ERRNO(ENOSYS))))
throw SysError("unable to initialize seccomp mode 2");
Finally cleanup([&]() {
@@ -1405,28 +1420,514 @@ void setupSeccomp()
seccomp_arch_add(ctx, SCMP_ARCH_MIPSEL64N32) != 0)
printError("unable to add mips64el-*abin32 seccomp architecture");
- /* Prevent builders from creating setuid/setgid binaries. */
- for (int perm : { S_ISUID, S_ISGID }) {
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(chmod), 1,
- SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
-
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmod), 1,
- SCMP_A1(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
-
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), SCMP_SYS(fchmodat), 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
-
- if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(EPERM), NIX_SYSCALL_FCHMODAT2, 1,
- SCMP_A2(SCMP_CMP_MASKED_EQ, (scmp_datum_t) perm, (scmp_datum_t) perm)) != 0)
- throw SysError("unable to add seccomp rule");
- }
-
- /* Prevent builders from creating EAs or ACLs. Not all filesystems
- support these, and they're not allowed in the Nix store because
- they're not representable in the NAR serialisation. */
+ // This list is intended for machine consumption.
+ // Please keep its format, order and BEGIN/END markers.
+ //
+ // Currently, it is up to date with libseccomp 2.5.5 and glibc 2.39.
+ // Run check-syscalls to determine which new syscalls should be added.
+ // New syscalls must be audited and handled in a way that blocks the following dangerous operations:
+ // * Creation of non-empty setuid/setgid files
+ // * Creation of extended attributes (including ACLs)
+ //
+ // BEGIN extract-syscalls
+ allowSyscall(ctx, SCMP_SYS(accept));
+ allowSyscall(ctx, SCMP_SYS(accept4));
+ allowSyscall(ctx, SCMP_SYS(access));
+ allowSyscall(ctx, SCMP_SYS(acct));
+ allowSyscall(ctx, SCMP_SYS(add_key));
+ allowSyscall(ctx, SCMP_SYS(adjtimex));
+ allowSyscall(ctx, SCMP_SYS(afs_syscall));
+ allowSyscall(ctx, SCMP_SYS(alarm));
+ allowSyscall(ctx, SCMP_SYS(arch_prctl));
+ allowSyscall(ctx, SCMP_SYS(arm_fadvise64_64));
+ allowSyscall(ctx, SCMP_SYS(arm_sync_file_range));
+ allowSyscall(ctx, SCMP_SYS(bdflush));
+ allowSyscall(ctx, SCMP_SYS(bind));
+ allowSyscall(ctx, SCMP_SYS(bpf));
+ allowSyscall(ctx, SCMP_SYS(break));
+ allowSyscall(ctx, SCMP_SYS(breakpoint));
+ allowSyscall(ctx, SCMP_SYS(brk));
+ allowSyscall(ctx, SCMP_SYS(cachectl));
+ allowSyscall(ctx, SCMP_SYS(cacheflush));
+ allowSyscall(ctx, SCMP_SYS(cachestat));
+ allowSyscall(ctx, SCMP_SYS(capget));
+ allowSyscall(ctx, SCMP_SYS(capset));
+ allowSyscall(ctx, SCMP_SYS(chdir));
+ // skip chmod (dangerous)
+ allowSyscall(ctx, SCMP_SYS(chown));
+ allowSyscall(ctx, SCMP_SYS(chown32));
+ allowSyscall(ctx, SCMP_SYS(chroot));
+ allowSyscall(ctx, SCMP_SYS(clock_adjtime));
+ allowSyscall(ctx, SCMP_SYS(clock_adjtime64));
+ allowSyscall(ctx, SCMP_SYS(clock_getres));
+ allowSyscall(ctx, SCMP_SYS(clock_getres_time64));
+ allowSyscall(ctx, SCMP_SYS(clock_gettime));
+ allowSyscall(ctx, SCMP_SYS(clock_gettime64));
+ allowSyscall(ctx, SCMP_SYS(clock_nanosleep));
+ allowSyscall(ctx, SCMP_SYS(clock_nanosleep_time64));
+ allowSyscall(ctx, SCMP_SYS(clock_settime));
+ allowSyscall(ctx, SCMP_SYS(clock_settime64));
+ allowSyscall(ctx, SCMP_SYS(clone));
+ allowSyscall(ctx, SCMP_SYS(clone3));
+ allowSyscall(ctx, SCMP_SYS(close));
+ allowSyscall(ctx, SCMP_SYS(close_range));
+ allowSyscall(ctx, SCMP_SYS(connect));
+ allowSyscall(ctx, SCMP_SYS(copy_file_range));
+ allowSyscall(ctx, SCMP_SYS(creat));
+ allowSyscall(ctx, SCMP_SYS(create_module));
+ allowSyscall(ctx, SCMP_SYS(delete_module));
+ allowSyscall(ctx, SCMP_SYS(dup));
+ allowSyscall(ctx, SCMP_SYS(dup2));
+ allowSyscall(ctx, SCMP_SYS(dup3));
+ allowSyscall(ctx, SCMP_SYS(epoll_create));
+ allowSyscall(ctx, SCMP_SYS(epoll_create1));
+ allowSyscall(ctx, SCMP_SYS(epoll_ctl));
+ allowSyscall(ctx, SCMP_SYS(epoll_ctl_old));
+ allowSyscall(ctx, SCMP_SYS(epoll_pwait));
+ allowSyscall(ctx, SCMP_SYS(epoll_pwait2));
+ allowSyscall(ctx, SCMP_SYS(epoll_wait));
+ allowSyscall(ctx, SCMP_SYS(epoll_wait_old));
+ allowSyscall(ctx, SCMP_SYS(eventfd));
+ allowSyscall(ctx, SCMP_SYS(eventfd2));
+ allowSyscall(ctx, SCMP_SYS(execve));
+ allowSyscall(ctx, SCMP_SYS(execveat));
+ allowSyscall(ctx, SCMP_SYS(exit));
+ allowSyscall(ctx, SCMP_SYS(exit_group));
+ allowSyscall(ctx, SCMP_SYS(faccessat));
+ allowSyscall(ctx, SCMP_SYS(faccessat2));
+ allowSyscall(ctx, SCMP_SYS(fadvise64));
+ allowSyscall(ctx, SCMP_SYS(fadvise64_64));
+ allowSyscall(ctx, SCMP_SYS(fallocate));
+ allowSyscall(ctx, SCMP_SYS(fanotify_init));
+ allowSyscall(ctx, SCMP_SYS(fanotify_mark));
+ allowSyscall(ctx, SCMP_SYS(fchdir));
+ // skip fchmod (dangerous)
+ // skip fchmodat (dangerous)
+ // skip fchmodat2 (dangerous)
+ allowSyscall(ctx, SCMP_SYS(fchown));
+ allowSyscall(ctx, SCMP_SYS(fchown32));
+ allowSyscall(ctx, SCMP_SYS(fchownat));
+ allowSyscall(ctx, SCMP_SYS(fcntl));
+ allowSyscall(ctx, SCMP_SYS(fcntl64));
+ allowSyscall(ctx, SCMP_SYS(fdatasync));
+ allowSyscall(ctx, SCMP_SYS(fgetxattr));
+ allowSyscall(ctx, SCMP_SYS(finit_module));
+ allowSyscall(ctx, SCMP_SYS(flistxattr));
+ allowSyscall(ctx, SCMP_SYS(flock));
+ allowSyscall(ctx, SCMP_SYS(fork));
+ allowSyscall(ctx, SCMP_SYS(fremovexattr));
+ allowSyscall(ctx, SCMP_SYS(fsconfig));
+ // skip fsetxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(fsmount));
+ allowSyscall(ctx, SCMP_SYS(fsopen));
+ allowSyscall(ctx, SCMP_SYS(fspick));
+ allowSyscall(ctx, SCMP_SYS(fstat));
+ allowSyscall(ctx, SCMP_SYS(fstat64));
+ allowSyscall(ctx, SCMP_SYS(fstatat64));
+ allowSyscall(ctx, SCMP_SYS(fstatfs));
+ allowSyscall(ctx, SCMP_SYS(fstatfs64));
+ allowSyscall(ctx, SCMP_SYS(fsync));
+ allowSyscall(ctx, SCMP_SYS(ftime));
+ allowSyscall(ctx, SCMP_SYS(ftruncate));
+ allowSyscall(ctx, SCMP_SYS(ftruncate64));
+ allowSyscall(ctx, SCMP_SYS(futex));
+ allowSyscall(ctx, SCMP_SYS(futex_requeue));
+ allowSyscall(ctx, SCMP_SYS(futex_time64));
+ allowSyscall(ctx, SCMP_SYS(futex_wait));
+ allowSyscall(ctx, SCMP_SYS(futex_waitv));
+ allowSyscall(ctx, SCMP_SYS(futex_wake));
+ allowSyscall(ctx, SCMP_SYS(futimesat));
+ allowSyscall(ctx, SCMP_SYS(getcpu));
+ allowSyscall(ctx, SCMP_SYS(getcwd));
+ allowSyscall(ctx, SCMP_SYS(getdents));
+ allowSyscall(ctx, SCMP_SYS(getdents64));
+ allowSyscall(ctx, SCMP_SYS(getegid));
+ allowSyscall(ctx, SCMP_SYS(getegid32));
+ allowSyscall(ctx, SCMP_SYS(geteuid));
+ allowSyscall(ctx, SCMP_SYS(geteuid32));
+ allowSyscall(ctx, SCMP_SYS(getgid));
+ allowSyscall(ctx, SCMP_SYS(getgid32));
+ allowSyscall(ctx, SCMP_SYS(getgroups));
+ allowSyscall(ctx, SCMP_SYS(getgroups32));
+ allowSyscall(ctx, SCMP_SYS(getitimer));
+ allowSyscall(ctx, SCMP_SYS(get_kernel_syms));
+ allowSyscall(ctx, SCMP_SYS(get_mempolicy));
+ allowSyscall(ctx, SCMP_SYS(getpeername));
+ allowSyscall(ctx, SCMP_SYS(getpgid));
+ allowSyscall(ctx, SCMP_SYS(getpgrp));
+ allowSyscall(ctx, SCMP_SYS(getpid));
+ allowSyscall(ctx, SCMP_SYS(getpmsg));
+ allowSyscall(ctx, SCMP_SYS(getppid));
+ allowSyscall(ctx, SCMP_SYS(getpriority));
+ allowSyscall(ctx, SCMP_SYS(getrandom));
+ allowSyscall(ctx, SCMP_SYS(getresgid));
+ allowSyscall(ctx, SCMP_SYS(getresgid32));
+ allowSyscall(ctx, SCMP_SYS(getresuid));
+ allowSyscall(ctx, SCMP_SYS(getresuid32));
+ allowSyscall(ctx, SCMP_SYS(getrlimit));
+ allowSyscall(ctx, SCMP_SYS(get_robust_list));
+ allowSyscall(ctx, SCMP_SYS(getrusage));
+ allowSyscall(ctx, SCMP_SYS(getsid));
+ allowSyscall(ctx, SCMP_SYS(getsockname));
+ allowSyscall(ctx, SCMP_SYS(getsockopt));
+ allowSyscall(ctx, SCMP_SYS(get_thread_area));
+ allowSyscall(ctx, SCMP_SYS(gettid));
+ allowSyscall(ctx, SCMP_SYS(gettimeofday));
+ allowSyscall(ctx, SCMP_SYS(get_tls));
+ allowSyscall(ctx, SCMP_SYS(getuid));
+ allowSyscall(ctx, SCMP_SYS(getuid32));
+ allowSyscall(ctx, SCMP_SYS(getxattr));
+ allowSyscall(ctx, SCMP_SYS(gtty));
+ allowSyscall(ctx, SCMP_SYS(idle));
+ allowSyscall(ctx, SCMP_SYS(init_module));
+ allowSyscall(ctx, SCMP_SYS(inotify_add_watch));
+ allowSyscall(ctx, SCMP_SYS(inotify_init));
+ allowSyscall(ctx, SCMP_SYS(inotify_init1));
+ allowSyscall(ctx, SCMP_SYS(inotify_rm_watch));
+ allowSyscall(ctx, SCMP_SYS(io_cancel));
+ allowSyscall(ctx, SCMP_SYS(ioctl));
+ allowSyscall(ctx, SCMP_SYS(io_destroy));
+ allowSyscall(ctx, SCMP_SYS(io_getevents));
+ allowSyscall(ctx, SCMP_SYS(ioperm));
+ allowSyscall(ctx, SCMP_SYS(io_pgetevents));
+ allowSyscall(ctx, SCMP_SYS(io_pgetevents_time64));
+ allowSyscall(ctx, SCMP_SYS(iopl));
+ allowSyscall(ctx, SCMP_SYS(ioprio_get));
+ allowSyscall(ctx, SCMP_SYS(ioprio_set));
+ allowSyscall(ctx, SCMP_SYS(io_setup));
+ allowSyscall(ctx, SCMP_SYS(io_submit));
+ allowSyscall(ctx, SCMP_SYS(io_uring_enter));
+ allowSyscall(ctx, SCMP_SYS(io_uring_register));
+ allowSyscall(ctx, SCMP_SYS(io_uring_setup));
+ allowSyscall(ctx, SCMP_SYS(ipc));
+ allowSyscall(ctx, SCMP_SYS(kcmp));
+ allowSyscall(ctx, SCMP_SYS(kexec_file_load));
+ allowSyscall(ctx, SCMP_SYS(kexec_load));
+ allowSyscall(ctx, SCMP_SYS(keyctl));
+ allowSyscall(ctx, SCMP_SYS(kill));
+ allowSyscall(ctx, SCMP_SYS(landlock_add_rule));
+ allowSyscall(ctx, SCMP_SYS(landlock_create_ruleset));
+ allowSyscall(ctx, SCMP_SYS(landlock_restrict_self));
+ allowSyscall(ctx, SCMP_SYS(lchown));
+ allowSyscall(ctx, SCMP_SYS(lchown32));
+ allowSyscall(ctx, SCMP_SYS(lgetxattr));
+ allowSyscall(ctx, SCMP_SYS(link));
+ allowSyscall(ctx, SCMP_SYS(linkat));
+ allowSyscall(ctx, SCMP_SYS(listen));
+ allowSyscall(ctx, SCMP_SYS(listxattr));
+ allowSyscall(ctx, SCMP_SYS(llistxattr));
+ allowSyscall(ctx, SCMP_SYS(_llseek));
+ allowSyscall(ctx, SCMP_SYS(lock));
+ allowSyscall(ctx, SCMP_SYS(lookup_dcookie));
+ allowSyscall(ctx, SCMP_SYS(lremovexattr));
+ allowSyscall(ctx, SCMP_SYS(lseek));
+ // skip lsetxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(lstat));
+ allowSyscall(ctx, SCMP_SYS(lstat64));
+ allowSyscall(ctx, SCMP_SYS(madvise));
+ allowSyscall(ctx, SCMP_SYS(map_shadow_stack));
+ allowSyscall(ctx, SCMP_SYS(mbind));
+ allowSyscall(ctx, SCMP_SYS(membarrier));
+ allowSyscall(ctx, SCMP_SYS(memfd_create));
+ allowSyscall(ctx, SCMP_SYS(memfd_secret));
+ allowSyscall(ctx, SCMP_SYS(migrate_pages));
+ allowSyscall(ctx, SCMP_SYS(mincore));
+ allowSyscall(ctx, SCMP_SYS(mkdir));
+ allowSyscall(ctx, SCMP_SYS(mkdirat));
+ allowSyscall(ctx, SCMP_SYS(mknod));
+ allowSyscall(ctx, SCMP_SYS(mknodat));
+ allowSyscall(ctx, SCMP_SYS(mlock));
+ allowSyscall(ctx, SCMP_SYS(mlock2));
+ allowSyscall(ctx, SCMP_SYS(mlockall));
+ allowSyscall(ctx, SCMP_SYS(mmap));
+ allowSyscall(ctx, SCMP_SYS(mmap2));
+ allowSyscall(ctx, SCMP_SYS(modify_ldt));
+ allowSyscall(ctx, SCMP_SYS(mount));
+ allowSyscall(ctx, SCMP_SYS(mount_setattr));
+ allowSyscall(ctx, SCMP_SYS(move_mount));
+ allowSyscall(ctx, SCMP_SYS(move_pages));
+ allowSyscall(ctx, SCMP_SYS(mprotect));
+ allowSyscall(ctx, SCMP_SYS(mpx));
+ allowSyscall(ctx, SCMP_SYS(mq_getsetattr));
+ allowSyscall(ctx, SCMP_SYS(mq_notify));
+ allowSyscall(ctx, SCMP_SYS(mq_open));
+ allowSyscall(ctx, SCMP_SYS(mq_timedreceive));
+ allowSyscall(ctx, SCMP_SYS(mq_timedreceive_time64));
+ allowSyscall(ctx, SCMP_SYS(mq_timedsend));
+ allowSyscall(ctx, SCMP_SYS(mq_timedsend_time64));
+ allowSyscall(ctx, SCMP_SYS(mq_unlink));
+ allowSyscall(ctx, SCMP_SYS(mremap));
+ allowSyscall(ctx, SCMP_SYS(msgctl));
+ allowSyscall(ctx, SCMP_SYS(msgget));
+ allowSyscall(ctx, SCMP_SYS(msgrcv));
+ allowSyscall(ctx, SCMP_SYS(msgsnd));
+ allowSyscall(ctx, SCMP_SYS(msync));
+ allowSyscall(ctx, SCMP_SYS(multiplexer));
+ allowSyscall(ctx, SCMP_SYS(munlock));
+ allowSyscall(ctx, SCMP_SYS(munlockall));
+ allowSyscall(ctx, SCMP_SYS(munmap));
+ allowSyscall(ctx, SCMP_SYS(name_to_handle_at));
+ allowSyscall(ctx, SCMP_SYS(nanosleep));
+ allowSyscall(ctx, SCMP_SYS(newfstatat));
+ allowSyscall(ctx, SCMP_SYS(_newselect));
+ allowSyscall(ctx, SCMP_SYS(nfsservctl));
+ allowSyscall(ctx, SCMP_SYS(nice));
+ allowSyscall(ctx, SCMP_SYS(oldfstat));
+ allowSyscall(ctx, SCMP_SYS(oldlstat));
+ allowSyscall(ctx, SCMP_SYS(oldolduname));
+ allowSyscall(ctx, SCMP_SYS(oldstat));
+ allowSyscall(ctx, SCMP_SYS(olduname));
+ allowSyscall(ctx, SCMP_SYS(open));
+ allowSyscall(ctx, SCMP_SYS(openat));
+ allowSyscall(ctx, SCMP_SYS(openat2));
+ allowSyscall(ctx, SCMP_SYS(open_by_handle_at));
+ allowSyscall(ctx, SCMP_SYS(open_tree));
+ allowSyscall(ctx, SCMP_SYS(pause));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_iobase));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_read));
+ allowSyscall(ctx, SCMP_SYS(pciconfig_write));
+ allowSyscall(ctx, SCMP_SYS(perf_event_open));
+ allowSyscall(ctx, SCMP_SYS(personality));
+ allowSyscall(ctx, SCMP_SYS(pidfd_getfd));
+ allowSyscall(ctx, SCMP_SYS(pidfd_open));
+ allowSyscall(ctx, SCMP_SYS(pidfd_send_signal));
+ allowSyscall(ctx, SCMP_SYS(pipe));
+ allowSyscall(ctx, SCMP_SYS(pipe2));
+ allowSyscall(ctx, SCMP_SYS(pivot_root));
+ allowSyscall(ctx, SCMP_SYS(pkey_alloc));
+ allowSyscall(ctx, SCMP_SYS(pkey_free));
+ allowSyscall(ctx, SCMP_SYS(pkey_mprotect));
+ allowSyscall(ctx, SCMP_SYS(poll));
+ allowSyscall(ctx, SCMP_SYS(ppoll));
+ allowSyscall(ctx, SCMP_SYS(ppoll_time64));
+ allowSyscall(ctx, SCMP_SYS(prctl));
+ allowSyscall(ctx, SCMP_SYS(pread64));
+ allowSyscall(ctx, SCMP_SYS(preadv));
+ allowSyscall(ctx, SCMP_SYS(preadv2));
+ allowSyscall(ctx, SCMP_SYS(prlimit64));
+ allowSyscall(ctx, SCMP_SYS(process_madvise));
+ allowSyscall(ctx, SCMP_SYS(process_mrelease));
+ allowSyscall(ctx, SCMP_SYS(process_vm_readv));
+ allowSyscall(ctx, SCMP_SYS(process_vm_writev));
+ allowSyscall(ctx, SCMP_SYS(prof));
+ allowSyscall(ctx, SCMP_SYS(profil));
+ allowSyscall(ctx, SCMP_SYS(pselect6));
+ allowSyscall(ctx, SCMP_SYS(pselect6_time64));
+ allowSyscall(ctx, SCMP_SYS(ptrace));
+ allowSyscall(ctx, SCMP_SYS(putpmsg));
+ allowSyscall(ctx, SCMP_SYS(pwrite64));
+ allowSyscall(ctx, SCMP_SYS(pwritev));
+ allowSyscall(ctx, SCMP_SYS(pwritev2));
+ allowSyscall(ctx, SCMP_SYS(query_module));
+ allowSyscall(ctx, SCMP_SYS(quotactl));
+ allowSyscall(ctx, SCMP_SYS(quotactl_fd));
+ allowSyscall(ctx, SCMP_SYS(read));
+ allowSyscall(ctx, SCMP_SYS(readahead));
+ allowSyscall(ctx, SCMP_SYS(readdir));
+ allowSyscall(ctx, SCMP_SYS(readlink));
+ allowSyscall(ctx, SCMP_SYS(readlinkat));
+ allowSyscall(ctx, SCMP_SYS(readv));
+ allowSyscall(ctx, SCMP_SYS(reboot));
+ allowSyscall(ctx, SCMP_SYS(recv));
+ allowSyscall(ctx, SCMP_SYS(recvfrom));
+ allowSyscall(ctx, SCMP_SYS(recvmmsg));
+ allowSyscall(ctx, SCMP_SYS(recvmmsg_time64));
+ allowSyscall(ctx, SCMP_SYS(recvmsg));
+ allowSyscall(ctx, SCMP_SYS(remap_file_pages));
+ allowSyscall(ctx, SCMP_SYS(removexattr));
+ allowSyscall(ctx, SCMP_SYS(rename));
+ allowSyscall(ctx, SCMP_SYS(renameat));
+ allowSyscall(ctx, SCMP_SYS(renameat2));
+ allowSyscall(ctx, SCMP_SYS(request_key));
+ allowSyscall(ctx, SCMP_SYS(restart_syscall));
+ allowSyscall(ctx, SCMP_SYS(riscv_flush_icache));
+ allowSyscall(ctx, SCMP_SYS(rmdir));
+ allowSyscall(ctx, SCMP_SYS(rseq));
+ allowSyscall(ctx, SCMP_SYS(rtas));
+ allowSyscall(ctx, SCMP_SYS(rt_sigaction));
+ allowSyscall(ctx, SCMP_SYS(rt_sigpending));
+ allowSyscall(ctx, SCMP_SYS(rt_sigprocmask));
+ allowSyscall(ctx, SCMP_SYS(rt_sigqueueinfo));
+ allowSyscall(ctx, SCMP_SYS(rt_sigreturn));
+ allowSyscall(ctx, SCMP_SYS(rt_sigsuspend));
+ allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait));
+ allowSyscall(ctx, SCMP_SYS(rt_sigtimedwait_time64));
+ allowSyscall(ctx, SCMP_SYS(rt_tgsigqueueinfo));
+ allowSyscall(ctx, SCMP_SYS(s390_guarded_storage));
+ allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_read));
+ allowSyscall(ctx, SCMP_SYS(s390_pci_mmio_write));
+ allowSyscall(ctx, SCMP_SYS(s390_runtime_instr));
+ allowSyscall(ctx, SCMP_SYS(s390_sthyi));
+ allowSyscall(ctx, SCMP_SYS(sched_getaffinity));
+ allowSyscall(ctx, SCMP_SYS(sched_getattr));
+ allowSyscall(ctx, SCMP_SYS(sched_getparam));
+ allowSyscall(ctx, SCMP_SYS(sched_get_priority_max));
+ allowSyscall(ctx, SCMP_SYS(sched_get_priority_min));
+ allowSyscall(ctx, SCMP_SYS(sched_getscheduler));
+ allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval));
+ allowSyscall(ctx, SCMP_SYS(sched_rr_get_interval_time64));
+ allowSyscall(ctx, SCMP_SYS(sched_setaffinity));
+ allowSyscall(ctx, SCMP_SYS(sched_setattr));
+ allowSyscall(ctx, SCMP_SYS(sched_setparam));
+ allowSyscall(ctx, SCMP_SYS(sched_setscheduler));
+ allowSyscall(ctx, SCMP_SYS(sched_yield));
+ allowSyscall(ctx, SCMP_SYS(seccomp));
+ allowSyscall(ctx, SCMP_SYS(security));
+ allowSyscall(ctx, SCMP_SYS(select));
+ allowSyscall(ctx, SCMP_SYS(semctl));
+ allowSyscall(ctx, SCMP_SYS(semget));
+ allowSyscall(ctx, SCMP_SYS(semop));
+ allowSyscall(ctx, SCMP_SYS(semtimedop));
+ allowSyscall(ctx, SCMP_SYS(semtimedop_time64));
+ allowSyscall(ctx, SCMP_SYS(send));
+ allowSyscall(ctx, SCMP_SYS(sendfile));
+ allowSyscall(ctx, SCMP_SYS(sendfile64));
+ allowSyscall(ctx, SCMP_SYS(sendmmsg));
+ allowSyscall(ctx, SCMP_SYS(sendmsg));
+ allowSyscall(ctx, SCMP_SYS(sendto));
+ allowSyscall(ctx, SCMP_SYS(setdomainname));
+ allowSyscall(ctx, SCMP_SYS(setfsgid));
+ allowSyscall(ctx, SCMP_SYS(setfsgid32));
+ allowSyscall(ctx, SCMP_SYS(setfsuid));
+ allowSyscall(ctx, SCMP_SYS(setfsuid32));
+ allowSyscall(ctx, SCMP_SYS(setgid));
+ allowSyscall(ctx, SCMP_SYS(setgid32));
+ allowSyscall(ctx, SCMP_SYS(setgroups));
+ allowSyscall(ctx, SCMP_SYS(setgroups32));
+ allowSyscall(ctx, SCMP_SYS(sethostname));
+ allowSyscall(ctx, SCMP_SYS(setitimer));
+ allowSyscall(ctx, SCMP_SYS(set_mempolicy));
+ allowSyscall(ctx, SCMP_SYS(set_mempolicy_home_node));
+ allowSyscall(ctx, SCMP_SYS(setns));
+ allowSyscall(ctx, SCMP_SYS(setpgid));
+ allowSyscall(ctx, SCMP_SYS(setpriority));
+ allowSyscall(ctx, SCMP_SYS(setregid));
+ allowSyscall(ctx, SCMP_SYS(setregid32));
+ allowSyscall(ctx, SCMP_SYS(setresgid));
+ allowSyscall(ctx, SCMP_SYS(setresgid32));
+ allowSyscall(ctx, SCMP_SYS(setresuid));
+ allowSyscall(ctx, SCMP_SYS(setresuid32));
+ allowSyscall(ctx, SCMP_SYS(setreuid));
+ allowSyscall(ctx, SCMP_SYS(setreuid32));
+ allowSyscall(ctx, SCMP_SYS(setrlimit));
+ allowSyscall(ctx, SCMP_SYS(set_robust_list));
+ allowSyscall(ctx, SCMP_SYS(setsid));
+ allowSyscall(ctx, SCMP_SYS(setsockopt));
+ allowSyscall(ctx, SCMP_SYS(set_thread_area));
+ allowSyscall(ctx, SCMP_SYS(set_tid_address));
+ allowSyscall(ctx, SCMP_SYS(settimeofday));
+ allowSyscall(ctx, SCMP_SYS(set_tls));
+ allowSyscall(ctx, SCMP_SYS(setuid));
+ allowSyscall(ctx, SCMP_SYS(setuid32));
+ // skip setxattr (dangerous)
+ allowSyscall(ctx, SCMP_SYS(sgetmask));
+ allowSyscall(ctx, SCMP_SYS(shmat));
+ allowSyscall(ctx, SCMP_SYS(shmctl));
+ allowSyscall(ctx, SCMP_SYS(shmdt));
+ allowSyscall(ctx, SCMP_SYS(shmget));
+ allowSyscall(ctx, SCMP_SYS(shutdown));
+ allowSyscall(ctx, SCMP_SYS(sigaction));
+ allowSyscall(ctx, SCMP_SYS(sigaltstack));
+ allowSyscall(ctx, SCMP_SYS(signal));
+ allowSyscall(ctx, SCMP_SYS(signalfd));
+ allowSyscall(ctx, SCMP_SYS(signalfd4));
+ allowSyscall(ctx, SCMP_SYS(sigpending));
+ allowSyscall(ctx, SCMP_SYS(sigprocmask));
+ allowSyscall(ctx, SCMP_SYS(sigreturn));
+ allowSyscall(ctx, SCMP_SYS(sigsuspend));
+ allowSyscall(ctx, SCMP_SYS(socket));
+ allowSyscall(ctx, SCMP_SYS(socketcall));
+ allowSyscall(ctx, SCMP_SYS(socketpair));
+ allowSyscall(ctx, SCMP_SYS(splice));
+ allowSyscall(ctx, SCMP_SYS(spu_create));
+ allowSyscall(ctx, SCMP_SYS(spu_run));
+ allowSyscall(ctx, SCMP_SYS(ssetmask));
+ allowSyscall(ctx, SCMP_SYS(stat));
+ allowSyscall(ctx, SCMP_SYS(stat64));
+ allowSyscall(ctx, SCMP_SYS(statfs));
+ allowSyscall(ctx, SCMP_SYS(statfs64));
+ allowSyscall(ctx, SCMP_SYS(statx));
+ allowSyscall(ctx, SCMP_SYS(stime));
+ allowSyscall(ctx, SCMP_SYS(stty));
+ allowSyscall(ctx, SCMP_SYS(subpage_prot));
+ allowSyscall(ctx, SCMP_SYS(swapcontext));
+ allowSyscall(ctx, SCMP_SYS(swapoff));
+ allowSyscall(ctx, SCMP_SYS(swapon));
+ allowSyscall(ctx, SCMP_SYS(switch_endian));
+ allowSyscall(ctx, SCMP_SYS(symlink));
+ allowSyscall(ctx, SCMP_SYS(symlinkat));
+ allowSyscall(ctx, SCMP_SYS(sync));
+ allowSyscall(ctx, SCMP_SYS(sync_file_range));
+ allowSyscall(ctx, SCMP_SYS(sync_file_range2));
+ allowSyscall(ctx, SCMP_SYS(syncfs));
+ allowSyscall(ctx, SCMP_SYS(syscall));
+ allowSyscall(ctx, SCMP_SYS(_sysctl));
+ allowSyscall(ctx, SCMP_SYS(sys_debug_setcontext));
+ allowSyscall(ctx, SCMP_SYS(sysfs));
+ allowSyscall(ctx, SCMP_SYS(sysinfo));
+ allowSyscall(ctx, SCMP_SYS(syslog));
+ allowSyscall(ctx, SCMP_SYS(sysmips));
+ allowSyscall(ctx, SCMP_SYS(tee));
+ allowSyscall(ctx, SCMP_SYS(tgkill));
+ allowSyscall(ctx, SCMP_SYS(time));
+ allowSyscall(ctx, SCMP_SYS(timer_create));
+ allowSyscall(ctx, SCMP_SYS(timer_delete));
+ allowSyscall(ctx, SCMP_SYS(timerfd));
+ allowSyscall(ctx, SCMP_SYS(timerfd_create));
+ allowSyscall(ctx, SCMP_SYS(timerfd_gettime));
+ allowSyscall(ctx, SCMP_SYS(timerfd_gettime64));
+ allowSyscall(ctx, SCMP_SYS(timerfd_settime));
+ allowSyscall(ctx, SCMP_SYS(timerfd_settime64));
+ allowSyscall(ctx, SCMP_SYS(timer_getoverrun));
+ allowSyscall(ctx, SCMP_SYS(timer_gettime));
+ allowSyscall(ctx, SCMP_SYS(timer_gettime64));
+ allowSyscall(ctx, SCMP_SYS(timer_settime));
+ allowSyscall(ctx, SCMP_SYS(timer_settime64));
+ allowSyscall(ctx, SCMP_SYS(times));
+ allowSyscall(ctx, SCMP_SYS(tkill));
+ allowSyscall(ctx, SCMP_SYS(truncate));
+ allowSyscall(ctx, SCMP_SYS(truncate64));
+ allowSyscall(ctx, SCMP_SYS(tuxcall));
+ allowSyscall(ctx, SCMP_SYS(ugetrlimit));
+ allowSyscall(ctx, SCMP_SYS(ulimit));
+ allowSyscall(ctx, SCMP_SYS(umask));
+ allowSyscall(ctx, SCMP_SYS(umount));
+ allowSyscall(ctx, SCMP_SYS(umount2));
+ allowSyscall(ctx, SCMP_SYS(uname));
+ allowSyscall(ctx, SCMP_SYS(unlink));
+ allowSyscall(ctx, SCMP_SYS(unlinkat));
+ allowSyscall(ctx, SCMP_SYS(unshare));
+ allowSyscall(ctx, SCMP_SYS(uselib));
+ allowSyscall(ctx, SCMP_SYS(userfaultfd));
+ allowSyscall(ctx, SCMP_SYS(usr26));
+ allowSyscall(ctx, SCMP_SYS(usr32));
+ allowSyscall(ctx, SCMP_SYS(ustat));
+ allowSyscall(ctx, SCMP_SYS(utime));
+ allowSyscall(ctx, SCMP_SYS(utimensat));
+ allowSyscall(ctx, SCMP_SYS(utimensat_time64));
+ allowSyscall(ctx, SCMP_SYS(utimes));
+ allowSyscall(ctx, SCMP_SYS(vfork));
+ allowSyscall(ctx, SCMP_SYS(vhangup));
+ allowSyscall(ctx, SCMP_SYS(vm86));
+ allowSyscall(ctx, SCMP_SYS(vm86old));
+ allowSyscall(ctx, SCMP_SYS(vmsplice));
+ allowSyscall(ctx, SCMP_SYS(vserver));
+ allowSyscall(ctx, SCMP_SYS(wait4));
+ allowSyscall(ctx, SCMP_SYS(waitid));
+ allowSyscall(ctx, SCMP_SYS(waitpid));
+ allowSyscall(ctx, SCMP_SYS(write));
+ allowSyscall(ctx, SCMP_SYS(writev));
+ // END extract-syscalls
+
+ // chmod family: prevent adding setuid/setgid bits to existing files.
+ // The Nix store does not support setuid/setgid, and even their temporary creation can weaken the security of the sandbox.
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(chmod), 1);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmod), 1);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat), 2);
+ ALLOW_CHMOD_IF_SAFE(ctx, SCMP_SYS(fchmodat2), 2);
+
+ // setxattr family: prevent creation of extended attributes or ACLs.
+ // Not all filesystems support them, and they're incompatible with the NAR format.
if (seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(setxattr), 0) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(lsetxattr), 0) != 0 ||
seccomp_rule_add(ctx, SCMP_ACT_ERRNO(ENOTSUP), SCMP_SYS(fsetxattr), 0) != 0)
diff --git a/src/libstore/linux/fchmodat2-compat.hh b/src/libstore/linux/fchmodat2-compat.hh
deleted file mode 100644
index d5ef81e22..000000000
--- a/src/libstore/linux/fchmodat2-compat.hh
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Determine the syscall number for `fchmodat2`.
- *
- * On most platforms this is 452. Exceptions can be found on
- * a glibc git checkout via `rg --pcre2 'define __NR_fchmodat2 (?!452)'`.
- *
- * The problem is that glibc 2.39 and libseccomp 2.5.5 are needed to
- * get the syscall number. However, a Lix built against nixpkgs 23.11
- * (glibc 2.38) should still have the issue fixed without depending
- * on the build environment.
- *
- * To achieve that, the macros below try to determine the platform and
- * set the syscall number which is platform-specific, but
- * in most cases 452.
- *
- * TODO: remove this when 23.11 is EOL and the entire (supported) ecosystem
- * is on glibc 2.39.
- */
-
-#pragma once
-///@file
-
-#if defined(__alpha__)
-# define NIX_SYSCALL_FCHMODAT2 562
-#elif defined(__x86_64__) && SIZE_MAX == 0xFFFFFFFF // x32
-# define NIX_SYSCALL_FCHMODAT2 1073742276
-#elif defined(__mips__) && defined(__mips64) && defined(_ABIN64) // mips64/n64
-# define NIX_SYSCALL_FCHMODAT2 5452
-#elif defined(__mips__) && defined(__mips64) && defined(_ABIN32) // mips64/n32
-# define NIX_SYSCALL_FCHMODAT2 6452
-#elif defined(__mips__) && defined(_ABIO32) // mips32
-# define NIX_SYSCALL_FCHMODAT2 4452
-#else
-# define NIX_SYSCALL_FCHMODAT2 452
-#endif
diff --git a/tests/nixos/setuid/fchmodat2-suid.c b/tests/nixos/setuid/fchmodat2-suid.c
index 931489ad7..7280331d5 100644
--- a/tests/nixos/setuid/fchmodat2-suid.c
+++ b/tests/nixos/setuid/fchmodat2-suid.c
@@ -12,10 +12,7 @@ int main(void) {
fprintf(fd, "henlo :3");
fclose(fd);
- // FIXME use something nicer here that's less
- // platform-dependent as soon as we go to 24.05
- // and the glibc is new enough to support fchmodat2
- long rs = syscall(452, NULL, name, S_ISUID, 0);
+ long rs = syscall(SYS_fchmodat2, NULL, name, S_ISUID, 0);
assert(rs == -1);
assert(errno == EPERM);
}