aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eelco Dolstra <edolstra@gmail.com> 2023-02-10 20:41:13 +0100
committer: GitHub <noreply@github.com> 2023-02-10 20:41:13 +0100
commit: 67451d8ed797d1bd5fb69f6218ea686761ad13b0 (patch)
tree: b7d7fc553066c29ae6009445c089f620b36352ad
parent: 9ebbe35817a7f7becf77d9f0cd76c54d693f6f28 (diff)
parent: a21405a4e8a5ca4bfbe8df8de2f76d69c4608a9f (diff)
Merge pull request #7802 from edolstra/fix-7783
Fix PID namespace support check
-rw-r--r-- src/libstore/build/local-derivation-goal.cc 24
-rw-r--r-- src/libutil/namespaces.cc 91
-rw-r--r-- src/libutil/namespaces.hh 4
-rw-r--r-- src/libutil/util.cc 33
-rw-r--r-- src/libutil/util.hh 1
-rw-r--r-- tests/nixos/remote-builds.nix 5
6 files changed, 88 insertions, 70 deletions
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc
index e1cc504f8..7c4892c96 100644
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@@ -209,7 +209,7 @@ void LocalDerivationGoal::tryLocalBuild()
#if __linux__
if (useChroot) {
- if (!mountNamespacesSupported() || !pidNamespacesSupported()) {
+ if (!mountAndPidNamespacesSupported()) {
if (!settings.sandboxFallback)
throw Error("this system does not support the kernel namespaces that are required for sandboxing; use '--no-sandbox' to disable sandboxing");
debug("auto-disabling sandboxing because the prerequisite namespaces are not available");
@@ -385,12 +385,6 @@ void LocalDerivationGoal::cleanupPostOutputsRegisteredModeNonCheck()
}
-int childEntry(void * arg)
-{
- ((LocalDerivationGoal *) arg)->runChild();
- return 1;
-}
-
#if __linux__
static void linkOrCopy(const Path & from, const Path & to)
{
@@ -916,21 +910,15 @@ void LocalDerivationGoal::startBuilder()
if (getuid() == 0 && setgroups(0, 0) == -1)
throw SysError("setgroups failed");
- size_t stackSize = 1 * 1024 * 1024;
- char * stack = (char *) mmap(0, stackSize,
- PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
- if (stack == MAP_FAILED) throw SysError("allocating stack");
-
- int flags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_PARENT | SIGCHLD;
+ ProcessOptions options;
+ options.cloneFlags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_PARENT | SIGCHLD;
if (privateNetwork)
- flags |= CLONE_NEWNET;
+ options.cloneFlags |= CLONE_NEWNET;
if (usingUserNamespace)
- flags |= CLONE_NEWUSER;
+ options.cloneFlags |= CLONE_NEWUSER;
- pid_t child = clone(childEntry, stack + stackSize, flags, this);
+ pid_t child = startProcess([&]() { runChild(); }, options);
- if (child == -1)
- throw SysError("creating sandboxed builder process using clone()");
writeFull(builderOut.writeSide.get(),
fmt("%d %d\n", usingUserNamespace, child));
_exit(0);
diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc
index fdd52d92b..f66accb10 100644
--- a/src/libutil/namespaces.cc
+++ b/src/libutil/namespaces.cc
@@ -4,7 +4,7 @@
#include "util.hh"
#include "finally.hh"
-#include <mntent.h>
+#include <sys/mount.h>
namespace nix {
@@ -33,63 +33,60 @@ bool userNamespacesSupported()
return false;
}
- Pid pid = startProcess([&]()
- {
- auto res = unshare(CLONE_NEWUSER);
- _exit(res ? 1 : 0);
- });
-
- bool supported = pid.wait() == 0;
-
- if (!supported)
- debug("user namespaces do not work on this system");
-
- return supported;
- }();
- return res;
-}
-
-bool mountNamespacesSupported()
-{
- static auto res = [&]() -> bool
- {
- bool useUserNamespace = userNamespacesSupported();
-
- Pid pid = startProcess([&]()
- {
- auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0));
- _exit(res ? 1 : 0);
- });
-
- bool supported = pid.wait() == 0;
-
- if (!supported)
- debug("mount namespaces do not work on this system");
+ try {
+ Pid pid = startProcess([&]()
+ {
+ _exit(0);
+ }, {
+ .cloneFlags = CLONE_NEWUSER
+ });
+
+ auto r = pid.wait();
+ assert(!r);
+ } catch (SysError & e) {
+ debug("user namespaces do not work on this system: %s", e.msg());
+ return false;
+ }
- return supported;
+ return true;
}();
return res;
}
-bool pidNamespacesSupported()
+bool mountAndPidNamespacesSupported()
{
static auto res = [&]() -> bool
{
- /* Check whether /proc is fully visible, i.e. there are no
- filesystems mounted on top of files inside /proc. If this
- is not the case, then we cannot mount a new /proc inside
- the sandbox that matches the sandbox's PID namespace.
- See https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */
- auto fp = fopen("/proc/mounts", "r");
- if (!fp) return false;
- Finally delFP = [&]() { fclose(fp); };
-
- while (auto ent = getmntent(fp))
- if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) {
- debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing");
+ try {
+
+ Pid pid = startProcess([&]()
+ {
+ /* Make sure we don't remount the parent's /proc. */
+ if (mount(0, "/", 0, MS_PRIVATE | MS_REC, 0) == -1)
+ _exit(1);
+
+ /* Test whether we can remount /proc. The kernel disallows
+ this if /proc is not fully visible, i.e. if there are
+ filesystems mounted on top of files inside /proc. See
+ https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */
+ if (mount("none", "/proc", "proc", 0, 0) == -1)
+ _exit(2);
+
+ _exit(0);
+ }, {
+ .cloneFlags = CLONE_NEWNS | CLONE_NEWPID | (userNamespacesSupported() ? CLONE_NEWUSER : 0)
+ });
+
+ if (pid.wait()) {
+ debug("PID namespaces do not work on this system: cannot remount /proc");
return false;
}
+ } catch (SysError & e) {
+ debug("mount namespaces do not work on this system: %s", e.msg());
+ return false;
+ }
+
return true;
}();
return res;
diff --git a/src/libutil/namespaces.hh b/src/libutil/namespaces.hh
index 34e54d5ad..e82379b9c 100644
--- a/src/libutil/namespaces.hh
+++ b/src/libutil/namespaces.hh
@@ -6,9 +6,7 @@ namespace nix {
bool userNamespacesSupported();
-bool mountNamespacesSupported();
-
-bool pidNamespacesSupported();
+bool mountAndPidNamespacesSupported();
#endif
diff --git a/src/libutil/util.cc b/src/libutil/util.cc
index 40a54b010..885bae69c 100644
--- a/src/libutil/util.cc
+++ b/src/libutil/util.cc
@@ -36,6 +36,7 @@
#ifdef __linux__
#include <sys/prctl.h>
#include <sys/resource.h>
+#include <sys/mman.h>
#include <cmath>
#endif
@@ -1064,9 +1065,17 @@ static pid_t doFork(bool allowVfork, std::function<void()> fun)
}
+static int childEntry(void * arg)
+{
+ auto main = (std::function<void()> *) arg;
+ (*main)();
+ return 1;
+}
+
+
pid_t startProcess(std::function<void()> fun, const ProcessOptions & options)
{
- auto wrapper = [&]() {
+ std::function<void()> wrapper = [&]() {
if (!options.allowVfork)
logger = makeSimpleLogger();
try {
@@ -1086,7 +1095,27 @@ pid_t startProcess(std::function<void()> fun, const ProcessOptions & options)
_exit(1);
};
- pid_t pid = doFork(options.allowVfork, wrapper);
+ pid_t pid = -1;
+
+ if (options.cloneFlags) {
+ #ifdef __linux__
+ // Not supported, since then we don't know when to free the stack.
+ assert(!(options.cloneFlags & CLONE_VM));
+
+ size_t stackSize = 1 * 1024 * 1024;
+ auto stack = (char *) mmap(0, stackSize,
+ PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (stack == MAP_FAILED) throw SysError("allocating stack");
+
+ Finally freeStack([&]() { munmap(stack, stackSize); });
+
+ pid = clone(childEntry, stack + stackSize, options.cloneFlags | SIGCHLD, &wrapper);
+ #else
+ throw Error("clone flags are only supported on Linux");
+ #endif
+ } else
+ pid = doFork(options.allowVfork, wrapper);
+
if (pid == -1) throw SysError("unable to fork");
return pid;
diff --git a/src/libutil/util.hh b/src/libutil/util.hh
index 4fadafaf2..b5625ecef 100644
--- a/src/libutil/util.hh
+++ b/src/libutil/util.hh
@@ -307,6 +307,7 @@ struct ProcessOptions
bool dieWithParent = true;
bool runExitHandlers = false;
bool allowVfork = false;
+ int cloneFlags = 0; // use clone() with the specified flags (Linux only)
};
pid_t startProcess(std::function<void()> fun, const ProcessOptions & options = ProcessOptions());
diff --git a/tests/nixos/remote-builds.nix b/tests/nixos/remote-builds.nix
index 696cd2652..1c96cc787 100644
--- a/tests/nixos/remote-builds.nix
+++ b/tests/nixos/remote-builds.nix
@@ -11,6 +11,11 @@ let
{ services.openssh.enable = true;
virtualisation.writableStore = true;
nix.settings.sandbox = true;
+
+ # Regression test for use of PID namespaces when /proc has
+ # filesystems mounted on top of it
+ # (i.e. /proc/sys/fs/binfmt_misc).
+ boot.binfmt.emulatedSystems = [ "aarch64-linux" ];
};
# Trivial Nix expression to build remotely.