diff options
Diffstat (limited to 'src/libutil')
-rw-r--r-- | src/libutil/namespaces.cc | 91 | ||||
-rw-r--r-- | src/libutil/namespaces.hh | 4 | ||||
-rw-r--r-- | src/libutil/util.cc | 33 | ||||
-rw-r--r-- | src/libutil/util.hh | 1 |
4 files changed, 77 insertions, 52 deletions
diff --git a/src/libutil/namespaces.cc b/src/libutil/namespaces.cc index fdd52d92b..f66accb10 100644 --- a/src/libutil/namespaces.cc +++ b/src/libutil/namespaces.cc @@ -4,7 +4,7 @@ #include "util.hh" #include "finally.hh" -#include <mntent.h> +#include <sys/mount.h> namespace nix { @@ -33,63 +33,60 @@ bool userNamespacesSupported() return false; } - Pid pid = startProcess([&]() - { - auto res = unshare(CLONE_NEWUSER); - _exit(res ? 1 : 0); - }); - - bool supported = pid.wait() == 0; - - if (!supported) - debug("user namespaces do not work on this system"); - - return supported; - }(); - return res; -} - -bool mountNamespacesSupported() -{ - static auto res = [&]() -> bool - { - bool useUserNamespace = userNamespacesSupported(); - - Pid pid = startProcess([&]() - { - auto res = unshare(CLONE_NEWNS | (useUserNamespace ? CLONE_NEWUSER : 0)); - _exit(res ? 1 : 0); - }); - - bool supported = pid.wait() == 0; - - if (!supported) - debug("mount namespaces do not work on this system"); + try { + Pid pid = startProcess([&]() + { + _exit(0); + }, { + .cloneFlags = CLONE_NEWUSER + }); + + auto r = pid.wait(); + assert(!r); + } catch (SysError & e) { + debug("user namespaces do not work on this system: %s", e.msg()); + return false; + } - return supported; + return true; }(); return res; } -bool pidNamespacesSupported() +bool mountAndPidNamespacesSupported() { static auto res = [&]() -> bool { - /* Check whether /proc is fully visible, i.e. there are no - filesystems mounted on top of files inside /proc. If this - is not the case, then we cannot mount a new /proc inside - the sandbox that matches the sandbox's PID namespace. - See https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */ - auto fp = fopen("/proc/mounts", "r"); - if (!fp) return false; - Finally delFP = [&]() { fclose(fp); }; - - while (auto ent = getmntent(fp)) - if (hasPrefix(std::string_view(ent->mnt_dir), "/proc/")) { - debug("PID namespaces do not work because /proc is not fully visible; disabling sandboxing"); + try { + + Pid pid = startProcess([&]() + { + /* Make sure we don't remount the parent's /proc. */ + if (mount(0, "/", 0, MS_PRIVATE | MS_REC, 0) == -1) + _exit(1); + + /* Test whether we can remount /proc. The kernel disallows + this if /proc is not fully visible, i.e. if there are + filesystems mounted on top of files inside /proc. See + https://lore.kernel.org/lkml/87tvsrjai0.fsf@xmission.com/T/. */ + if (mount("none", "/proc", "proc", 0, 0) == -1) + _exit(2); + + _exit(0); + }, { + .cloneFlags = CLONE_NEWNS | CLONE_NEWPID | (userNamespacesSupported() ? CLONE_NEWUSER : 0) + }); + + if (pid.wait()) { + debug("PID namespaces do not work on this system: cannot remount /proc"); return false; } + } catch (SysError & e) { + debug("mount namespaces do not work on this system: %s", e.msg()); + return false; + } + return true; }(); return res; diff --git a/src/libutil/namespaces.hh b/src/libutil/namespaces.hh index 34e54d5ad..e82379b9c 100644 --- a/src/libutil/namespaces.hh +++ b/src/libutil/namespaces.hh @@ -6,9 +6,7 @@ namespace nix { bool userNamespacesSupported(); -bool mountNamespacesSupported(); - -bool pidNamespacesSupported(); +bool mountAndPidNamespacesSupported(); #endif diff --git a/src/libutil/util.cc b/src/libutil/util.cc index 40a54b010..885bae69c 100644 --- a/src/libutil/util.cc +++ b/src/libutil/util.cc @@ -36,6 +36,7 @@ #ifdef __linux__ #include <sys/prctl.h> #include <sys/resource.h> +#include <sys/mman.h> #include <cmath> #endif @@ -1064,9 +1065,17 @@ static pid_t doFork(bool allowVfork, std::function<void()> fun) } +static int childEntry(void * arg) +{ + auto main = (std::function<void()> *) arg; + (*main)(); + return 1; +} + + pid_t startProcess(std::function<void()> fun, const ProcessOptions & options) { - auto wrapper = [&]() { + std::function<void()> wrapper = [&]() { if (!options.allowVfork) logger = makeSimpleLogger(); try { @@ -1086,7 +1095,27 @@ pid_t startProcess(std::function<void()> fun, const ProcessOptions & options) _exit(1); }; - pid_t pid = doFork(options.allowVfork, wrapper); + pid_t pid = -1; + + if (options.cloneFlags) { + #ifdef __linux__ + // Not supported, since then we don't know when to free the stack. + assert(!(options.cloneFlags & CLONE_VM)); + + size_t stackSize = 1 * 1024 * 1024; + auto stack = (char *) mmap(0, stackSize, + PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (stack == MAP_FAILED) throw SysError("allocating stack"); + + Finally freeStack([&]() { munmap(stack, stackSize); }); + + pid = clone(childEntry, stack + stackSize, options.cloneFlags | SIGCHLD, &wrapper); + #else + throw Error("clone flags are only supported on Linux"); + #endif + } else + pid = doFork(options.allowVfork, wrapper); + if (pid == -1) throw SysError("unable to fork"); return pid; diff --git a/src/libutil/util.hh b/src/libutil/util.hh index 4fadafaf2..b5625ecef 100644 --- a/src/libutil/util.hh +++ b/src/libutil/util.hh @@ -307,6 +307,7 @@ struct ProcessOptions bool dieWithParent = true; bool runExitHandlers = false; bool allowVfork = false; + int cloneFlags = 0; // use clone() with the specified flags (Linux only) }; pid_t startProcess(std::function<void()> fun, const ProcessOptions & options = ProcessOptions()); |