aboutsummaryrefslogtreecommitdiff
path: root/src/libstore
diff options
context:
space:
mode:
authorArtemis Tosini <lix@artem.ist>2024-05-19 23:01:51 +0000
committerArtemis Tosini <me@artem.ist>2024-07-03 22:37:41 +0000
commite040b762a48f022b1ea4080020083f7367cf3ee5 (patch)
tree49eb474b89a65672d29978fc1b361871b26a3c40 /src/libstore
parentaf1dcc2d5e5f9f1bc01e12face96259cf4183629 (diff)
libstore: add LocalDerivationGoal startChild hook
Add a platform-specific function for starting sandboxed child. Generally this just means startProcess, but on Linux we use flags for clone to start a new namespace Change-Id: I41c8aba62676a162388bbe5ab8a7518904c7b058
Diffstat (limited to 'src/libstore')
-rw-r--r--src/libstore/build/local-derivation-goal.cc164
-rw-r--r--src/libstore/build/local-derivation-goal.hh6
-rw-r--r--src/libstore/platform/linux.cc154
-rw-r--r--src/libstore/platform/linux.hh6
4 files changed, 176 insertions, 154 deletions
diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc
index bc8879428..fb86a9d18 100644
--- a/src/libstore/build/local-derivation-goal.cc
+++ b/src/libstore/build/local-derivation-goal.cc
@@ -1,6 +1,7 @@
#include "local-derivation-goal.hh"
#include "indirect-root-store.hh"
#include "hook-instance.hh"
+#include "store-api.hh"
#include "worker.hh"
#include "builtins.hh"
#include "builtins/buildenv.hh"
@@ -771,160 +772,7 @@ void LocalDerivationGoal::startBuilder()
buildResult.startTime = time(0);
/* Fork a child to build the package. */
-
-#if __linux__
- if (useChroot) {
- /* Set up private namespaces for the build:
-
- - The PID namespace causes the build to start as PID 1.
- Processes outside of the chroot are not visible to those
- on the inside, but processes inside the chroot are
- visible from the outside (though with different PIDs).
-
- - The private mount namespace ensures that all the bind
- mounts we do will only show up in this process and its
- children, and will disappear automatically when we're
- done.
-
- - The private network namespace ensures that the builder
- cannot talk to the outside world (or vice versa). It
- only has a private loopback interface. (Fixed-output
- derivations are not run in a private network namespace
- to allow functions like fetchurl to work.)
-
- - The IPC namespace prevents the builder from communicating
- with outside processes using SysV IPC mechanisms (shared
- memory, message queues, semaphores). It also ensures
- that all IPC objects are destroyed when the builder
- exits.
-
- - The UTS namespace ensures that builders see a hostname of
- localhost rather than the actual hostname.
-
- We use a helper process to do the clone() to work around
- clone() being broken in multi-threaded programs due to
- at-fork handlers not being run. Note that we use
- CLONE_PARENT to ensure that the real builder is parented to
- us.
- */
-
- if (derivationType->isSandboxed())
- privateNetwork = true;
-
- userNamespaceSync.create();
-
- Pipe sendPid;
- sendPid.create();
-
- Pid helper = startProcess([&]() {
- sendPid.readSide.close();
-
- /* We need to open the slave early, before
- CLONE_NEWUSER. Otherwise we get EPERM when running as
- root. */
- openSlave();
-
- /* Drop additional groups here because we can't do it
- after we've created the new user namespace. */
- if (setgroups(0, 0) == -1) {
- if (errno != EPERM)
- throw SysError("setgroups failed");
- if (settings.requireDropSupplementaryGroups)
- throw Error("setgroups failed. Set the require-drop-supplementary-groups option to false to skip this step.");
- }
-
- ProcessOptions options;
- options.cloneFlags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_PARENT | SIGCHLD;
- if (privateNetwork)
- options.cloneFlags |= CLONE_NEWNET;
- if (usingUserNamespace)
- options.cloneFlags |= CLONE_NEWUSER;
-
- pid_t child = startProcess([&]() { runChild(); }, options).release();
-
- writeFull(sendPid.writeSide.get(), fmt("%d\n", child));
- _exit(0);
- });
-
- sendPid.writeSide.close();
-
- if (helper.wait() != 0)
- throw Error("unable to start build process");
-
- userNamespaceSync.readSide.reset();
-
- /* Close the write side to prevent runChild() from hanging
- reading from this. */
- Finally cleanup([&]() {
- userNamespaceSync.writeSide.reset();
- });
-
- auto ss = tokenizeString<std::vector<std::string>>(readLine(sendPid.readSide.get()));
- assert(ss.size() == 1);
- pid = Pid{string2Int<pid_t>(ss[0]).value()};
-
- if (usingUserNamespace) {
- /* Set the UID/GID mapping of the builder's user namespace
- such that the sandbox user maps to the build user, or to
- the calling user (if build users are disabled). */
- uid_t hostUid = buildUser ? buildUser->getUID() : getuid();
- uid_t hostGid = buildUser ? buildUser->getGID() : getgid();
- uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1;
-
- writeFile("/proc/" + std::to_string(pid.get()) + "/uid_map",
- fmt("%d %d %d", sandboxUid(), hostUid, nrIds));
-
- if (!buildUser || buildUser->getUIDCount() == 1)
- writeFile("/proc/" + std::to_string(pid.get()) + "/setgroups", "deny");
-
- writeFile("/proc/" + std::to_string(pid.get()) + "/gid_map",
- fmt("%d %d %d", sandboxGid(), hostGid, nrIds));
- } else {
- debug("note: not using a user namespace");
- }
-
- /* Now that we now the sandbox uid, we can write
- /etc/passwd. */
- writeFile(chrootRootDir + "/etc/passwd", fmt(
- "root:x:0:0:Nix build user:%3%:/noshell\n"
- "nixbld:x:%1%:%2%:Nix build user:%3%:/noshell\n"
- "nobody:x:65534:65534:Nobody:/:/noshell\n",
- sandboxUid(), sandboxGid(), settings.sandboxBuildDir));
-
- /* Declare the build user's group so that programs get a consistent
- view of the system (e.g., "id -gn"). */
- writeFile(chrootRootDir + "/etc/group",
- fmt("root:x:0:\n"
- "nixbld:!:%1%:\n"
- "nogroup:x:65534:\n", sandboxGid()));
-
- /* Save the mount- and user namespace of the child. We have to do this
- *before* the child does a chroot. */
- sandboxMountNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/mnt", pid.get()).c_str(), O_RDONLY)};
- if (sandboxMountNamespace.get() == -1)
- throw SysError("getting sandbox mount namespace");
-
- if (usingUserNamespace) {
- sandboxUserNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/user", pid.get()).c_str(), O_RDONLY)};
- if (sandboxUserNamespace.get() == -1)
- throw SysError("getting sandbox user namespace");
- }
-
- /* Move the child into its own cgroup. */
- if (cgroup)
- writeFile(*cgroup + "/cgroup.procs", fmt("%d", pid.get()));
-
- /* Signal the builder that we've updated its user namespace. */
- writeFull(userNamespaceSync.writeSide.get(), "1");
-
- } else
-#endif
- {
- pid = startProcess([&]() {
- openSlave();
- runChild();
- });
- }
+ pid = startChild(openSlave);
/* parent */
pid.setSeparatePG(true);
@@ -958,6 +806,14 @@ void LocalDerivationGoal::startBuilder()
}
+Pid LocalDerivationGoal::startChild(std::function<void()> openSlave) {
+ return startProcess([&]() {
+ openSlave();
+ runChild();
+ });
+}
+
+
void LocalDerivationGoal::initTmpDir() {
/* In a sandbox, for determinism, always use the same temporary
directory. */
diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh
index 857339b5d..727a7f406 100644
--- a/src/libstore/build/local-derivation-goal.hh
+++ b/src/libstore/build/local-derivation-goal.hh
@@ -334,6 +334,12 @@ protected:
};
/**
+ * Create a new process that runs `openSlave` and `runChild`
+ * On some platforms this process is created with sandboxing flags.
+ */
+ virtual Pid startChild(std::function<void()> openSlave);
+
+ /**
* Execute the builder, replacing the current process.
* Generally this means an `execve` call.
*/
diff --git a/src/libstore/platform/linux.cc b/src/libstore/platform/linux.cc
index ac0440e5a..204f62b71 100644
--- a/src/libstore/platform/linux.cc
+++ b/src/libstore/platform/linux.cc
@@ -1,10 +1,12 @@
#include "build/worker.hh"
#include "cgroup.hh"
+#include "finally.hh"
#include "gc-store.hh"
#include "signals.hh"
#include "platform/linux.hh"
#include "regex.hh"
+#include <grp.h>
#include <regex>
namespace nix {
@@ -204,6 +206,158 @@ void LinuxLocalDerivationGoal::prepareSandbox()
}
}
+Pid LinuxLocalDerivationGoal::startChild(std::function<void()> openSlave)
+{
+ // If we're not sandboxing no need to faff about, use the fallback
+ if (!useChroot) {
+ return LocalDerivationGoal::startChild(openSlave);
+ }
+ /* Set up private namespaces for the build:
+
+ - The PID namespace causes the build to start as PID 1.
+ Processes outside of the chroot are not visible to those
+ on the inside, but processes inside the chroot are
+ visible from the outside (though with different PIDs).
+
+ - The private mount namespace ensures that all the bind
+ mounts we do will only show up in this process and its
+ children, and will disappear automatically when we're
+ done.
+
+ - The private network namespace ensures that the builder
+ cannot talk to the outside world (or vice versa). It
+ only has a private loopback interface. (Fixed-output
+ derivations are not run in a private network namespace
+ to allow functions like fetchurl to work.)
+
+ - The IPC namespace prevents the builder from communicating
+ with outside processes using SysV IPC mechanisms (shared
+ memory, message queues, semaphores). It also ensures
+ that all IPC objects are destroyed when the builder
+ exits.
+
+ - The UTS namespace ensures that builders see a hostname of
+ localhost rather than the actual hostname.
+
+ We use a helper process to do the clone() to work around
+ clone() being broken in multi-threaded programs due to
+ at-fork handlers not being run. Note that we use
+ CLONE_PARENT to ensure that the real builder is parented to
+ us.
+ */
+
+ if (derivationType->isSandboxed())
+ privateNetwork = true;
+
+ userNamespaceSync.create();
+
+ Pipe sendPid;
+ sendPid.create();
+
+ Pid helper = startProcess([&]() {
+ sendPid.readSide.close();
+
+ /* We need to open the slave early, before
+ CLONE_NEWUSER. Otherwise we get EPERM when running as
+ root. */
+ openSlave();
+
+ /* Drop additional groups here because we can't do it
+ after we've created the new user namespace. */
+ if (setgroups(0, 0) == -1) {
+ if (errno != EPERM)
+ throw SysError("setgroups failed");
+ if (settings.requireDropSupplementaryGroups)
+ throw Error("setgroups failed. Set the require-drop-supplementary-groups option to false to skip this step.");
+ }
+
+ ProcessOptions options;
+ options.cloneFlags = CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_PARENT | SIGCHLD;
+ if (privateNetwork)
+ options.cloneFlags |= CLONE_NEWNET;
+ if (usingUserNamespace)
+ options.cloneFlags |= CLONE_NEWUSER;
+
+ pid_t child = startProcess([&]() { runChild(); }, options).release();
+
+ writeFull(sendPid.writeSide.get(), fmt("%d\n", child));
+ _exit(0);
+ });
+
+ sendPid.writeSide.close();
+
+ if (helper.wait() != 0)
+ throw Error("unable to start build process");
+
+ userNamespaceSync.readSide.reset();
+
+ /* Close the write side to prevent runChild() from hanging
+ reading from this. */
+ Finally cleanup([&]() {
+ userNamespaceSync.writeSide.reset();
+ });
+
+ auto ss = tokenizeString<std::vector<std::string>>(readLine(sendPid.readSide.get()));
+ assert(ss.size() == 1);
+ Pid pid = Pid{string2Int<pid_t>(ss[0]).value()};
+
+ if (usingUserNamespace) {
+ /* Set the UID/GID mapping of the builder's user namespace
+ such that the sandbox user maps to the build user, or to
+ the calling user (if build users are disabled). */
+ uid_t hostUid = buildUser ? buildUser->getUID() : getuid();
+ uid_t hostGid = buildUser ? buildUser->getGID() : getgid();
+ uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1;
+
+ writeFile("/proc/" + std::to_string(pid.get()) + "/uid_map",
+ fmt("%d %d %d", sandboxUid(), hostUid, nrIds));
+
+ if (!buildUser || buildUser->getUIDCount() == 1)
+ writeFile("/proc/" + std::to_string(pid.get()) + "/setgroups", "deny");
+
+ writeFile("/proc/" + std::to_string(pid.get()) + "/gid_map",
+ fmt("%d %d %d", sandboxGid(), hostGid, nrIds));
+ } else {
+ debug("note: not using a user namespace");
+ }
+
+ /* Now that we now the sandbox uid, we can write
+ /etc/passwd. */
+ writeFile(chrootRootDir + "/etc/passwd", fmt(
+ "root:x:0:0:Nix build user:%3%:/noshell\n"
+ "nixbld:x:%1%:%2%:Nix build user:%3%:/noshell\n"
+ "nobody:x:65534:65534:Nobody:/:/noshell\n",
+ sandboxUid(), sandboxGid(), settings.sandboxBuildDir));
+
+ /* Declare the build user's group so that programs get a consistent
+ view of the system (e.g., "id -gn"). */
+ writeFile(chrootRootDir + "/etc/group",
+ fmt("root:x:0:\n"
+ "nixbld:!:%1%:\n"
+ "nogroup:x:65534:\n", sandboxGid()));
+
+ /* Save the mount- and user namespace of the child. We have to do this
+ *before* the child does a chroot. */
+ sandboxMountNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/mnt", pid.get()).c_str(), O_RDONLY)};
+ if (sandboxMountNamespace.get() == -1)
+ throw SysError("getting sandbox mount namespace");
+
+ if (usingUserNamespace) {
+ sandboxUserNamespace = AutoCloseFD{open(fmt("/proc/%d/ns/user", pid.get()).c_str(), O_RDONLY)};
+ if (sandboxUserNamespace.get() == -1)
+ throw SysError("getting sandbox user namespace");
+ }
+
+ /* Move the child into its own cgroup. */
+ if (cgroup)
+ writeFile(*cgroup + "/cgroup.procs", fmt("%d", pid.get()));
+
+ /* Signal the builder that we've updated its user namespace. */
+ writeFull(userNamespaceSync.writeSide.get(), "1");
+
+ return pid;
+}
+
void LinuxLocalDerivationGoal::killSandbox(bool getStats)
{
if (cgroup) {
diff --git a/src/libstore/platform/linux.hh b/src/libstore/platform/linux.hh
index 7ef578d2c..2173205bc 100644
--- a/src/libstore/platform/linux.hh
+++ b/src/libstore/platform/linux.hh
@@ -48,6 +48,12 @@ private:
void prepareSandbox() override;
/**
+ * Start child process in new namespaces and cgroup,
+ * create /etc/passwd and /etc/group based on discovered uid/gid
+ */
+ Pid startChild(std::function<void()> openSlave) override;
+
+ /**
* Kill all processes by build user, possibly using a reused
* cgroup if we have one
*/