aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEelco Dolstra <eelco.dolstra@logicblox.com>2014-08-21 14:08:09 +0200
committerEelco Dolstra <eelco.dolstra@logicblox.com>2014-08-21 14:08:09 +0200
commit524f89f1399724e596f61faba2c6861b1bb7b9c5 (patch)
treee652815f56cf7cf93dec282f1ec319511d5476a0
parentfefd3650d4aa0c378da2ae1f1cdda772d5afaf13 (diff)
Use unshare() instead of clone()
It turns out that using clone() to start a child process is unsafe in a multithreaded program. It can cause the initialisation of a build child process to hang in setgroups(), as seen several times in the build farm: The reason is that Glibc thinks that the other threads of the parent exist in the child, so in setxid_mark_thread() it tries to get a futex that has been acquired by another thread just before the clone(). With fork(), Glibc runs pthread_atfork() handlers that take care of this (in particular, __reclaim_stacks()). But clone() doesn't do that. Fortunately, we can use fork()+unshare() instead of clone() to set up private namespaces. See also https://www.mail-archive.com/lxc-devel@lists.linuxcontainers.org/msg03434.html.
-rw-r--r--src/libstore/build.cc90
1 files changed, 35 insertions, 55 deletions
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index 856e5f820..b64c32d10 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -1575,13 +1575,6 @@ void chmod_(const Path & path, mode_t mode)
}
-int childEntry(void * arg)
-{
- ((DerivationGoal *) arg)->initChild();
- return 1;
-}
-
-
void DerivationGoal::startBuilder()
{
startNest(nest, lvlInfo, format(
@@ -1897,48 +1890,10 @@ void DerivationGoal::startBuilder()
/* Create a pipe to get the output of the builder. */
builderOut.create();
- /* Fork a child to build the package. Note that while we
- currently use forks to run and wait for the children, it
- shouldn't be hard to use threads for this on systems where
- fork() is unavailable or inefficient.
-
- If we're building in a chroot, then also set up private
- namespaces for the build:
-
- - The PID namespace causes the build to start as PID 1.
- Processes outside of the chroot are not visible to those on
- the inside, but processes inside the chroot are visible from
- the outside (though with different PIDs).
-
- - The private mount namespace ensures that all the bind mounts
- we do will only show up in this process and its children, and
- will disappear automatically when we're done.
-
- - The private network namespace ensures that the builder cannot
- talk to the outside world (or vice versa). It only has a
- private loopback interface.
-
- - The IPC namespace prevents the builder from communicating
- with outside processes using SysV IPC mechanisms (shared
- memory, message queues, semaphores). It also ensures that
- all IPC objects are destroyed when the builder exits.
-
- - The UTS namespace ensures that builders see a hostname of
- localhost rather than the actual hostname.
- */
-#if CHROOT_ENABLED
- if (useChroot) {
- char stack[32 * 1024];
- pid = clone(childEntry, stack + sizeof(stack) - 8,
- CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | SIGCHLD, this);
- } else
-#endif
- {
- pid = fork();
- if (pid == 0) initChild();
- }
-
- if (pid == -1) throw SysError("unable to fork");
+ /* Fork a child to build the package. */
+ pid = startProcess([&]() {
+ initChild();
+ });
/* parent */
pid.setSeparatePG(true);
@@ -1965,14 +1920,41 @@ void DerivationGoal::initChild()
try { /* child */
- _writeToStderr = 0;
-
- restoreAffinity();
-
commonChildInit(builderOut);
#if CHROOT_ENABLED
if (useChroot) {
+
+ /* Set up private namespaces for the build:
+
+ - The PID namespace causes the build to start as PID 1.
+ Processes outside of the chroot are not visible to
+ those on the inside, but processes inside the chroot
+ are visible from the outside (though with different
+ PIDs).
+
+ - The private mount namespace ensures that all the bind
+ mounts we do will only show up in this process and
+ its children, and will disappear automatically when
+ we're done.
+
+ - The private network namespace ensures that the
+ builder cannot talk to the outside world (or vice
+ versa). It only has a private loopback interface.
+
+ - The IPC namespace prevents the builder from
+ communicating with outside processes using SysV IPC
+ mechanisms (shared memory, message queues,
+ semaphores). It also ensures that all IPC objects
+ are destroyed when the builder exits.
+
+ - The UTS namespace ensures that builders see a
+ hostname of localhost rather than the actual
+ hostname.
+ */
+ if (unshare(CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS) == -1)
+ throw SysError("setting up private namespaces");
+
/* Initialise the loopback interface. */
AutoCloseFD fd(socket(PF_INET, SOCK_DGRAM, IPPROTO_IP));
if (fd == -1) throw SysError("cannot open IP socket");
@@ -2169,8 +2151,6 @@ void DerivationGoal::initChild()
writeToStderr("while setting up the build environment: " + string(e.what()) + "\n");
_exit(1);
}
-
- abort(); /* never reached */
}