aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEelco Dolstra <edolstra@gmail.com>2020-05-19 23:25:44 +0200
committerEelco Dolstra <edolstra@gmail.com>2020-07-06 13:50:33 +0200
commitba50c3efa3b2394f5a8372939bc600008cd25e7e (patch)
treef2139d3768d91d29dddbd4e5d10414f5a2833188
parent570c443f560e015cf02e4f96102eaaa0e6853562 (diff)
Add "uid-range" and "systemd-cgroup" system features
"uid-range" provides 65536 UIDs to a build and runs the build as root in its user namespace. "systemd-cgroup" allows the build to mount the systemd cgroup controller (needed for running systemd-nspawn and NixOS containers). Also, add a configuration option "auto-allocate-uids" which is needed to enable these features, and some experimental feature gates. So to enable support for containers you need the following in nix.conf: experimental-features = auto-allocate-uids systemd-cgroup auto-allocate-uids = true system-features = uid-range systemd-cgroup
-rw-r--r--src/libstore/build.cc270
-rw-r--r--src/libstore/cgroup.cc19
-rw-r--r--src/libstore/globals.hh5
-rw-r--r--src/libstore/user-lock.cc212
-rw-r--r--src/libstore/user-lock.hh39
5 files changed, 340 insertions, 205 deletions
diff --git a/src/libstore/build.cc b/src/libstore/build.cc
index 97554e9cf..1f79a8d2d 100644
--- a/src/libstore/build.cc
+++ b/src/libstore/build.cc
@@ -16,7 +16,7 @@
#include "machines.hh"
#include "daemon.hh"
#include "worker-protocol.hh"
-#include "cgroup.hh"
+#include "user-lock.hh"
#include <algorithm>
#include <iostream>
@@ -504,154 +504,6 @@ void handleDiffHook(
}
}
-//////////////////////////////////////////////////////////////////////
-
-
-class UserLock
-{
-private:
- Path fnUserLock;
- AutoCloseFD fdUserLock;
-
- bool isEnabled = false;
- uid_t uid = 0;
- gid_t gid = 0;
- std::vector<gid_t> supplementaryGIDs;
-
-public:
- UserLock();
-
- void kill();
-
- uid_t getUID() { assert(uid); return uid; }
- gid_t getGID() { assert(gid); return gid; }
- uint32_t getIDCount() { return settings.idsPerBuild; }
- std::vector<gid_t> getSupplementaryGIDs() { return supplementaryGIDs; }
-
- bool findFreeUser();
-
- bool enabled() { return isEnabled; }
-
-};
-
-
-UserLock::UserLock()
-{
-#if 0
- assert(settings.buildUsersGroup != "");
- createDirs(settings.nixStateDir + "/userpool");
-#endif
-}
-
-bool UserLock::findFreeUser() {
- if (enabled()) return true;
-
-#if 0
- /* Get the members of the build-users-group. */
- struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str());
- if (!gr)
- throw Error("the group '%1%' specified in 'build-users-group' does not exist",
- settings.buildUsersGroup);
- gid = gr->gr_gid;
-
- /* Copy the result of getgrnam. */
- Strings users;
- for (char * * p = gr->gr_mem; *p; ++p) {
- debug("found build user '%1%'", *p);
- users.push_back(*p);
- }
-
- if (users.empty())
- throw Error("the build users group '%1%' has no members",
- settings.buildUsersGroup);
-
- /* Find a user account that isn't currently in use for another
- build. */
- for (auto & i : users) {
- debug("trying user '%1%'", i);
-
- struct passwd * pw = getpwnam(i.c_str());
- if (!pw)
- throw Error("the user '%1%' in the group '%2%' does not exist",
- i, settings.buildUsersGroup);
-
-
- fnUserLock = (format("%1%/userpool/%2%") % settings.nixStateDir % pw->pw_uid).str();
-
- AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
- if (!fd)
- throw SysError("opening user lock '%1%'", fnUserLock);
-
- if (lockFile(fd.get(), ltWrite, false)) {
- fdUserLock = std::move(fd);
- user = i;
- uid = pw->pw_uid;
-
- /* Sanity check... */
- if (uid == getuid() || uid == geteuid())
- throw Error("the Nix user should not be a member of '%1%'",
- settings.buildUsersGroup);
-
-#if __linux__
- /* Get the list of supplementary groups of this build user. This
- is usually either empty or contains a group such as "kvm". */
- supplementaryGIDs.resize(10);
- int ngroups = supplementaryGIDs.size();
- int err = getgrouplist(pw->pw_name, pw->pw_gid,
- supplementaryGIDs.data(), &ngroups);
- if (err == -1)
- throw Error("failed to get list of supplementary groups for '%1%'", pw->pw_name);
-
- supplementaryGIDs.resize(ngroups);
-#endif
-
- isEnabled = true;
- return true;
- }
- }
-
- return false;
-#endif
-
- assert(settings.startId > 0);
- assert(settings.startId % settings.idsPerBuild == 0);
- assert(settings.uidCount % settings.idsPerBuild == 0);
- assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max());
-
- // FIXME: check whether the id range overlaps any known users
-
- size_t nrSlots = settings.uidCount / settings.idsPerBuild;
-
- for (size_t i = 0; i < nrSlots; i++) {
- debug("trying user slot '%d'", i);
-
- createDirs(settings.nixStateDir + "/userpool");
-
- fnUserLock = fmt("%s/userpool/slot-%d", settings.nixStateDir, i);
-
- AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
- if (!fd)
- throw SysError("opening user lock '%1%'", fnUserLock);
-
- if (lockFile(fd.get(), ltWrite, false)) {
- fdUserLock = std::move(fd);
- uid = settings.startId + i * settings.idsPerBuild;
- gid = settings.startId + i * settings.idsPerBuild;
- return true;
- }
- }
-
- return false;
-}
-
-void UserLock::kill()
-{
- // FIXME: use a cgroup to kill all processes in the build?
-#if 0
- killUser(uid);
-#endif
-}
-
//////////////////////////////////////////////////////////////////////
@@ -840,6 +692,13 @@ private:
Path chrootRootDir;
+ /* Whether to give the build more than 1 UID. */
+ bool useUidRange = false;
+
+ /* Whether to make the 'systemd' cgroup controller available to
+ the build. */
+ bool useSystemdCgroup = false;
+
/* RAII object to delete the chroot directory. */
std::shared_ptr<AutoDelete> autoDelChroot;
@@ -896,8 +755,8 @@ private:
result. */
std::map<Path, ValidPathInfo> prevInfos;
- const uid_t sandboxUid = 1000;
- const gid_t sandboxGid = 100;
+ uid_t sandboxUid = -1;
+ gid_t sandboxGid = -1;
const static Path homeDir;
@@ -1445,6 +1304,7 @@ void DerivationGoal::inputsRealised()
result = BuildResult();
}
+
void DerivationGoal::started() {
auto msg = fmt(
buildMode == bmRepair ? "repairing outputs of '%s'" :
@@ -1459,6 +1319,7 @@ void DerivationGoal::started() {
worker.updateProgress();
}
+
void DerivationGoal::tryToBuild()
{
trace("trying to build");
@@ -1556,25 +1417,28 @@ void DerivationGoal::tryToBuild()
worker.wakeUp(shared_from_this());
}
+
void DerivationGoal::tryLocalBuild() {
/* If `build-users-group' is not empty, then we have to build as
one of the members of that group. */
- if ((settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0) {
+ static bool useBuildUsers = (settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0;
+ if (useBuildUsers) {
#if defined(__linux__) || defined(__APPLE__)
- if (!buildUser) buildUser = std::make_unique<UserLock>();
+ if (!buildUser)
+ buildUser = acquireUserLock();
- if (buildUser->findFreeUser()) {
- /* Make sure that no other processes are executing under this
- uid. */
- buildUser->kill();
- } else {
+ if (!buildUser) {
if (!actLock)
actLock = std::make_unique<Activity>(*logger, lvlWarn, actBuildWaiting,
fmt("waiting for UID to build '%s'", yellowtxt(worker.store.printStorePath(drvPath))));
worker.waitForAWhile(shared_from_this());
return;
}
+
+ /* Make sure that no other processes are executing under this
+ uid. */
+ buildUser->kill();
#else
/* Don't know how to block the creation of setuid/setgid
binaries on this platform. */
@@ -2087,6 +1951,9 @@ void DerivationGoal::startBuilder()
}
}
+ useUidRange = parsedDrv->getRequiredSystemFeatures().count("uid-range");
+ useSystemdCgroup = parsedDrv->getRequiredSystemFeatures().count("systemd-cgroup");
+
if (useChroot) {
/* Allow a user-configurable set of directories from the
@@ -2166,7 +2033,7 @@ void DerivationGoal::startBuilder()
printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir);
- if (mkdir(chrootRootDir.c_str(), 0755) == -1)
+ if (mkdir(chrootRootDir.c_str(), useUidRange ? 0755 : 0750) == -1)
throw SysError("cannot create '%1%'", chrootRootDir);
// FIXME: only make root writable for user namespace builds.
@@ -2186,6 +2053,12 @@ void DerivationGoal::startBuilder()
createDirs(chrootRootDir + "/etc");
chownToBuilder(chrootRootDir + "/etc");
+ if (useUidRange && (!buildUser || buildUser->getUIDCount() < 65536))
+ throw Error("feature 'uid-range' requires '%s' to be enabled", settings.autoAllocateUids.name);
+
+ sandboxUid = useUidRange ? 0 : 1000;
+ sandboxGid = useUidRange ? 0 : 100;
+
writeFile(chrootRootDir + "/etc/passwd", fmt(
"root:x:0:0:Nix build user:%3%:/noshell\n"
"nixbld:x:%1%:%2%:Nix build user:%3%:/noshell\n"
@@ -2238,12 +2111,32 @@ void DerivationGoal::startBuilder()
for (auto & i : drv->outputs)
dirsInChroot.erase(worker.store.printStorePath(i.second.path));
-#elif __APPLE__
- /* We don't really have any parent prep work to do (yet?)
- All work happens in the child, instead. */
+ if (useSystemdCgroup) {
+ settings.requireExperimentalFeature("systemd-cgroup");
+ std::optional<Path> cgroup;
+ if (!buildUser || !(cgroup = buildUser->getCgroup()))
+ throw Error("feature 'systemd-cgroup' requires 'auto-allocate-uids = true' in nix.conf");
+ chownToBuilder(*cgroup);
+ chownToBuilder(*cgroup + "/cgroup.procs");
+ }
+
#else
- throw Error("sandboxing builds is not supported on this platform");
+ if (useUidRange)
+ throw Error("feature 'uid-range' is not supported on this platform");
+ if (useSystemdCgroup)
+ throw Error("feature 'systemd-cgroup' is not supported on this platform");
+ #if __APPLE__
+ /* We don't really have any parent prep work to do (yet?)
+ All work happens in the child, instead. */
+ #else
+ throw Error("sandboxing builds is not supported on this platform");
+ #endif
#endif
+ } else {
+ if (useUidRange)
+ throw Error("feature 'uid-range' is only supported in sandboxed builds");
+ if (useSystemdCgroup)
+ throw Error("feature 'systemd-cgroup' is only supported in sandboxed builds");
}
if (needsHashRewrite()) {
@@ -2375,31 +2268,6 @@ void DerivationGoal::startBuilder()
#if __linux__
if (useChroot) {
- /* Create a systemd cgroup since that's the minimum required
- by systemd-nspawn. */
- // FIXME: do we want to use the parent cgroup? We should
- // always use the same cgroup regardless of whether we're the
- // daemon or run from a user session via sudo.
- auto ourCgroups = getCgroups("/proc/self/cgroup");
- auto systemdCgroup = ourCgroups["systemd"];
- if (systemdCgroup == "")
- throw Error("'systemd' cgroup does not exist");
-
- auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup);
-
- if (!pathExists(hostCgroup))
- throw Error("expected cgroup directory '%s'", hostCgroup);
-
- auto childCgroup = fmt("%s/nix-%d", hostCgroup, buildUser->getUID());
-
- destroyCgroup(childCgroup);
-
- if (mkdir(childCgroup.c_str(), 0755) == -1)
- throw SysError("creating cgroup '%s'", childCgroup);
-
- chownToBuilder(childCgroup);
- chownToBuilder(childCgroup + "/cgroup.procs");
-
/* Set up private namespaces for the build:
- The PID namespace causes the build to start as PID 1.
@@ -2508,15 +2376,16 @@ void DerivationGoal::startBuilder()
the calling user (if build users are disabled). */
uid_t hostUid = buildUser ? buildUser->getUID() : getuid();
uid_t hostGid = buildUser ? buildUser->getGID() : getgid();
- uint32_t nrIds = settings.idsPerBuild; // FIXME
+ uint32_t nrIds = buildUser && useUidRange ? buildUser->getUIDCount() : 1;
writeFile("/proc/" + std::to_string(pid) + "/uid_map",
- fmt("%d %d %d", /* sandboxUid */ 0, hostUid, nrIds));
+ fmt("%d %d %d", sandboxUid, hostUid, nrIds));
- //writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny");
+ if (!useUidRange)
+ writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny");
writeFile("/proc/" + std::to_string(pid) + "/gid_map",
- fmt("%d %d %d", /* sandboxGid */ 0, hostGid, nrIds));
+ fmt("%d %d %d", sandboxGid, hostGid, nrIds));
/* Save the mount namespace of the child. We have to do this
*before* the child does a chroot. */
@@ -2525,7 +2394,10 @@ void DerivationGoal::startBuilder()
throw SysError("getting sandbox mount namespace");
/* Move the child into its own cgroup. */
- writeFile(childCgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
+ if (buildUser) {
+ if (auto cgroup = buildUser->getCgroup())
+ writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid));
+ }
/* Signal the builder that we've updated its user namespace. */
writeFull(userNamespaceSync.writeSide.get(), "1");
@@ -3361,7 +3233,7 @@ void DerivationGoal::runChild()
/* Unshare the cgroup namespace. This means
/proc/self/cgroup will show the child's cgroup as '/'
rather than whatever it is in the parent. */
- if (unshare(CLONE_NEWCGROUP) == -1)
+ if (useSystemdCgroup && unshare(CLONE_NEWCGROUP) == -1)
throw SysError("unsharing cgroup namespace");
/* Do the chroot(). */
@@ -3386,16 +3258,10 @@ void DerivationGoal::runChild()
/* Switch to the sandbox uid/gid in the user namespace,
which corresponds to the build user or calling user in
the parent namespace. */
-#if 0
if (setgid(sandboxGid) == -1)
throw SysError("setgid failed");
if (setuid(sandboxUid) == -1)
throw SysError("setuid failed");
-#endif
- if (setgid(0) == -1)
- throw SysError("setgid failed");
- if (setuid(0) == -1)
- throw SysError("setuid failed");
setUser = false;
}
@@ -3789,7 +3655,7 @@ void DerivationGoal::registerOutputs()
something like that. */
canonicalisePathMetaData(
actualPath,
- buildUser ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1)) : std::nullopt,
+ buildUser ? std::optional(buildUser->getUIDRange()) : std::nullopt,
inodesSeen);
/* FIXME: this is in-memory. */
@@ -3866,7 +3732,7 @@ void DerivationGoal::registerOutputs()
all files are owned by the build user, if applicable. */
canonicalisePathMetaData(actualPath,
buildUser && !rewritten
- ? std::optional(std::make_pair(buildUser->getUID(), buildUser->getUID() + buildUser->getIDCount() - 1))
+ ? std::optional(buildUser->getUIDRange())
: std::nullopt,
inodesSeen);
diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc
index 887facdca..9e5e937df 100644
--- a/src/libstore/cgroup.cc
+++ b/src/libstore/cgroup.cc
@@ -4,6 +4,7 @@
#include "util.hh"
#include <chrono>
+#include <unordered_set>
#include <dirent.h>
@@ -19,7 +20,7 @@ std::map<std::string, std::string> getCgroups(const Path & cgroupFile)
if (!std::regex_match(line, match, regex))
throw Error("invalid line '%s' in '%s'", line, cgroupFile);
- std::string name = hasPrefix(match[2], "name=") ? std::string(match[2], 5) : match[2];
+ std::string name = hasPrefix(std::string(match[2]), "name=") ? std::string(match[2], 5) : match[2];
cgroups.insert_or_assign(name, match[3]);
}
@@ -28,6 +29,8 @@ std::map<std::string, std::string> getCgroups(const Path & cgroupFile)
void destroyCgroup(const Path & cgroup)
{
+ if (!pathExists(cgroup)) return;
+
for (auto & entry : readDirectory(cgroup)) {
if (entry.type != DT_DIR) continue;
destroyCgroup(cgroup + "/" + entry.name);
@@ -35,6 +38,8 @@ void destroyCgroup(const Path & cgroup)
int round = 1;
+ std::unordered_set<pid_t> pidsShown;
+
while (true) {
auto pids = tokenizeString<std::vector<std::string>>(readFile(cgroup + "/cgroup.procs"));
@@ -46,13 +51,23 @@ void destroyCgroup(const Path & cgroup)
for (auto & pid_s : pids) {
pid_t pid;
if (!string2Int(pid_s, pid)) throw Error("invalid pid '%s'", pid);
+ if (pidsShown.insert(pid).second) {
+ try {
+ auto cmdline = readFile(fmt("/proc/%d/cmdline", pid));
+ using namespace std::string_literals;
+ warn("killing stray builder process %d (%s)...",
+ pid, trim(replaceStrings(cmdline, "\0"s, " ")));
+ } catch (SysError &) {
+ }
+ }
// FIXME: pid wraparound
if (kill(pid, SIGKILL) == -1 && errno != ESRCH)
throw SysError("killing member %d of cgroup '%s'", pid, cgroup);
}
auto sleep = std::chrono::milliseconds((int) std::pow(2.0, std::min(round, 10)));
- printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup);
+ if (sleep.count() > 100)
+ printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup);
std::this_thread::sleep_for(sleep);
round++;
}
diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh
index 89db072b0..5cf73c7b4 100644
--- a/src/libstore/globals.hh
+++ b/src/libstore/globals.hh
@@ -149,10 +149,13 @@ public:
"The Unix group that contains the build users."};
#if __linux__
+ Setting<bool> autoAllocateUids{this, false, "auto-allocate-uids",
+ "Whether to allocate UIDs for builders automatically."};
+
const uint32_t idsPerBuild = 1 << 16;
Setting<uint32_t> startId{this, 872415232, "start-id",
- "The first UID and GID to use for dynamic ID allocation. (0 means disable.)"};
+ "The first UID and GID to use for dynamic ID allocation."};
Setting<uint32_t> uidCount{this, idsPerBuild * 128, "id-count",
"The number of UIDs/GIDs to use for dynamic ID allocation."};
diff --git a/src/libstore/user-lock.cc b/src/libstore/user-lock.cc
new file mode 100644
index 000000000..8a09df4d1
--- /dev/null
+++ b/src/libstore/user-lock.cc
@@ -0,0 +1,212 @@
+#include "user-lock.hh"
+#include "globals.hh"
+#include "pathlocks.hh"
+#include "cgroup.hh"
+
+namespace nix {
+
+struct SimpleUserLock : UserLock
+{
+ AutoCloseFD fdUserLock;
+ uid_t uid;
+ gid_t gid;
+ std::vector<gid_t> supplementaryGIDs;
+
+ void kill() override
+ {
+ killUser(uid);
+ }
+
+ std::pair<uid_t, uid_t> getUIDRange() override
+ {
+ assert(uid);
+ return {uid, uid};
+ }
+
+ gid_t getGID() override { assert(gid); return gid; }
+
+ std::vector<gid_t> getSupplementaryGIDs() override { return supplementaryGIDs; }
+
+ static std::unique_ptr<UserLock> acquire()
+ {
+ assert(settings.buildUsersGroup != "");
+ createDirs(settings.nixStateDir + "/userpool");
+
+ /* Get the members of the build-users-group. */
+ struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str());
+ if (!gr)
+ throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup);
+
+ /* Copy the result of getgrnam. */
+ Strings users;
+ for (char * * p = gr->gr_mem; *p; ++p) {
+ debug("found build user '%s'", *p);
+ users.push_back(*p);
+ }
+
+ if (users.empty())
+ throw Error("the build users group '%s' has no members", settings.buildUsersGroup);
+
+ /* Find a user account that isn't currently in use for another
+ build. */
+ for (auto & i : users) {
+ debug("trying user '%s'", i);
+
+ struct passwd * pw = getpwnam(i.c_str());
+ if (!pw)
+ throw Error("the user '%s' in the group '%s' does not exist", i, settings.buildUsersGroup);
+
+ auto fnUserLock = fmt("%s/userpool/%s", settings.nixStateDir,pw->pw_uid);
+
+ AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
+ if (!fd)
+ throw SysError("opening user lock '%s'", fnUserLock);
+
+ if (lockFile(fd.get(), ltWrite, false)) {
+ auto lock = std::make_unique<SimpleUserLock>();
+
+ lock->fdUserLock = std::move(fd);
+ lock->uid = pw->pw_uid;
+ lock->gid = gr->gr_gid;
+
+ /* Sanity check... */
+ if (lock->uid == getuid() || lock->uid == geteuid())
+ throw Error("the Nix user should not be a member of '%s'", settings.buildUsersGroup);
+
+ #if __linux__
+ /* Get the list of supplementary groups of this build
+ user. This is usually either empty or contains a
+ group such as "kvm". */
+ lock->supplementaryGIDs.resize(10);
+ int ngroups = lock->supplementaryGIDs.size();
+ int err = getgrouplist(pw->pw_name, pw->pw_gid,
+ lock->supplementaryGIDs.data(), &ngroups);
+ if (err == -1)
+ throw Error("failed to get list of supplementary groups for '%s'", pw->pw_name);
+
+ lock->supplementaryGIDs.resize(ngroups);
+ #endif
+
+ return lock;
+ }
+ }
+
+ return nullptr;
+ }
+};
+
+#if __linux__
+struct CgroupUserLock : UserLock
+{
+ AutoCloseFD fdUserLock;
+ uid_t uid;
+
+ void kill() override
+ {
+ if (cgroup) {
+ destroyCgroup(*cgroup);
+ cgroup.reset();
+ }
+ }
+
+ std::pair<uid_t, uid_t> getUIDRange() override
+ {
+ assert(uid);
+ return {uid, uid + settings.idsPerBuild - 1};
+ }
+
+ gid_t getGID() override
+ {
+ // We use the same GID ranges as for the UIDs.
+ assert(uid);
+ return uid;
+ }
+
+ std::vector<gid_t> getSupplementaryGIDs() override { return {}; } // FIXME
+
+ static std::unique_ptr<UserLock> acquire()
+ {
+ settings.requireExperimentalFeature("auto-allocate-uids");
+ assert(settings.startId > 0);
+ assert(settings.startId % settings.idsPerBuild == 0);
+ assert(settings.uidCount % settings.idsPerBuild == 0);
+ assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max());
+
+ // FIXME: check whether the id range overlaps any known users
+
+ createDirs(settings.nixStateDir + "/userpool2");
+
+ size_t nrSlots = settings.uidCount / settings.idsPerBuild;
+
+ for (size_t i = 0; i < nrSlots; i++) {
+ debug("trying user slot '%d'", i);
+
+ createDirs(settings.nixStateDir + "/userpool2");
+
+ auto fnUserLock = fmt("%s/userpool2/slot-%d", settings.nixStateDir, i);
+
+ AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
+ if (!fd)
+ throw SysError("opening user lock '%s'", fnUserLock);
+
+ if (lockFile(fd.get(), ltWrite, false)) {
+ auto lock = std::make_unique<CgroupUserLock>();
+ lock->fdUserLock = std::move(fd);
+ lock->uid = settings.startId + i * settings.idsPerBuild;
+ auto s = drainFD(lock->fdUserLock.get());
+ if (s != "") lock->cgroup = s;
+ return lock;
+ }
+ }
+
+ return nullptr;
+ }
+
+ std::optional<Path> cgroup;
+
+ std::optional<Path> getCgroup() override
+ {
+ if (!cgroup) {
+ /* Create a systemd cgroup since that's the minimum
+ required by systemd-nspawn. */
+ auto ourCgroups = getCgroups("/proc/self/cgroup");
+ auto systemdCgroup = ourCgroups["systemd"];
+ if (systemdCgroup == "")
+ throw Error("'systemd' cgroup does not exist");
+
+ auto hostCgroup = canonPath("/sys/fs/cgroup/systemd/" + systemdCgroup);
+
+ if (!pathExists(hostCgroup))
+ throw Error("expected cgroup directory '%s'", hostCgroup);
+
+ cgroup = fmt("%s/nix-%d", hostCgroup, uid);
+
+ destroyCgroup(*cgroup);
+
+ if (mkdir(cgroup->c_str(), 0755) == -1)
+ throw SysError("creating cgroup '%s'", *cgroup);
+
+ /* Record the cgroup in the lock file. This ensures that
+ if we subsequently get executed under a different parent
+ cgroup, we kill the previous cgroup first. */
+ if (ftruncate(fdUserLock.get(), 0) == -1)
+ throw Error("truncating user lock");
+ writeFull(fdUserLock.get(), *cgroup);
+ }
+
+ return cgroup;
+ };
+};
+#endif
+
+std::unique_ptr<UserLock> acquireUserLock()
+{
+ #if __linux__
+ if (settings.autoAllocateUids)
+ return CgroupUserLock::acquire();
+ else
+ #endif
+ return SimpleUserLock::acquire();
+}
+
+}
diff --git a/src/libstore/user-lock.hh b/src/libstore/user-lock.hh
new file mode 100644
index 000000000..88d068689
--- /dev/null
+++ b/src/libstore/user-lock.hh
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "types.hh"
+
+namespace nix {
+
+struct UserLock
+{
+ virtual ~UserLock() { }
+
+ /* Get the first and last UID. */
+ virtual std::pair<uid_t, uid_t> getUIDRange() = 0;
+
+ /* Get the first UID. */
+ uid_t getUID()
+ {
+ return getUIDRange().first;
+ }
+
+ uid_t getUIDCount()
+ {
+ return getUIDRange().second - getUIDRange().first + 1;
+ }
+
+ virtual gid_t getGID() = 0;
+
+ virtual std::vector<gid_t> getSupplementaryGIDs() = 0;
+
+ /* Kill any processes currently executing as this user. */
+ virtual void kill() = 0;
+
+ virtual std::optional<Path> getCgroup() { return {}; };
+};
+
+/* Acquire a user lock. Note that this may return nullptr if no user
+ is available. */
+std::unique_ptr<UserLock> acquireUserLock();
+
+}