diff options
author | Eelco Dolstra <edolstra@gmail.com> | 2022-11-29 14:01:42 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-29 14:01:42 +0100 |
commit | fbc53e97edfff092e6a4c1b7f0546137fdb6238c (patch) | |
tree | cd45faa2f2b75065689fcc0d72e7e705dfea1257 /src | |
parent | f904f6a66f2fea3d6a9498dd47284aa8c16fb26d (diff) | |
parent | 4f762e2b023fd451fdbab0de8d6394dd7201640d (diff) |
Merge pull request #3600 from NixOS/auto-uid-allocation
Automatic UID allocation
Diffstat (limited to 'src')
-rw-r--r-- | src/libcmd/installables.cc | 2 | ||||
-rw-r--r-- | src/libstore/build-result.hh | 5 | ||||
-rw-r--r-- | src/libstore/build/derivation-goal.cc | 8 | ||||
-rw-r--r-- | src/libstore/build/local-derivation-goal.cc | 252 | ||||
-rw-r--r-- | src/libstore/build/local-derivation-goal.hh | 11 | ||||
-rw-r--r-- | src/libstore/cgroup.cc | 131 | ||||
-rw-r--r-- | src/libstore/cgroup.hh | 27 | ||||
-rw-r--r-- | src/libstore/globals.cc | 4 | ||||
-rw-r--r-- | src/libstore/globals.hh | 44 | ||||
-rw-r--r-- | src/libstore/local-store.cc | 27 | ||||
-rw-r--r-- | src/libstore/local-store.hh | 15 | ||||
-rw-r--r-- | src/libstore/lock.cc | 236 | ||||
-rw-r--r-- | src/libstore/lock.hh | 43 | ||||
-rw-r--r-- | src/libstore/parsed-derivations.cc | 6 | ||||
-rw-r--r-- | src/libstore/parsed-derivations.hh | 2 | ||||
-rw-r--r-- | src/libutil/experimental-features.cc | 2 | ||||
-rw-r--r-- | src/libutil/experimental-features.hh | 2 | ||||
-rw-r--r-- | src/libutil/filesystem.cc | 9 | ||||
-rw-r--r-- | src/nix-store/nix-store.cc | 2 | ||||
-rw-r--r-- | src/nix/build.cc | 4 |
20 files changed, 643 insertions, 189 deletions
diff --git a/src/libcmd/installables.cc b/src/libcmd/installables.cc index d6e62e775..dbe4a449d 100644 --- a/src/libcmd/installables.cc +++ b/src/libcmd/installables.cc @@ -931,7 +931,7 @@ std::vector<std::pair<std::shared_ptr<Installable>, BuiltPathWithResult>> Instal case Realise::Outputs: { if (settings.printMissing) - printMissing(store, pathsToBuild, lvlInfo); + printMissing(store, pathsToBuild, lvlInfo); for (auto & buildResult : store->buildPathsWithResults(pathsToBuild, bMode, evalStore)) { if (!buildResult.success()) diff --git a/src/libstore/build-result.hh b/src/libstore/build-result.hh index 24fb1f763..a5749cf33 100644 --- a/src/libstore/build-result.hh +++ b/src/libstore/build-result.hh @@ -5,7 +5,7 @@ #include <string> #include <chrono> - +#include <optional> namespace nix { @@ -78,6 +78,9 @@ struct BuildResult was repeated). */ time_t startTime = 0, stopTime = 0; + /* User and system CPU time the build took. */ + std::optional<std::chrono::microseconds> cpuUser, cpuSystem; + bool success() { return status == Built || status == Substituted || status == AlreadyValid || status == ResolvesToAlreadyValid; diff --git a/src/libstore/build/derivation-goal.cc b/src/libstore/build/derivation-goal.cc index 1938f4bcb..67cfc38af 100644 --- a/src/libstore/build/derivation-goal.cc +++ b/src/libstore/build/derivation-goal.cc @@ -886,6 +886,14 @@ void DerivationGoal::buildDone() cleanupPostChildKill(); + if (buildResult.cpuUser && buildResult.cpuSystem) { + debug("builder for '%s' terminated with status %d, user CPU %.3fs, system CPU %.3fs", + worker.store.printStorePath(drvPath), + status, + ((double) buildResult.cpuUser->count()) / 1000000, + ((double) buildResult.cpuSystem->count()) / 1000000); + } + bool diskFull = false; try { diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index ef49f8bf1..c9b7b24f3 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -14,6 +14,7 @@ #include "topo-sort.hh" #include "callback.hh" #include "json-utils.hh" +#include "cgroup.hh" #include <regex> #include <queue> @@ -129,26 +130,44 @@ void LocalDerivationGoal::killChild() if (pid != -1) { worker.childTerminated(this); - if (buildUser) { - /* If we're using a build user, then there is a tricky - race condition: if we kill the build user before the - child has done its setuid() to the build user uid, then - it won't be killed, and we'll potentially lock up in - pid.wait(). So also send a conventional kill to the - child. */ - ::kill(-pid, SIGKILL); /* ignore the result */ - buildUser->kill(); - pid.wait(); - } else - pid.kill(); + /* If we're using a build user, then there is a tricky race + condition: if we kill the build user before the child has + done its setuid() to the build user uid, then it won't be + killed, and we'll potentially lock up in pid.wait(). So + also send a conventional kill to the child. */ + ::kill(-pid, SIGKILL); /* ignore the result */ - assert(pid == -1); + killSandbox(true); + + pid.wait(); } DerivationGoal::killChild(); } +void LocalDerivationGoal::killSandbox(bool getStats) +{ + if (cgroup) { + #if __linux__ + auto stats = destroyCgroup(*cgroup); + if (getStats) { + buildResult.cpuUser = stats.cpuUser; + buildResult.cpuSystem = stats.cpuSystem; + } + #else + abort(); + #endif + } + + else if (buildUser) { + auto uid = buildUser->getUID(); + assert(uid != 0); + killUser(uid); + } +} + + void LocalDerivationGoal::tryLocalBuild() { unsigned int curBuilds = worker.getNrLocalBuilds(); if (curBuilds >= settings.maxBuildJobs) { @@ -158,28 +177,46 @@ void LocalDerivationGoal::tryLocalBuild() { return; } - /* If `build-users-group' is not empty, then we have to build as - one of the members of that group. */ - if (settings.buildUsersGroup != "" && getuid() == 0) { -#if defined(__linux__) || defined(__APPLE__) - if (!buildUser) buildUser = std::make_unique<UserLock>(); + /* Are we doing a chroot build? */ + { + auto noChroot = parsedDrv->getBoolAttr("__noChroot"); + if (settings.sandboxMode == smEnabled) { + if (noChroot) + throw Error("derivation '%s' has '__noChroot' set, " + "but that's not allowed when 'sandbox' is 'true'", worker.store.printStorePath(drvPath)); +#if __APPLE__ + if (additionalSandboxProfile != "") + throw Error("derivation '%s' specifies a sandbox profile, " + "but this is only allowed when 'sandbox' is 'relaxed'", worker.store.printStorePath(drvPath)); +#endif + useChroot = true; + } + else if (settings.sandboxMode == smDisabled) + useChroot = false; + else if (settings.sandboxMode == smRelaxed) + useChroot = derivationType.isSandboxed() && !noChroot; + } + + auto & localStore = getLocalStore(); + if (localStore.storeDir != localStore.realStoreDir.get()) { + #if __linux__ + useChroot = true; + #else + throw Error("building using a diverted store is not supported on this platform"); + #endif + } - if (buildUser->findFreeUser()) { - /* Make sure that no other processes are executing under this - uid. */ - buildUser->kill(); - } else { + if (useBuildUsers()) { + if (!buildUser) + buildUser = acquireUserLock(parsedDrv->useUidRange() ? 65536 : 1, useChroot); + + if (!buildUser) { if (!actLock) actLock = std::make_unique<Activity>(*logger, lvlWarn, actBuildWaiting, fmt("waiting for UID to build '%s'", yellowtxt(worker.store.printStorePath(drvPath)))); worker.waitForAWhile(shared_from_this()); return; } -#else - /* Don't know how to block the creation of setuid/setgid - binaries on this platform. */ - throw Error("build users are not supported on this platform for security reasons"); -#endif } actLock.reset(); @@ -270,7 +307,7 @@ void LocalDerivationGoal::cleanupPostChildKill() malicious user from leaving behind a process that keeps files open and modifies them after they have been chown'ed to root. */ - if (buildUser) buildUser->kill(); + killSandbox(true); /* Terminate the recursive Nix daemon. */ stopDaemon(); @@ -363,6 +400,60 @@ static void linkOrCopy(const Path & from, const Path & to) void LocalDerivationGoal::startBuilder() { + if ((buildUser && buildUser->getUIDCount() != 1) + #if __linux__ + || settings.useCgroups + #endif + ) + { + #if __linux__ + settings.requireExperimentalFeature(Xp::Cgroups); + + auto ourCgroups = getCgroups("/proc/self/cgroup"); + auto ourCgroup = ourCgroups[""]; + if (ourCgroup == "") + throw Error("cannot determine cgroup name from /proc/self/cgroup"); + + auto ourCgroupPath = canonPath("/sys/fs/cgroup/" + ourCgroup); + + if (!pathExists(ourCgroupPath)) + throw Error("expected cgroup directory '%s'", ourCgroupPath); + + static std::atomic<unsigned int> counter{0}; + + cgroup = buildUser + ? fmt("%s/nix-build-uid-%d", ourCgroupPath, buildUser->getUID()) + : fmt("%s/nix-build-pid-%d-%d", ourCgroupPath, getpid(), counter++); + + debug("using cgroup '%s'", *cgroup); + + /* When using a build user, record the cgroup we used for that + user so that if we got interrupted previously, we can kill + any left-over cgroup first. */ + if (buildUser) { + auto cgroupsDir = settings.nixStateDir + "/cgroups"; + createDirs(cgroupsDir); + + auto cgroupFile = fmt("%s/%d", cgroupsDir, buildUser->getUID()); + + if (pathExists(cgroupFile)) { + auto prevCgroup = readFile(cgroupFile); + destroyCgroup(prevCgroup); + } + + writeFile(cgroupFile, *cgroup); + } + + #else + throw Error("cgroups are not supported on this platform"); + #endif + } + + /* Make sure that no other processes are executing under the + sandbox uids. This must be done before any chownToBuilder() + calls. */ + killSandbox(false); + /* Right platform? */ if (!parsedDrv->canBuildLocally(worker.store)) throw Error("a '%s' with features {%s} is required to build '%s', but I am a '%s' with features {%s}", @@ -376,35 +467,6 @@ void LocalDerivationGoal::startBuilder() additionalSandboxProfile = parsedDrv->getStringAttr("__sandboxProfile").value_or(""); #endif - /* Are we doing a chroot build? */ - { - auto noChroot = parsedDrv->getBoolAttr("__noChroot"); - if (settings.sandboxMode == smEnabled) { - if (noChroot) - throw Error("derivation '%s' has '__noChroot' set, " - "but that's not allowed when 'sandbox' is 'true'", worker.store.printStorePath(drvPath)); -#if __APPLE__ - if (additionalSandboxProfile != "") - throw Error("derivation '%s' specifies a sandbox profile, " - "but this is only allowed when 'sandbox' is 'relaxed'", worker.store.printStorePath(drvPath)); -#endif - useChroot = true; - } - else if (settings.sandboxMode == smDisabled) - useChroot = false; - else if (settings.sandboxMode == smRelaxed) - useChroot = derivationType.isSandboxed() && !noChroot; - } - - auto & localStore = getLocalStore(); - if (localStore.storeDir != localStore.realStoreDir.get()) { - #if __linux__ - useChroot = true; - #else - throw Error("building using a diverted store is not supported on this platform"); - #endif - } - /* Create a temporary directory where the build will take place. */ tmpDir = createTempDir("", "nix-build-" + std::string(drvPath.name()), false, false, 0700); @@ -580,10 +642,11 @@ void LocalDerivationGoal::startBuilder() printMsg(lvlChatty, format("setting up chroot environment in '%1%'") % chrootRootDir); - if (mkdir(chrootRootDir.c_str(), 0750) == -1) + // FIXME: make this 0700 + if (mkdir(chrootRootDir.c_str(), buildUser && buildUser->getUIDCount() != 1 ? 0755 : 0750) == -1) throw SysError("cannot create '%1%'", chrootRootDir); - if (buildUser && chown(chrootRootDir.c_str(), 0, buildUser->getGID()) == -1) + if (buildUser && chown(chrootRootDir.c_str(), buildUser->getUIDCount() != 1 ? buildUser->getUID() : 0, buildUser->getGID()) == -1) throw SysError("cannot change ownership of '%1%'", chrootRootDir); /* Create a writable /tmp in the chroot. Many builders need @@ -597,6 +660,10 @@ void LocalDerivationGoal::startBuilder() nobody account. The latter is kind of a hack to support Samba-in-QEMU. */ createDirs(chrootRootDir + "/etc"); + chownToBuilder(chrootRootDir + "/etc"); + + if (parsedDrv->useUidRange() && (!buildUser || buildUser->getUIDCount() < 65536)) + throw Error("feature 'uid-range' requires the setting '%s' to be enabled", settings.autoAllocateUids.name); /* Declare the build user's group so that programs get a consistent view of the system (e.g., "id -gn"). */ @@ -647,12 +714,28 @@ void LocalDerivationGoal::startBuilder() dirsInChroot.erase(worker.store.printStorePath(*i.second.second)); } -#elif __APPLE__ - /* We don't really have any parent prep work to do (yet?) - All work happens in the child, instead. */ + if (cgroup) { + if (mkdir(cgroup->c_str(), 0755) != 0) + throw SysError("creating cgroup '%s'", *cgroup); + chownToBuilder(*cgroup); + chownToBuilder(*cgroup + "/cgroup.procs"); + chownToBuilder(*cgroup + "/cgroup.threads"); + //chownToBuilder(*cgroup + "/cgroup.subtree_control"); + } + #else - throw Error("sandboxing builds is not supported on this platform"); + if (parsedDrv->useUidRange()) + throw Error("feature 'uid-range' is not supported on this platform"); + #if __APPLE__ + /* We don't really have any parent prep work to do (yet?) + All work happens in the child, instead. */ + #else + throw Error("sandboxing builds is not supported on this platform"); + #endif #endif + } else { + if (parsedDrv->useUidRange()) + throw Error("feature 'uid-range' is only supported in sandboxed builds"); } if (needsHashRewrite() && pathExists(homeDir)) @@ -913,14 +996,16 @@ void LocalDerivationGoal::startBuilder() the calling user (if build users are disabled). */ uid_t hostUid = buildUser ? buildUser->getUID() : getuid(); uid_t hostGid = buildUser ? buildUser->getGID() : getgid(); + uid_t nrIds = buildUser ? buildUser->getUIDCount() : 1; writeFile("/proc/" + std::to_string(pid) + "/uid_map", - fmt("%d %d 1", sandboxUid(), hostUid)); + fmt("%d %d %d", sandboxUid(), hostUid, nrIds)); - writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); + if (!buildUser || buildUser->getUIDCount() == 1) + writeFile("/proc/" + std::to_string(pid) + "/setgroups", "deny"); writeFile("/proc/" + std::to_string(pid) + "/gid_map", - fmt("%d %d 1", sandboxGid(), hostGid)); + fmt("%d %d %d", sandboxGid(), hostGid, nrIds)); } else { debug("note: not using a user namespace"); if (!buildUser) @@ -947,6 +1032,10 @@ void LocalDerivationGoal::startBuilder() throw SysError("getting sandbox user namespace"); } + /* Move the child into its own cgroup. */ + if (cgroup) + writeFile(*cgroup + "/cgroup.procs", fmt("%d", (pid_t) pid)); + /* Signal the builder that we've updated its user namespace. */ writeFull(userNamespaceSync.writeSide.get(), "1"); @@ -1779,6 +1868,13 @@ void LocalDerivationGoal::runChild() if (mount("none", (chrootRootDir + "/proc").c_str(), "proc", 0, 0) == -1) throw SysError("mounting /proc"); + /* Mount sysfs on /sys. */ + if (buildUser && buildUser->getUIDCount() != 1) { + createDirs(chrootRootDir + "/sys"); + if (mount("none", (chrootRootDir + "/sys").c_str(), "sysfs", 0, 0) == -1) + throw SysError("mounting /sys"); + } + /* Mount a new tmpfs on /dev/shm to ensure that whatever the builder puts in /dev/shm is cleaned up automatically. */ if (pathExists("/dev/shm") && mount("none", (chrootRootDir + "/dev/shm").c_str(), "tmpfs", 0, @@ -1821,6 +1917,12 @@ void LocalDerivationGoal::runChild() if (unshare(CLONE_NEWNS) == -1) throw SysError("unsharing mount namespace"); + /* Unshare the cgroup namespace. This means + /proc/self/cgroup will show the child's cgroup as '/' + rather than whatever it is in the parent. */ + if (cgroup && unshare(CLONE_NEWCGROUP) == -1) + throw SysError("unsharing cgroup namespace"); + /* Do the chroot(). */ if (chdir(chrootRootDir.c_str()) == -1) throw SysError("cannot change directory to '%1%'", chrootRootDir); @@ -1906,9 +2008,8 @@ void LocalDerivationGoal::runChild() if (setUser && buildUser) { /* Preserve supplementary groups of the build user, to allow admins to specify groups such as "kvm". */ - if (!buildUser->getSupplementaryGIDs().empty() && - setgroups(buildUser->getSupplementaryGIDs().size(), - buildUser->getSupplementaryGIDs().data()) == -1) + auto gids = buildUser->getSupplementaryGIDs(); + if (setgroups(gids.size(), gids.data()) == -1) throw SysError("cannot set supplementary groups of build user"); if (setgid(buildUser->getGID()) == -1 || @@ -2237,7 +2338,10 @@ DrvOutputs LocalDerivationGoal::registerOutputs() /* Canonicalise first. This ensures that the path we're rewriting doesn't contain a hard link to /etc/shadow or something like that. */ - canonicalisePathMetaData(actualPath, buildUser ? buildUser->getUID() : -1, inodesSeen); + canonicalisePathMetaData( + actualPath, + buildUser ? std::optional(buildUser->getUIDRange()) : std::nullopt, + inodesSeen); debug("scanning for references for output '%s' in temp location '%s'", outputName, actualPath); @@ -2330,6 +2434,10 @@ DrvOutputs LocalDerivationGoal::registerOutputs() sink.s = rewriteStrings(sink.s, outputRewrites); StringSource source(sink.s); restorePath(actualPath, source); + + /* FIXME: set proper permissions in restorePath() so + we don't have to do another traversal. */ + canonicalisePathMetaData(actualPath, {}, inodesSeen); } }; @@ -2492,7 +2600,7 @@ DrvOutputs LocalDerivationGoal::registerOutputs() /* FIXME: set proper permissions in restorePath() so we don't have to do another traversal. */ - canonicalisePathMetaData(actualPath, -1, inodesSeen); + canonicalisePathMetaData(actualPath, {}, inodesSeen); /* Calculate where we'll move the output files. In the checking case we will leave leave them where they are, for now, rather than move to diff --git a/src/libstore/build/local-derivation-goal.hh b/src/libstore/build/local-derivation-goal.hh index d456e9cae..34c4e9187 100644 --- a/src/libstore/build/local-derivation-goal.hh +++ b/src/libstore/build/local-derivation-goal.hh @@ -15,6 +15,9 @@ struct LocalDerivationGoal : public DerivationGoal /* The process ID of the builder. */ Pid pid; + /* The cgroup of the builder, if any. */ + std::optional<Path> cgroup; + /* The temporary directory. */ Path tmpDir; @@ -92,8 +95,8 @@ struct LocalDerivationGoal : public DerivationGoal result. */ std::map<Path, ValidPathInfo> prevInfos; - uid_t sandboxUid() { return usingUserNamespace ? 1000 : buildUser->getUID(); } - gid_t sandboxGid() { return usingUserNamespace ? 100 : buildUser->getGID(); } + uid_t sandboxUid() { return usingUserNamespace ? (!buildUser || buildUser->getUIDCount() == 1 ? 1000 : 0) : buildUser->getUID(); } + gid_t sandboxGid() { return usingUserNamespace ? (!buildUser || buildUser->getUIDCount() == 1 ? 100 : 0) : buildUser->getGID(); } const static Path homeDir; @@ -197,6 +200,10 @@ struct LocalDerivationGoal : public DerivationGoal /* Forcibly kill the child process, if any. */ void killChild() override; + /* Kill any processes running under the build user UID or in the + cgroup of the build. */ + void killSandbox(bool getStats); + /* Create alternative path calculated from but distinct from the input, so we can avoid overwriting outputs (or other store paths) that already exist. */ diff --git a/src/libstore/cgroup.cc b/src/libstore/cgroup.cc new file mode 100644 index 000000000..f693d77be --- /dev/null +++ b/src/libstore/cgroup.cc @@ -0,0 +1,131 @@ +#if __linux__ + +#include "cgroup.hh" +#include "util.hh" + +#include <chrono> +#include <cmath> +#include <regex> +#include <unordered_set> +#include <thread> + +#include <dirent.h> + +namespace nix { + +// FIXME: obsolete, check for cgroup2 +std::map<std::string, std::string> getCgroups(const Path & cgroupFile) +{ + std::map<std::string, std::string> cgroups; + + for (auto & line : tokenizeString<std::vector<std::string>>(readFile(cgroupFile), "\n")) { + static std::regex regex("([0-9]+):([^:]*):(.*)"); + std::smatch match; + if (!std::regex_match(line, match, regex)) + throw Error("invalid line '%s' in '%s'", line, cgroupFile); + + std::string name = hasPrefix(std::string(match[2]), "name=") ? std::string(match[2], 5) : match[2]; + cgroups.insert_or_assign(name, match[3]); + } + + return cgroups; +} + +static CgroupStats destroyCgroup(const Path & cgroup, bool returnStats) +{ + if (!pathExists(cgroup)) return {}; + + auto procsFile = cgroup + "/cgroup.procs"; + + if (!pathExists(procsFile)) + throw Error("'%s' is not a cgroup", cgroup); + + /* Use the fast way to kill every process in a cgroup, if + available. */ + auto killFile = cgroup + "/cgroup.kill"; + if (pathExists(killFile)) + writeFile(killFile, "1"); + + /* Otherwise, manually kill every process in the subcgroups and + this cgroup. */ + for (auto & entry : readDirectory(cgroup)) { + if (entry.type != DT_DIR) continue; + destroyCgroup(cgroup + "/" + entry.name, false); + } + + int round = 1; + + std::unordered_set<pid_t> pidsShown; + + while (true) { + auto pids = tokenizeString<std::vector<std::string>>(readFile(procsFile)); + + if (pids.empty()) break; + + if (round > 20) + throw Error("cannot kill cgroup '%s'", cgroup); + + for (auto & pid_s : pids) { + pid_t pid; + if (auto o = string2Int<pid_t>(pid_s)) + pid = *o; + else + throw Error("invalid pid '%s'", pid); + if (pidsShown.insert(pid).second) { + try { + auto cmdline = readFile(fmt("/proc/%d/cmdline", pid)); + using namespace std::string_literals; + warn("killing stray builder process %d (%s)...", + pid, trim(replaceStrings(cmdline, "\0"s, " "))); + } catch (SysError &) { + } + } + // FIXME: pid wraparound + if (kill(pid, SIGKILL) == -1 && errno != ESRCH) + throw SysError("killing member %d of cgroup '%s'", pid, cgroup); + } + + auto sleep = std::chrono::milliseconds((int) std::pow(2.0, std::min(round, 10))); + if (sleep.count() > 100) + printError("waiting for %d ms for cgroup '%s' to become empty", sleep.count(), cgroup); + std::this_thread::sleep_for(sleep); + round++; + } + + CgroupStats stats; + + if (returnStats) { + auto cpustatPath = cgroup + "/cpu.stat"; + + if (pathExists(cpustatPath)) { + for (auto & line : tokenizeString<std::vector<std::string>>(readFile(cpustatPath), "\n")) { + std::string_view userPrefix = "user_usec "; + if (hasPrefix(line, userPrefix)) { + auto n = string2Int<uint64_t>(line.substr(userPrefix.size())); + if (n) stats.cpuUser = std::chrono::microseconds(*n); + } + + std::string_view systemPrefix = "system_usec "; + if (hasPrefix(line, systemPrefix)) { + auto n = string2Int<uint64_t>(line.substr(systemPrefix.size())); + if (n) stats.cpuSystem = std::chrono::microseconds(*n); + } + } + } + + } + + if (rmdir(cgroup.c_str()) == -1) + throw SysError("deleting cgroup '%s'", cgroup); + + return stats; +} + +CgroupStats destroyCgroup(const Path & cgroup) +{ + return destroyCgroup(cgroup, true); +} + +} + +#endif diff --git a/src/libstore/cgroup.hh b/src/libstore/cgroup.hh new file mode 100644 index 000000000..3ead4735f --- /dev/null +++ b/src/libstore/cgroup.hh @@ -0,0 +1,27 @@ +#pragma once + +#if __linux__ + +#include <chrono> +#include <optional> + +#include "types.hh" + +namespace nix { + +std::map<std::string, std::string> getCgroups(const Path & cgroupFile); + +struct CgroupStats +{ + std::optional<std::chrono::microseconds> cpuUser, cpuSystem; +}; + +/* Destroy the cgroup denoted by 'path'. The postcondition is that + 'path' does not exist, and thus any processes in the cgroup have + been killed. Also return statistics from the cgroup just before + destruction. */ +CgroupStats destroyCgroup(const Path & cgroup); + +} + +#endif diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index ff658c428..b7f55cae7 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -131,6 +131,10 @@ StringSet Settings::getDefaultSystemFeatures() StringSet features{"nixos-test", "benchmark", "big-parallel"}; #if __linux__ + features.insert("uid-range"); + #endif + + #if __linux__ if (access("/dev/kvm", R_OK | W_OK) == 0) features.insert("kvm"); #endif diff --git a/src/libstore/globals.hh b/src/libstore/globals.hh index 3dcf3d479..b40dcfa77 100644 --- a/src/libstore/globals.hh +++ b/src/libstore/globals.hh @@ -46,6 +46,14 @@ struct PluginFilesSetting : public BaseSetting<Paths> void set(const std::string & str, bool append = false) override; }; +const uint32_t maxIdsPerBuild = + #if __linux__ + 1 << 16 + #else + 1 + #endif + ; + class Settings : public Config { unsigned int getDefaultCores(); @@ -275,6 +283,38 @@ public: multi-user settings with untrusted users. )"}; + Setting<bool> autoAllocateUids{this, false, "auto-allocate-uids", + "Whether to allocate UIDs for builders automatically."}; + + Setting<uint32_t> startId{this, + #if __linux__ + 0x34000000, + #else + 56930, + #endif + "start-id", + "The first UID and GID to use for dynamic ID allocation."}; + + Setting<uint32_t> uidCount{this, + #if __linux__ + maxIdsPerBuild * 128, + #else + 128, + #endif + "id-count", + "The number of UIDs/GIDs to use for dynamic ID allocation."}; + + #if __linux__ + Setting<bool> useCgroups{ + this, false, "use-cgroups", + R"( + Whether to execute builds inside cgroups. Cgroups are + enabled automatically for derivations that require the + `uid-range` system feature. + )" + }; + #endif + Setting<bool> impersonateLinux26{this, false, "impersonate-linux-26", "Whether to impersonate a Linux 2.6 machine on newer kernels.", {"build-impersonate-linux-26"}}; @@ -563,10 +603,10 @@ public: cache) must have a signature by a trusted key. A trusted key is one listed in `trusted-public-keys`, or a public key counterpart to a private key stored in a file listed in `secret-key-files`. - + Set to `false` to disable signature checking and trust all non-content-addressed paths unconditionally. - + (Content-addressed paths are inherently trustworthy and thus unaffected by this configuration option.) )"}; diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index d374d4558..b67668e52 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -583,7 +583,10 @@ void canonicaliseTimestampAndPermissions(const Path & path) } -static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSeen & inodesSeen) +static void canonicalisePathMetaData_( + const Path & path, + std::optional<std::pair<uid_t, uid_t>> uidRange, + InodesSeen & inodesSeen) { checkInterrupt(); @@ -630,7 +633,7 @@ static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSe However, ignore files that we chown'ed ourselves previously to ensure that we don't fail on hard links within the same build (i.e. "touch $out/foo; ln $out/foo $out/bar"). */ - if (fromUid != (uid_t) -1 && st.st_uid != fromUid) { + if (uidRange && (st.st_uid < uidRange->first || st.st_uid > uidRange->second)) { if (S_ISDIR(st.st_mode) || !inodesSeen.count(Inode(st.st_dev, st.st_ino))) throw BuildError("invalid ownership on file '%1%'", path); mode_t mode = st.st_mode & ~S_IFMT; @@ -663,14 +666,17 @@ static void canonicalisePathMetaData_(const Path & path, uid_t fromUid, InodesSe if (S_ISDIR(st.st_mode)) { DirEntries entries = readDirectory(path); for (auto & i : entries) - canonicalisePathMetaData_(path + "/" + i.name, fromUid, inodesSeen); + canonicalisePathMetaData_(path + "/" + i.name, uidRange, inodesSeen); } } -void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & inodesSeen) +void canonicalisePathMetaData( + const Path & path, + std::optional<std::pair<uid_t, uid_t>> uidRange, + InodesSeen & inodesSeen) { - canonicalisePathMetaData_(path, fromUid, inodesSeen); + canonicalisePathMetaData_(path, uidRange, inodesSeen); /* On platforms that don't have lchown(), the top-level path can't be a symlink, since we can't change its ownership. */ @@ -683,10 +689,11 @@ void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & ino } -void canonicalisePathMetaData(const Path & path, uid_t fromUid) +void canonicalisePathMetaData(const Path & path, + std::optional<std::pair<uid_t, uid_t>> uidRange) { InodesSeen inodesSeen; - canonicalisePathMetaData(path, fromUid, inodesSeen); + canonicalisePathMetaData(path, uidRange, inodesSeen); } @@ -1331,7 +1338,7 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, autoGC(); - canonicalisePathMetaData(realPath, -1); + canonicalisePathMetaData(realPath, {}); optimisePath(realPath, repair); // FIXME: combine with hashPath() @@ -1444,7 +1451,7 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name narHash = narSink.finish(); } - canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath + canonicalisePathMetaData(realPath, {}); // FIXME: merge into restorePath optimisePath(realPath, repair); @@ -1486,7 +1493,7 @@ StorePath LocalStore::addTextToStore( writeFile(realPath, s); - canonicalisePathMetaData(realPath, -1); + canonicalisePathMetaData(realPath, {}); StringSink sink; dumpString(s, sink); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index bd0ce1fe6..4579c2f62 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -310,9 +310,18 @@ typedef std::set<Inode> InodesSeen; - the permissions are set of 444 or 555 (i.e., read-only with or without execute permission; setuid bits etc. are cleared) - the owner and group are set to the Nix user and group, if we're - running as root. */ -void canonicalisePathMetaData(const Path & path, uid_t fromUid, InodesSeen & inodesSeen); -void canonicalisePathMetaData(const Path & path, uid_t fromUid); + running as root. + If uidRange is not empty, this function will throw an error if it + encounters files owned by a user outside of the closed interval + [uidRange->first, uidRange->second]. +*/ +void canonicalisePathMetaData( + const Path & path, + std::optional<std::pair<uid_t, uid_t>> uidRange, + InodesSeen & inodesSeen); +void canonicalisePathMetaData( + const Path & path, + std::optional<std::pair<uid_t, uid_t>> uidRange); void canonicaliseTimestampAndPermissions(const Path & path); diff --git a/src/libstore/lock.cc b/src/libstore/lock.cc index fa718f55d..2858137d6 100644 --- a/src/libstore/lock.cc +++ b/src/libstore/lock.cc @@ -2,105 +2,197 @@ #include "globals.hh" #include "pathlocks.hh" -#include <grp.h> #include <pwd.h> - -#include <fcntl.h> -#include <unistd.h> +#include <grp.h> namespace nix { -UserLock::UserLock() +struct SimpleUserLock : UserLock { - assert(settings.buildUsersGroup != ""); - createDirs(settings.nixStateDir + "/userpool"); -} + AutoCloseFD fdUserLock; + uid_t uid; + gid_t gid; + std::vector<gid_t> supplementaryGIDs; + + uid_t getUID() override { assert(uid); return uid; } + uid_t getUIDCount() override { return 1; } + gid_t getGID() override { assert(gid); return gid; } + + std::vector<gid_t> getSupplementaryGIDs() override { return supplementaryGIDs; } + + static std::unique_ptr<UserLock> acquire() + { + assert(settings.buildUsersGroup != ""); + createDirs(settings.nixStateDir + "/userpool"); + + /* Get the members of the build-users-group. */ + struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (!gr) + throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup); + + /* Copy the result of getgrnam. */ + Strings users; + for (char * * p = gr->gr_mem; *p; ++p) { + debug("found build user '%s'", *p); + users.push_back(*p); + } -bool UserLock::findFreeUser() { - if (enabled()) return true; - - /* Get the members of the build-users-group. */ - struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); - if (!gr) - throw Error("the group '%1%' specified in 'build-users-group' does not exist", - settings.buildUsersGroup); - gid = gr->gr_gid; - - /* Copy the result of getgrnam. */ - Strings users; - for (char * * p = gr->gr_mem; *p; ++p) { - debug("found build user '%1%'", *p); - users.push_back(*p); + if (users.empty()) + throw Error("the build users group '%s' has no members", settings.buildUsersGroup); + + /* Find a user account that isn't currently in use for another + build. */ + for (auto & i : users) { + debug("trying user '%s'", i); + + struct passwd * pw = getpwnam(i.c_str()); + if (!pw) + throw Error("the user '%s' in the group '%s' does not exist", i, settings.buildUsersGroup); + + auto fnUserLock = fmt("%s/userpool/%s", settings.nixStateDir,pw->pw_uid); + + AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) + throw SysError("opening user lock '%s'", fnUserLock); + + if (lockFile(fd.get(), ltWrite, false)) { + auto lock = std::make_unique<SimpleUserLock>(); + + lock->fdUserLock = std::move(fd); + lock->uid = pw->pw_uid; + lock->gid = gr->gr_gid; + + /* Sanity check... */ + if (lock->uid == getuid() || lock->uid == geteuid()) + throw Error("the Nix user should not be a member of '%s'", settings.buildUsersGroup); + + #if __linux__ + /* Get the list of supplementary groups of this build + user. This is usually either empty or contains a + group such as "kvm". */ + int ngroups = 32; // arbitrary initial guess + std::vector<gid_t> gids; + gids.resize(ngroups); + + int err = getgrouplist( + pw->pw_name, pw->pw_gid, + gids.data(), + &ngroups); + + /* Our initial size of 32 wasn't sufficient, the + correct size has been stored in ngroups, so we try + again. */ + if (err == -1) { + gids.resize(ngroups); + err = getgrouplist( + pw->pw_name, pw->pw_gid, + gids.data(), + &ngroups); + } + + // If it failed once more, then something must be broken. + if (err == -1) + throw Error("failed to get list of supplementary groups for '%s'", pw->pw_name); + + // Finally, trim back the GID list to its real size. + for (auto i = 0; i < ngroups; i++) + if (gids[i] != lock->gid) + lock->supplementaryGIDs.push_back(gids[i]); + #endif + + return lock; + } + } + + return nullptr; } +}; - if (users.empty()) - throw Error("the build users group '%1%' has no members", - settings.buildUsersGroup); +struct AutoUserLock : UserLock +{ + AutoCloseFD fdUserLock; + uid_t firstUid = 0; + gid_t firstGid = 0; + uid_t nrIds = 1; - /* Find a user account that isn't currently in use for another - build. */ - for (auto & i : users) { - debug("trying user '%1%'", i); + uid_t getUID() override { assert(firstUid); return firstUid; } - struct passwd * pw = getpwnam(i.c_str()); - if (!pw) - throw Error("the user '%1%' in the group '%2%' does not exist", - i, settings.buildUsersGroup); + gid_t getUIDCount() override { return nrIds; } + gid_t getGID() override { assert(firstGid); return firstGid; } - fnUserLock = (format("%1%/userpool/%2%") % settings.nixStateDir % pw->pw_uid).str(); + std::vector<gid_t> getSupplementaryGIDs() override { return {}; } - AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); - if (!fd) - throw SysError("opening user lock '%1%'", fnUserLock); + static std::unique_ptr<UserLock> acquire(uid_t nrIds, bool useChroot) + { + settings.requireExperimentalFeature(Xp::AutoAllocateUids); + assert(settings.startId > 0); + assert(settings.uidCount % maxIdsPerBuild == 0); + assert((uint64_t) settings.startId + (uint64_t) settings.uidCount <= std::numeric_limits<uid_t>::max()); + assert(nrIds <= maxIdsPerBuild); - if (lockFile(fd.get(), ltWrite, false)) { - fdUserLock = std::move(fd); - user = i; - uid = pw->pw_uid; + createDirs(settings.nixStateDir + "/userpool2"); - /* Sanity check... */ - if (uid == getuid() || uid == geteuid()) - throw Error("the Nix user should not be a member of '%1%'", - settings.buildUsersGroup); + size_t nrSlots = settings.uidCount / maxIdsPerBuild; -#if __linux__ - /* Get the list of supplementary groups of this build user. This - is usually either empty or contains a group such as "kvm". */ - int ngroups = 32; // arbitrary initial guess - supplementaryGIDs.resize(ngroups); + for (size_t i = 0; i < nrSlots; i++) { + debug("trying user slot '%d'", i); - int err = getgrouplist(pw->pw_name, pw->pw_gid, supplementaryGIDs.data(), - &ngroups); + createDirs(settings.nixStateDir + "/userpool2"); - // Our initial size of 32 wasn't sufficient, the correct size has - // been stored in ngroups, so we try again. - if (err == -1) { - supplementaryGIDs.resize(ngroups); - err = getgrouplist(pw->pw_name, pw->pw_gid, supplementaryGIDs.data(), - &ngroups); - } + auto fnUserLock = fmt("%s/userpool2/slot-%d", settings.nixStateDir, i); + + AutoCloseFD fd = open(fnUserLock.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600); + if (!fd) + throw SysError("opening user lock '%s'", fnUserLock); - // If it failed once more, then something must be broken. - if (err == -1) - throw Error("failed to get list of supplementary groups for '%1%'", - pw->pw_name); + if (lockFile(fd.get(), ltWrite, false)) { - // Finally, trim back the GID list to its real size - supplementaryGIDs.resize(ngroups); -#endif + auto firstUid = settings.startId + i * maxIdsPerBuild; - isEnabled = true; - return true; + auto pw = getpwuid(firstUid); + if (pw) + throw Error("auto-allocated UID %d clashes with existing user account '%s'", firstUid, pw->pw_name); + + auto lock = std::make_unique<AutoUserLock>(); + lock->fdUserLock = std::move(fd); + lock->firstUid = firstUid; + if (useChroot) + lock->firstGid = firstUid; + else { + struct group * gr = getgrnam(settings.buildUsersGroup.get().c_str()); + if (!gr) + throw Error("the group '%s' specified in 'build-users-group' does not exist", settings.buildUsersGroup); + lock->firstGid = gr->gr_gid; + } + lock->nrIds = nrIds; + return lock; + } } + + return nullptr; } +}; - return false; +std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds, bool useChroot) +{ + if (settings.autoAllocateUids) + return AutoUserLock::acquire(nrIds, useChroot); + else + return SimpleUserLock::acquire(); } -void UserLock::kill() +bool useBuildUsers() { - killUser(uid); + #if __linux__ + static bool b = (settings.buildUsersGroup != "" || settings.startId.get() != 0) && getuid() == 0; + return b; + #elif __APPLE__ + static bool b = settings.buildUsersGroup != "" && getuid() == 0; + return b; + #else + return false; + #endif } } diff --git a/src/libstore/lock.hh b/src/libstore/lock.hh index 3d29a7b5b..49ad86de7 100644 --- a/src/libstore/lock.hh +++ b/src/libstore/lock.hh @@ -1,37 +1,38 @@ #pragma once -#include "sync.hh" #include "types.hh" -#include "util.hh" + +#include <optional> + +#include <sys/types.h> namespace nix { -class UserLock +struct UserLock { -private: - Path fnUserLock; - AutoCloseFD fdUserLock; + virtual ~UserLock() { } - bool isEnabled = false; - std::string user; - uid_t uid = 0; - gid_t gid = 0; - std::vector<gid_t> supplementaryGIDs; + /* Get the first and last UID. */ + std::pair<uid_t, uid_t> getUIDRange() + { + auto first = getUID(); + return {first, first + getUIDCount() - 1}; + } -public: - UserLock(); + /* Get the first UID. */ + virtual uid_t getUID() = 0; - void kill(); + virtual uid_t getUIDCount() = 0; - std::string getUser() { return user; } - uid_t getUID() { assert(uid); return uid; } - uid_t getGID() { assert(gid); return gid; } - std::vector<gid_t> getSupplementaryGIDs() { return supplementaryGIDs; } + virtual gid_t getGID() = 0; - bool findFreeUser(); + virtual std::vector<gid_t> getSupplementaryGIDs() = 0; +}; - bool enabled() { return isEnabled; } +/* Acquire a user lock for a UID range of size `nrIds`. Note that this + may return nullptr if no user is available. */ +std::unique_ptr<UserLock> acquireUserLock(uid_t nrIds, bool useChroot); -}; +bool useBuildUsers(); } diff --git a/src/libstore/parsed-derivations.cc b/src/libstore/parsed-derivations.cc index 59a30db10..cc4a94fab 100644 --- a/src/libstore/parsed-derivations.cc +++ b/src/libstore/parsed-derivations.cc @@ -89,6 +89,7 @@ std::optional<Strings> ParsedDerivation::getStringsAttr(const std::string & name StringSet ParsedDerivation::getRequiredSystemFeatures() const { + // FIXME: cache this? StringSet res; for (auto & i : getStringsAttr("requiredSystemFeatures").value_or(Strings())) res.insert(i); @@ -124,6 +125,11 @@ bool ParsedDerivation::substitutesAllowed() const return getBoolAttr("allowSubstitutes", true); } +bool ParsedDerivation::useUidRange() const +{ + return getRequiredSystemFeatures().count("uid-range"); +} + static std::regex shVarName("[A-Za-z_][A-Za-z0-9_]*"); std::optional<nlohmann::json> ParsedDerivation::prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths) diff --git a/src/libstore/parsed-derivations.hh b/src/libstore/parsed-derivations.hh index 95bec21e8..bfb3857c0 100644 --- a/src/libstore/parsed-derivations.hh +++ b/src/libstore/parsed-derivations.hh @@ -38,6 +38,8 @@ public: bool substitutesAllowed() const; + bool useUidRange() const; + std::optional<nlohmann::json> prepareStructuredAttrs(Store & store, const StorePathSet & inputPaths); }; diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index fa79cca6b..e0902971e 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -14,6 +14,8 @@ std::map<ExperimentalFeature, std::string> stringifiedXpFeatures = { { Xp::NoUrlLiterals, "no-url-literals" }, { Xp::FetchClosure, "fetch-closure" }, { Xp::ReplFlake, "repl-flake" }, + { Xp::AutoAllocateUids, "auto-allocate-uids" }, + { Xp::Cgroups, "cgroups" }, }; const std::optional<ExperimentalFeature> parseExperimentalFeature(const std::string_view & name) diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index d09ab025c..af775feb0 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -23,6 +23,8 @@ enum struct ExperimentalFeature NoUrlLiterals, FetchClosure, ReplFlake, + AutoAllocateUids, + Cgroups, }; /** diff --git a/src/libutil/filesystem.cc b/src/libutil/filesystem.cc index 403389e60..3a732cff8 100644 --- a/src/libutil/filesystem.cc +++ b/src/libutil/filesystem.cc @@ -1,5 +1,6 @@ #include <sys/time.h> #include <filesystem> +#include <atomic> #include "finally.hh" #include "util.hh" @@ -10,7 +11,7 @@ namespace fs = std::filesystem; namespace nix { static Path tempName(Path tmpRoot, const Path & prefix, bool includePid, - int & counter) + std::atomic<unsigned int> & counter) { tmpRoot = canonPath(tmpRoot.empty() ? getEnv("TMPDIR").value_or("/tmp") : tmpRoot, true); if (includePid) @@ -22,9 +23,9 @@ static Path tempName(Path tmpRoot, const Path & prefix, bool includePid, Path createTempDir(const Path & tmpRoot, const Path & prefix, bool includePid, bool useGlobalCounter, mode_t mode) { - static int globalCounter = 0; - int localCounter = 0; - int & counter(useGlobalCounter ? globalCounter : localCounter); + static std::atomic<unsigned int> globalCounter = 0; + std::atomic<unsigned int> localCounter = 0; + auto & counter(useGlobalCounter ? globalCounter : localCounter); while (1) { checkInterrupt(); diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc index 23f2ad3cf..b59a6d026 100644 --- a/src/nix-store/nix-store.cc +++ b/src/nix-store/nix-store.cc @@ -516,7 +516,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise) if (!store->isValidPath(info->path) || reregister) { /* !!! races */ if (canonicalise) - canonicalisePathMetaData(store->printStorePath(info->path), -1); + canonicalisePathMetaData(store->printStorePath(info->path), {}); if (!hashGiven) { HashResult hash = hashPath(htSHA256, store->printStorePath(info->path)); info->narHash = hash.first; diff --git a/src/nix/build.cc b/src/nix/build.cc index 85b1efc33..94b169167 100644 --- a/src/nix/build.cc +++ b/src/nix/build.cc @@ -30,6 +30,10 @@ nlohmann::json builtPathsWithResultToJSON(const std::vector<BuiltPathWithResult> if (b.result) { j["startTime"] = b.result->startTime; j["stopTime"] = b.result->stopTime; + if (b.result->cpuUser) + j["cpuUser"] = ((double) b.result->cpuUser->count()) / 1000000; + if (b.result->cpuSystem) + j["cpuSystem"] = ((double) b.result->cpuSystem->count()) / 1000000; } res.push_back(j); }, b.path.raw()); |