diff options
author | Artemis Tosini <lix@artem.ist> | 2024-04-26 17:26:45 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@lix> | 2024-04-26 17:26:45 +0000 |
commit | 789aa39576a3c45ab63278cd01e1538c27e1ce9f (patch) | |
tree | faf22edc533ae9ce60916d94c9c7414b49234641 | |
parent | a1ad4e52a667d76472e8a5a3daf44c0eb34c2150 (diff) | |
parent | c03de0df627864fb7e83e9af88201b8a5fcd4930 (diff) |
Merge "gc: Find roots using libproc on Darwin" into main
-rw-r--r-- | src/libstore/local.mk | 2 | ||||
-rw-r--r-- | src/libstore/meson.build | 3 | ||||
-rw-r--r-- | src/libstore/platform.cc | 4 | ||||
-rw-r--r-- | src/libstore/platform/darwin.cc | 223 | ||||
-rw-r--r-- | src/libstore/platform/darwin.hh | 35 | ||||
-rw-r--r-- | src/libstore/platform/linux.cc | 10 | ||||
-rw-r--r-- | src/libutil/meson.build | 2 | ||||
-rw-r--r-- | src/libutil/regex.cc | 16 | ||||
-rw-r--r-- | src/libutil/regex.hh | 11 | ||||
-rw-r--r-- | tests/functional/common/vars-and-functions.sh.in | 1 | ||||
-rw-r--r-- | tests/functional/gc-runtime.nix | 36 | ||||
-rw-r--r-- | tests/functional/gc-runtime.sh | 32 |
12 files changed, 341 insertions, 34 deletions
diff --git a/src/libstore/local.mk b/src/libstore/local.mk index 6bd73965d..078a63c83 100644 --- a/src/libstore/local.mk +++ b/src/libstore/local.mk @@ -7,6 +7,8 @@ libstore_DIR := $(d) libstore_SOURCES := $(wildcard $(d)/*.cc $(d)/builtins/*.cc $(d)/build/*.cc) ifdef HOST_LINUX libstore_SOURCES += $(d)/platform/linux.cc +else ifdef HOST_DARWIN +libstore_SOURCES += $(d)/platform/darwin.cc else libstore_SOURCES += $(d)/platform/fallback.cc endif diff --git a/src/libstore/meson.build b/src/libstore/meson.build index 94471dc29..5fde92dd0 100644 --- a/src/libstore/meson.build +++ b/src/libstore/meson.build @@ -162,6 +162,9 @@ libstore_headers = files( if host_machine.system() == 'linux' libstore_sources += files('platform/linux.cc') libstore_headers += files('platform/linux.hh') +elif host_machine.system() == 'darwin' + libstore_sources += files('platform/darwin.cc') + libstore_headers += files('platform/darwin.hh') else libstore_sources += files('platform/fallback.cc') libstore_headers += files('platform/fallback.hh') diff --git a/src/libstore/platform.cc b/src/libstore/platform.cc index 9c389ef55..acdedab99 100644 --- a/src/libstore/platform.cc +++ b/src/libstore/platform.cc @@ -2,6 +2,8 @@ #if __linux__ #include "platform/linux.hh" +#elif __APPLE__ +#include "platform/darwin.hh" #else #include "platform/fallback.hh" #endif @@ -11,6 +13,8 @@ std::shared_ptr<LocalStore> LocalStore::makeLocalStore(const Params & params) { #if __linux__ return std::shared_ptr<LocalStore>(new LinuxLocalStore(params)); +#elif __APPLE__ + return std::shared_ptr<LocalStore>(new DarwinLocalStore(params)); #else return std::shared_ptr<LocalStore>(new FallbackLocalStore(params)); #endif diff --git a/src/libstore/platform/darwin.cc b/src/libstore/platform/darwin.cc new file mode 100644 index 000000000..bbb81784c --- /dev/null +++ b/src/libstore/platform/darwin.cc @@ -0,0 +1,223 @@ +#include "gc-store.hh" +#include "signals.hh" +#include "platform/darwin.hh" +#include "regex.hh" + +#include 
<sys/proc_info.h> +#include <sys/sysctl.h> +#include <libproc.h> + +#include <regex> + +namespace nix { + +void DarwinLocalStore::findPlatformRoots(UncheckedRoots & unchecked) +{ + auto storePathRegex = regex::storePathRegex(storeDir); + + std::vector<int> pids; + int pidBufSize = 1; + + while (pidBufSize > pids.size() * sizeof(int)) { + // Reserve some extra size so we don't fail too much + pids.resize((pidBufSize + pidBufSize / 8) / sizeof(int)); + pidBufSize = proc_listpids(PROC_ALL_PIDS, 0, pids.data(), pids.size() * sizeof(int)); + + if (pidBufSize <= 0) { + throw SysError("Listing PIDs"); + } + } + + pids.resize(pidBufSize / sizeof(int)); + + for (auto pid : pids) { + // It doesn't make sense to ask about the kernel + if (pid == 0) { + continue; + } + + try { + // Process cwd/root directory + struct proc_vnodepathinfo vnodeInfo; + if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0) { + throw SysError("Getting pid %1% working directory", pid); + } + + unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace(fmt("{libproc/%d/cwd}", pid) + ); + unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace( + fmt("{libproc/%d/rootdir}", pid) + ); + + // File descriptors + std::vector<struct proc_fdinfo> fds; + int fdBufSize = 1; + while (fdBufSize > fds.size() * sizeof(struct proc_fdinfo)) { + // Reserve some extra size so we don't fail too much + fds.resize((fdBufSize + fdBufSize / 8) / sizeof(struct proc_fdinfo)); + fdBufSize = proc_pidinfo( + pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo) + ); + + if (fdBufSize <= 0) { + throw SysError("Listing pid %1% file descriptors", pid); + } + } + fds.resize(fdBufSize / sizeof(struct proc_fdinfo)); + + for (auto fd : fds) { + // By definition, only a vnode is on the filesystem + if (fd.proc_fdtype != PROX_FDTYPE_VNODE) { + continue; + } + + struct vnode_fdinfowithpath fdInfo; + if (proc_pidfdinfo( + pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, 
sizeof(fdInfo) + ) + <= 0) + { + // They probably just closed this fd, no need to cancel looking at ranges and + // arguments + if (errno == EBADF) { + continue; + } + throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd); + } + + unchecked[std::string(fdInfo.pvip.vip_path)].emplace( + fmt("{libproc/%d/fd/%d}", pid, fd.proc_fd) + ); + } + + // Regions (e.g. mmapped files, executables, shared libraries) + uint64_t nextAddr = 0; + while (true) { + // Seriously, what are you doing XNU? + // There's 3 flavors of PROC_PIDREGIONPATHINFO: + // * PROC_PIDREGIONPATHINFO includes all regions + // * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode + // * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a specified + // filesystem. Only PROC_PIDREGIONPATHINFO is documented. Unfortunately, using it + // would make finding gcroots take about 100x as long and tests would fail from + // timeout. According to the Frida source code, PROC_PIDREGIONPATHINFO2 has been + // available since XNU 2782.1.97 in OS X 10.10 + // + // 22 means PROC_PIDREGIONPATHINFO2 + struct proc_regionwithpathinfo regionInfo; + if (proc_pidinfo(pid, 22, nextAddr, &regionInfo, sizeof(regionInfo)) <= 0) { + // PROC_PIDREGIONPATHINFO signals we're done with an error, + // so we're expected to hit this once per process + if (errno == ESRCH || errno == EINVAL) { + break; + } + throw SysError("Getting pid %1% region path", pid); + } + + unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace( + fmt("{libproc/%d/region}", pid) + ); + + nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size; + } + + // Arguments and environment variables + // We can't read environment variables of binaries with entitlements unless + // nix has the `com.apple.private.read-environment-variables` entitlement or SIP is off + // We can read arguments for all applications though.
+ + // Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar, + // but both have exclusive capabilities + int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid}; + size_t argsSize = 0; + if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) { + throw SysError("Reading pid %1% arguments", pid); + } + + std::vector<char> args(argsSize); + if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) { + throw SysError("Reading pid %1% arguments", pid); + } + + if (argsSize < args.size()) { + args.resize(argsSize); + } + + // We have these perfectly nice arguments, but have to ignore them because + // otherwise we'd see arguments to nix-store commands and + // `nix-store --delete /nix/store/whatever` would always fail + // First 4 bytes are an int of argc. + if (args.size() < sizeof(int)) { + continue; + } + auto argc = reinterpret_cast<int *>(args.data())[0]; + + auto argsIter = args.begin(); + std::advance(argsIter, sizeof(int)); + // Executable then argc args, each separated by some number of null bytes + for (int i = 0; argsIter != args.end() && i < argc + 1; i++) { + argsIter = std::find(argsIter, args.end(), '\0'); + argsIter = std::find_if(argsIter, args.end(), [](char ch) { return ch != '\0'; }); + } + + if (argsIter != args.end()) { + auto env_end = std::sregex_iterator{}; + for (auto i = std::sregex_iterator{argsIter, args.end(), storePathRegex}; + i != env_end; + ++i) + { + unchecked[i->str()].emplace(fmt("{libproc/%d/environ}", pid)); + } + }; + + // Per-thread working directories + struct proc_taskallinfo taskAllInfo; + if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0) { + throw SysError("Reading pid %1% tasks", pid); + } + + // If the process doesn't have the per-thread cwd flag then we already have the + // process-wide cwd from PROC_PIDVNODEPATHINFO + if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) { + std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum); + int tidBufSize = 
proc_pidinfo( + pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t) + ); + if (tidBufSize <= 0) { + throw SysError("Listing pid %1% threads", pid); + } + + for (auto tid : tids) { + struct proc_threadwithpathinfo threadPathInfo; + if (proc_pidinfo( + pid, + PROC_PIDTHREADPATHINFO, + tid, + &threadPathInfo, + sizeof(threadPathInfo) + ) + <= 0) + { + throw SysError("Reading pid %1% thread %2% cwd", pid, tid); + } + + unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace( + fmt("{libproc/%d/thread/%d/cwd}", pid, tid) + ); + } + } + } catch (SysError & e) { + // ENOENT/ESRCH: Process no longer exists (proc_info) + // EINVAL: Process no longer exists (sysctl) + // EACCES/EPERM: We don't have permission to read this field (proc_info) + // EIO: Kernel failed to read from target process memory during KERN_PROCARGS2 (sysctl) + if (errno == ENOENT || errno == ESRCH || errno == EINVAL || errno == EACCES + || errno == EPERM || errno == EIO) + { + continue; + } + throw; + } + } +} +} diff --git a/src/libstore/platform/darwin.hh b/src/libstore/platform/darwin.hh new file mode 100644 index 000000000..b7170aa05 --- /dev/null +++ b/src/libstore/platform/darwin.hh @@ -0,0 +1,35 @@ +#pragma once +///@file + +#include "gc-store.hh" +#include "local-store.hh" + +namespace nix { + +/** + * Darwin-specific implementation of LocalStore + */ +class DarwinLocalStore : public LocalStore +{ +public: + DarwinLocalStore(const Params & params) + : StoreConfig(params) + , LocalFSStoreConfig(params) + , LocalStoreConfig(params) + , Store(params) + , LocalFSStore(params) + , LocalStore(params) + { + } + DarwinLocalStore(const std::string scheme, std::string path, const Params & params) + : DarwinLocalStore(params) + { + throw UnimplementedError("DarwinLocalStore"); + } + +private: + + void findPlatformRoots(UncheckedRoots & unchecked) override; +}; + +} diff --git a/src/libstore/platform/linux.cc b/src/libstore/platform/linux.cc index 9be3e47da..a34608894 100644 ---
a/src/libstore/platform/linux.cc +++ b/src/libstore/platform/linux.cc @@ -1,6 +1,7 @@ #include "gc-store.hh" #include "signals.hh" #include "platform/linux.hh" +#include "regex.hh" #include <regex> @@ -26,12 +27,6 @@ static void readProcLink(const std::string & file, UncheckedRoots & roots) } } -static std::string quoteRegexChars(const std::string & raw) -{ - static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])"); - return std::regex_replace(raw, specialRegex, R"(\$&)"); -} - static void readFileRoots(const char * path, UncheckedRoots & roots) { try { @@ -50,8 +45,7 @@ void LinuxLocalStore::findPlatformRoots(UncheckedRoots & unchecked) struct dirent * ent; auto digitsRegex = std::regex(R"(^\d+$)"); auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)"); - auto storePathRegex = - std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)"); + auto storePathRegex = regex::storePathRegex(storeDir); while (errno = 0, ent = readdir(procDir.get())) { checkInterrupt(); if (std::regex_match(ent->d_name, digitsRegex)) { diff --git a/src/libutil/meson.build b/src/libutil/meson.build index 11bf97ee7..069798a6f 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -22,6 +22,7 @@ libutil_sources = files( 'position.cc', 'print-elided.cc', 'references.cc', + 'regex.cc', 'serialise.cc', 'shlex.cc', 'signals.cc', @@ -77,6 +78,7 @@ libutil_headers = files( 'ref.hh', 'references.hh', 'regex-combinators.hh', + 'regex.hh', 'repair-flag.hh', 'serialise.hh', 'shlex.hh', diff --git a/src/libutil/regex.cc b/src/libutil/regex.cc new file mode 100644 index 000000000..a9e6c6bee --- /dev/null +++ b/src/libutil/regex.cc @@ -0,0 +1,16 @@ +#include <string> +#include <regex> + +namespace nix::regex { +std::string quoteRegexChars(const std::string & raw) +{ + static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])"); + return std::regex_replace(raw, specialRegex, R"(\$&)"); +} + +std::regex storePathRegex(const std::string & 
storeDir) +{ + return std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)"); +} + +} diff --git a/src/libutil/regex.hh b/src/libutil/regex.hh new file mode 100644 index 000000000..744a7d54a --- /dev/null +++ b/src/libutil/regex.hh @@ -0,0 +1,11 @@ +#pragma once +///@file + +#include <string> +#include <regex> + +namespace nix::regex { +std::string quoteRegexChars(const std::string & raw); + +std::regex storePathRegex(const std::string & storeDir); +} diff --git a/tests/functional/common/vars-and-functions.sh.in b/tests/functional/common/vars-and-functions.sh.in index b054bf834..3d2e44024 100644 --- a/tests/functional/common/vars-and-functions.sh.in +++ b/tests/functional/common/vars-and-functions.sh.in @@ -24,7 +24,6 @@ if [[ -n $NIX_STORE ]]; then export _NIX_TEST_NO_SANDBOX=1 fi export _NIX_IN_TEST=$TEST_ROOT/shared -export _NIX_TEST_NO_LSOF=1 export NIX_REMOTE=${NIX_REMOTE_-} unset NIX_PATH export TEST_HOME=$TEST_ROOT/test-home diff --git a/tests/functional/gc-runtime.nix b/tests/functional/gc-runtime.nix index ee5980bdf..4303e0880 100644 --- a/tests/functional/gc-runtime.nix +++ b/tests/functional/gc-runtime.nix @@ -1,17 +1,29 @@ with import ./config.nix; -mkDerivation { - name = "gc-runtime"; - builder = - # Test inline source file definitions. - builtins.toFile "builder.sh" '' - mkdir $out +{ + environ = mkDerivation { + name = "gc-runtime-environ"; + buildCommand = "mkdir $out; echo environ > $out/environ"; + }; - cat > $out/program <<EOF - #! ${shell} - sleep 10000 - EOF + open = mkDerivation { + name = "gc-runtime-open"; + buildCommand = "mkdir $out; echo open > $out/open"; + }; - chmod +x $out/program - ''; + program = mkDerivation { + name = "gc-runtime-program"; + builder = + # Test inline source file definitions. + builtins.toFile "builder.sh" '' + mkdir $out + + cat > $out/program <<EOF + #! 
${shell} + sleep 10000 < \$1 + EOF + + chmod +x $out/program + ''; + }; } diff --git a/tests/functional/gc-runtime.sh b/tests/functional/gc-runtime.sh index dc1826a55..6e17acfc0 100644 --- a/tests/functional/gc-runtime.sh +++ b/tests/functional/gc-runtime.sh @@ -1,38 +1,44 @@ source common.sh -case $system in - *linux*) - ;; - *) - skipTest "Not running Linux"; -esac - set -m # enable job control, needed for kill profiles="$NIX_STATE_DIR"/profiles rm -rf $profiles -nix-env -p $profiles/test -f ./gc-runtime.nix -i gc-runtime +nix-env -p $profiles/test -f ./gc-runtime.nix -i gc-runtime-{program,environ,open} -outPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime) -echo $outPath +programPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-program) +environPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-environ) +openPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-open) +echo $programPath $environPath $openPath echo "backgrounding program..." -$profiles/test/program & +export environPath +$profiles/test/program $openPath/open & sleep 2 # hack - wait for the program to get started child=$! echo PID=$child -nix-env -p $profiles/test -e gc-runtime +nix-env -p $profiles/test -e gc-runtime-{program,environ,open} nix-env -p $profiles/test --delete-generations old nix-store --gc kill -- -$child -if ! test -e $outPath; then +if ! test -e $programPath; then echo "running program was garbage collected!" exit 1 fi +if ! test -e $environPath; then + echo "file in environment variable was garbage collected!" + exit 1 +fi + +if ! test -e $openPath; then + echo "opened file was garbage collected!" + exit 1 +fi + exit 0 |