diff options
author | Artemis Tosini <lix@artem.ist> | 2024-04-26 17:26:45 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@lix> | 2024-04-26 17:26:45 +0000 |
commit | 789aa39576a3c45ab63278cd01e1538c27e1ce9f (patch) | |
tree | faf22edc533ae9ce60916d94c9c7414b49234641 /src/libstore/platform/darwin.cc | |
parent | a1ad4e52a667d76472e8a5a3daf44c0eb34c2150 (diff) | |
parent | c03de0df627864fb7e83e9af88201b8a5fcd4930 (diff) |
Merge "gc: Find roots using libproc on Darwin" into main
Diffstat (limited to 'src/libstore/platform/darwin.cc')
-rw-r--r-- | src/libstore/platform/darwin.cc | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/src/libstore/platform/darwin.cc b/src/libstore/platform/darwin.cc new file mode 100644 index 000000000..bbb81784c --- /dev/null +++ b/src/libstore/platform/darwin.cc @@ -0,0 +1,223 @@ +#include "gc-store.hh" +#include "signals.hh" +#include "platform/darwin.hh" +#include "regex.hh" + +#include <sys/proc_info.h> +#include <sys/sysctl.h> +#include <libproc.h> + +#include <regex> + +namespace nix { + +void DarwinLocalStore::findPlatformRoots(UncheckedRoots & unchecked) +{ + auto storePathRegex = regex::storePathRegex(storeDir); + + std::vector<int> pids; + int pidBufSize = 1; + + while (pidBufSize > pids.size() * sizeof(int)) { + // Reserve some extra size so we don't fail too much + pids.resize((pidBufSize + pidBufSize / 8) / sizeof(int)); + pidBufSize = proc_listpids(PROC_ALL_PIDS, 0, pids.data(), pids.size() * sizeof(int)); + + if (pidBufSize <= 0) { + throw SysError("Listing PIDs"); + } + } + + pids.resize(pidBufSize / sizeof(int)); + + for (auto pid : pids) { + // It doesn't make sense to ask about the kernel + if (pid == 0) { + continue; + } + + try { + // Process cwd/root directory + struct proc_vnodepathinfo vnodeInfo; + if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0) { + throw SysError("Getting pid %1% working directory", pid); + } + + unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace(fmt("{libproc/%d/cwd}", pid) + ); + unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace( + fmt("{libproc/%d/rootdir}", pid) + ); + + // File descriptors + std::vector<struct proc_fdinfo> fds; + int fdBufSize = 1; + while (fdBufSize > fds.size() * sizeof(struct proc_fdinfo)) { + // Reserve some extra size so we don't fail too much + fds.resize((fdBufSize + fdBufSize / 8) / sizeof(struct proc_fdinfo)); + fdBufSize = proc_pidinfo( + pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo) + ); + + if (fdBufSize <= 0) { + throw SysError("Listing pid %1% file descriptors", pid); + } + } + fds.resize(fdBufSize / sizeof(struct proc_fdinfo)); + + for (auto fd : fds) { + // By definition, only a vnode is on the filesystem + if (fd.proc_fdtype != PROX_FDTYPE_VNODE) { + continue; + } + + struct vnode_fdinfowithpath fdInfo; + if (proc_pidfdinfo( + pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo) + ) + <= 0) + { + // They probably just closed this fd, no need to cancel looking at ranges and + // arguments + if (errno == EBADF) { + continue; + } + throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd); + } + + unchecked[std::string(fdInfo.pvip.vip_path)].emplace( + fmt("{libproc/%d/fd/%d}", pid, fd.proc_fd) + ); + } + + // Regions (e.g. mmapped files, executables, shared libraries) + uint64_t nextAddr = 0; + while (true) { + // Seriously, what are you doing XNU? + // There's 3 flavors of PROC_PIDREGIONPATHINFO: + // * PROC_PIDREGIONPATHINFO includes all regions + // * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode + // * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a specified + // filesystem Only PROC_PIDREGIONPATHINFO is documented. Unfortunately, using it + // would make finding gcroots take about 100x as long and tests would fail from + // timeout. According to the Frida source code, PROC_PIDREGIONPATHINFO2 has been + // available since XNU 2782.1.97 in OS X 10.10 + // + // 22 means PROC_PIDREGIONPATHINFO2 + struct proc_regionwithpathinfo regionInfo; + if (proc_pidinfo(pid, 22, nextAddr, ®ionInfo, sizeof(regionInfo)) <= 0) { + // PROC_PIDREGIONPATHINFO signals we're done with an error, + // so we're expected to hit this once per process + if (errno == ESRCH || errno == EINVAL) { + break; + } + throw SysError("Getting pid %1% region path", pid); + } + + unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace( + fmt("{libproc/%d/region}", pid) + ); + + nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size; + } + + // Arguments and environment variables + // We can't read environment variables of binaries with entitlements unless + // nix has the `com.apple.private.read-environment-variables` entitlement or SIP is off + // We can read arguments for all applications though. + + // Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar, + // but both have exclusive capabilities + int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid}; + size_t argsSize = 0; + if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) { + throw SysError("Reading pid %1% arguments", pid); + } + + std::vector<char> args(argsSize); + if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) { + throw SysError("Reading pid %1% arguments", pid); + } + + if (argsSize < args.size()) { + args.resize(argsSize); + } + + // We have these perfectly nice arguments, but have to ignore them because + // otherwise we'd see arguments to nix-store commands and + // `nix-store --delete /nix/store/whatever` would always fail + // First 4 bytes are an int of argc. + if (args.size() < sizeof(int)) { + continue; + } + auto argc = reinterpret_cast<int *>(args.data())[0]; + + auto argsIter = args.begin(); + std::advance(argsIter, sizeof(int)); + // Executable then argc args, each separated by some number of null bytes + for (int i = 0; argsIter != args.end() && i < argc + 1; i++) { + argsIter = std::find(argsIter, args.end(), '\0'); + argsIter = std::find_if(argsIter, args.end(), [](char ch) { return ch != '\0'; }); + } + + if (argsIter != args.end()) { + auto env_end = std::sregex_iterator{}; + for (auto i = std::sregex_iterator{argsIter, args.end(), storePathRegex}; + i != env_end; + ++i) + { + unchecked[i->str()].emplace(fmt("{libproc/%d/environ}", pid)); + } + }; + + // Per-thread working directories + struct proc_taskallinfo taskAllInfo; + if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0) { + throw SysError("Reading pid %1% tasks", pid); + } + + // If the process doesn't have the per-thread cwd flag then we already have the + // process-wide cwd from PROC_PIDVNODEPATHINFO + if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) { + std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum); + int tidBufSize = proc_pidinfo( + pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t) + ); + if (tidBufSize <= 0) { + throw SysError("Listing pid %1% threads", pid); + } + + for (auto tid : tids) { + struct proc_threadwithpathinfo threadPathInfo; + if (proc_pidinfo( + pid, + PROC_PIDTHREADPATHINFO, + tid, + &threadPathInfo, + sizeof(threadPathInfo) + ) + <= 0) + { + throw SysError("Reading pid %1% thread %2% cwd", pid, tid); + } + + unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace( + fmt("{libproc/%d/thread/%d/cwd}", pid, tid) + ); + } + } + } catch (SysError & e) { + // ENOENT/ESRCH: Process no longer exists (proc_info) + // EINVAL: Process no longer exists (sysctl) + // EACCESS/EPERM: We don't have permission to read this field (proc_info) + // EIO: Kernel failed to read from target process memory during KERN_PROCARGS2 (sysctl) + if (errno == ENOENT || errno == ESRCH || errno == EINVAL || errno == EACCES + || errno == EPERM || errno == EIO) + { + continue; + } + throw; + } + } +} +} |