aboutsummaryrefslogtreecommitdiff
path: root/src/libstore/platform/darwin.cc
diff options
context:
space:
mode:
authorArtemis Tosini <lix@artem.ist>2024-04-26 17:26:45 +0000
committerGerrit Code Review <gerrit@lix>2024-04-26 17:26:45 +0000
commit789aa39576a3c45ab63278cd01e1538c27e1ce9f (patch)
treefaf22edc533ae9ce60916d94c9c7414b49234641 /src/libstore/platform/darwin.cc
parenta1ad4e52a667d76472e8a5a3daf44c0eb34c2150 (diff)
parentc03de0df627864fb7e83e9af88201b8a5fcd4930 (diff)
Merge "gc: Find roots using libproc on Darwin" into main
Diffstat (limited to 'src/libstore/platform/darwin.cc')
-rw-r--r--src/libstore/platform/darwin.cc223
1 files changed, 223 insertions, 0 deletions
diff --git a/src/libstore/platform/darwin.cc b/src/libstore/platform/darwin.cc
new file mode 100644
index 000000000..bbb81784c
--- /dev/null
+++ b/src/libstore/platform/darwin.cc
@@ -0,0 +1,223 @@
+#include "gc-store.hh"
+#include "signals.hh"
+#include "platform/darwin.hh"
+#include "regex.hh"
+
+#include <sys/proc_info.h>
+#include <sys/sysctl.h>
+#include <libproc.h>
+
+#include <regex>
+
+namespace nix {
+
+/**
+ * Collect candidate GC roots from every running process via libproc and
+ * sysctl, and record them in `unchecked`.
+ *
+ * For each PID returned by proc_listpids (PID 0 is skipped) this records:
+ *  - the process working directory and root directory,
+ *  - the path of every open file descriptor that is a vnode,
+ *  - the path of every vnode-backed memory region,
+ *  - every store path (as matched by `regex::storePathRegex(storeDir)`) found
+ *    in the KERN_PROCARGS2 buffer after the executable path and arguments,
+ *  - per-thread working directories, if the process has PROC_FLAG_THCWD set.
+ *
+ * Each entry maps the path to a label of the form `{libproc/<pid>/...}`
+ * describing where it was seen.
+ *
+ * @param unchecked Map from path to the set of labels of where it was found;
+ *        entries are only ever added.
+ * @throws SysError if listing PIDs fails, or if inspecting a process fails
+ *         with an error other than the "process vanished / not permitted"
+ *         codes handled at the end of the per-process loop. Entries already
+ *         recorded for a process that is subsequently skipped are kept.
+ */
+void DarwinLocalStore::findPlatformRoots(UncheckedRoots & unchecked)
+{
+    auto storePathRegex = regex::storePathRegex(storeDir);
+
+    // List all PIDs. The first iteration passes a zero-length buffer (the
+    // initial size of 1 byte rounds down to 0 elements) to learn how much
+    // space is needed; later iterations retry with that size plus 1/8 slack.
+    std::vector<int> pids;
+    int pidBufSize = 1;
+
+    // pidBufSize is always > 0 when compared, so the cast cannot wrap.
+    while (static_cast<size_t>(pidBufSize) > pids.size() * sizeof(int)) {
+        // Reserve some extra size so we don't fail too much
+        pids.resize((pidBufSize + pidBufSize / 8) / sizeof(int));
+        pidBufSize = proc_listpids(PROC_ALL_PIDS, 0, pids.data(), pids.size() * sizeof(int));
+
+        if (pidBufSize <= 0) {
+            throw SysError("Listing PIDs");
+        }
+    }
+
+    // Drop the unused slack so we only iterate over PIDs that were written.
+    pids.resize(pidBufSize / sizeof(int));
+
+    for (auto pid : pids) {
+        // It doesn't make sense to ask about the kernel
+        if (pid == 0) {
+            continue;
+        }
+
+        try {
+            // Process cwd/root directory
+            struct proc_vnodepathinfo vnodeInfo;
+            if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0) {
+                throw SysError("Getting pid %1% working directory", pid);
+            }
+
+            unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace(
+                fmt("{libproc/%d/cwd}", pid)
+            );
+            unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace(
+                fmt("{libproc/%d/rootdir}", pid)
+            );
+
+            // File descriptors. Same grow-and-retry scheme as for the PID list.
+            std::vector<struct proc_fdinfo> fds;
+            int fdBufSize = 1;
+            while (static_cast<size_t>(fdBufSize) > fds.size() * sizeof(struct proc_fdinfo)) {
+                // Reserve some extra size so we don't fail too much
+                fds.resize((fdBufSize + fdBufSize / 8) / sizeof(struct proc_fdinfo));
+                fdBufSize = proc_pidinfo(
+                    pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo)
+                );
+
+                if (fdBufSize <= 0) {
+                    throw SysError("Listing pid %1% file descriptors", pid);
+                }
+            }
+            fds.resize(fdBufSize / sizeof(struct proc_fdinfo));
+
+            for (const auto & fd : fds) {
+                // By definition, only a vnode is on the filesystem
+                if (fd.proc_fdtype != PROX_FDTYPE_VNODE) {
+                    continue;
+                }
+
+                struct vnode_fdinfowithpath fdInfo;
+                if (proc_pidfdinfo(
+                        pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo)
+                    )
+                    <= 0)
+                {
+                    // They probably just closed this fd, no need to cancel looking at
+                    // ranges and arguments
+                    if (errno == EBADF) {
+                        continue;
+                    }
+                    throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd);
+                }
+
+                unchecked[std::string(fdInfo.pvip.vip_path)].emplace(
+                    fmt("{libproc/%d/fd/%d}", pid, fd.proc_fd)
+                );
+            }
+
+            // Regions (e.g. mmapped files, executables, shared libraries)
+            //
+            // Seriously, what are you doing XNU?
+            // There's 3 flavors of PROC_PIDREGIONPATHINFO:
+            // * PROC_PIDREGIONPATHINFO includes all regions
+            // * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode
+            // * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a
+            //   specified filesystem
+            // Only PROC_PIDREGIONPATHINFO is documented. Unfortunately, using it
+            // would make finding gcroots take about 100x as long and tests would
+            // fail from timeout. According to the Frida source code,
+            // PROC_PIDREGIONPATHINFO2 has been available since XNU 2782.1.97 in
+            // OS X 10.10
+            //
+            // 22 means PROC_PIDREGIONPATHINFO2
+            constexpr int procPidRegionPathInfo2 = 22;
+
+            uint64_t nextAddr = 0;
+            while (true) {
+                struct proc_regionwithpathinfo regionInfo;
+                if (proc_pidinfo(
+                        pid, procPidRegionPathInfo2, nextAddr, &regionInfo, sizeof(regionInfo)
+                    )
+                    <= 0)
+                {
+                    // PROC_PIDREGIONPATHINFO signals we're done with an error,
+                    // so we're expected to hit this once per process
+                    if (errno == ESRCH || errno == EINVAL) {
+                        break;
+                    }
+                    throw SysError("Getting pid %1% region path", pid);
+                }
+
+                unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace(
+                    fmt("{libproc/%d/region}", pid)
+                );
+
+                // Continue the walk just past the region we were handed.
+                nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size;
+            }
+
+            // Arguments and environment variables
+            // We can't read environment variables of binaries with entitlements unless
+            // nix has the `com.apple.private.read-environment-variables` entitlement or
+            // SIP is off. We can read arguments for all applications though.
+
+            // Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar,
+            // but both have exclusive capabilities
+            int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid};
+            size_t argsSize = 0;
+            // First call: no buffer, just ask how large the data is.
+            if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) {
+                throw SysError("Reading pid %1% arguments", pid);
+            }
+
+            std::vector<char> args(argsSize);
+            if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) {
+                throw SysError("Reading pid %1% arguments", pid);
+            }
+
+            // The second call reports how much was actually written.
+            if (argsSize < args.size()) {
+                args.resize(argsSize);
+            }
+
+            // We have these perfectly nice arguments, but have to ignore them because
+            // otherwise we'd see arguments to nix-store commands and
+            // `nix-store --delete /nix/store/whatever` would always fail
+            // First 4 bytes are an int of argc.
+            //
+            // A buffer too short to hold argc has nothing to scan, but must not
+            // prevent the per-thread cwd scan below, so guard instead of
+            // skipping to the next process.
+            if (args.size() >= sizeof(int)) {
+                auto argc = reinterpret_cast<int *>(args.data())[0];
+
+                auto argsIter = args.begin();
+                std::advance(argsIter, sizeof(int));
+                // Executable then argc args, each separated by some number of null
+                // bytes. `i <= argc` skips argc + 1 strings without overflowing
+                // when argc is INT_MAX.
+                for (int i = 0; argsIter != args.end() && i <= argc; i++) {
+                    // Skip to the end of this string, then past its NUL padding.
+                    argsIter = std::find(argsIter, args.end(), '\0');
+                    argsIter =
+                        std::find_if(argsIter, args.end(), [](char ch) { return ch != '\0'; });
+                }
+
+                // Whatever follows the arguments is searched for store paths.
+                if (argsIter != args.end()) {
+                    auto env_end = std::sregex_iterator{};
+                    for (auto i = std::sregex_iterator{argsIter, args.end(), storePathRegex};
+                         i != env_end;
+                         ++i)
+                    {
+                        unchecked[i->str()].emplace(fmt("{libproc/%d/environ}", pid));
+                    }
+                }
+            }
+
+            // Per-thread working directories
+            struct proc_taskallinfo taskAllInfo;
+            if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0) {
+                throw SysError("Reading pid %1% tasks", pid);
+            }
+
+            // If the process doesn't have the per-thread cwd flag then we already have the
+            // process-wide cwd from PROC_PIDVNODEPATHINFO
+            if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) {
+                std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum);
+                int tidBufSize = proc_pidinfo(
+                    pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t)
+                );
+                if (tidBufSize <= 0) {
+                    throw SysError("Listing pid %1% threads", pid);
+                }
+
+                // Only the first tidBufSize bytes were filled in. Threads may have
+                // exited since PROC_PIDTASKALLINFO, so trim the zero-initialised
+                // tail rather than querying thread id 0 (as done for pids and fds).
+                tids.resize(tidBufSize / sizeof(uint64_t));
+
+                for (auto tid : tids) {
+                    struct proc_threadwithpathinfo threadPathInfo;
+                    if (proc_pidinfo(
+                            pid,
+                            PROC_PIDTHREADPATHINFO,
+                            tid,
+                            &threadPathInfo,
+                            sizeof(threadPathInfo)
+                        )
+                        <= 0)
+                    {
+                        throw SysError("Reading pid %1% thread %2% cwd", pid, tid);
+                    }
+
+                    unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace(
+                        fmt("{libproc/%d/thread/%d/cwd}", pid, tid)
+                    );
+                }
+            }
+        } catch (const SysError & e) {
+            // Use the error number stored on the exception rather than the
+            // ambient errno, which may have been overwritten since the throw.
+            // NOTE(review): relies on SysError exposing `errNo` as elsewhere in
+            // the codebase - confirm against error.hh.
+            const int err = e.errNo;
+
+            // ENOENT/ESRCH: Process no longer exists (proc_info)
+            // EINVAL: Process no longer exists (sysctl)
+            // EACCES/EPERM: We don't have permission to read this field (proc_info)
+            // EIO: Kernel failed to read from target process memory during
+            //      KERN_PROCARGS2 (sysctl)
+            if (err == ENOENT || err == ESRCH || err == EINVAL || err == EACCES || err == EPERM
+                || err == EIO)
+            {
+                continue;
+            }
+            throw;
+        }
+    }
+}
+}