aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArtemis Tosini <me@artem.ist>2024-03-29 20:29:44 -0400
committerArtemis Tosini <lix@artem.ist>2024-04-25 23:24:21 -0400
commitc03de0df627864fb7e83e9af88201b8a5fcd4930 (patch)
tree39b4b783a896199d736022422b29bf1be05b304f
parent5420b3afd6c328faf1508dce03bbe8e58da8af2b (diff)
gc: Find roots using libproc on Darwin
Previously, the garbage collector found runtime roots on Darwin by shelling out to `lsof -n -w -F n` then parsing the result. However, this requires an lsof binary and can be extremely slow. The official Apple lsof returns in a reasonable amount of time, about 250ms in my tests, but the lsof packaged in nixpkgs is quite slow, taking about 40 seconds to run the command. Using libproc directly is about the same speed as Apple lsof, and allows us to reënable several tests that were disabled on Darwin.

Change-Id: Ifa0adda7984e13c15535693baba835aae79a3577
-rw-r--r--src/libstore/local.mk2
-rw-r--r--src/libstore/meson.build3
-rw-r--r--src/libstore/platform.cc4
-rw-r--r--src/libstore/platform/darwin.cc223
-rw-r--r--src/libstore/platform/darwin.hh35
-rw-r--r--src/libstore/platform/linux.cc10
-rw-r--r--src/libutil/meson.build2
-rw-r--r--src/libutil/regex.cc16
-rw-r--r--src/libutil/regex.hh11
-rw-r--r--tests/functional/common/vars-and-functions.sh.in1
-rw-r--r--tests/functional/gc-runtime.nix36
-rw-r--r--tests/functional/gc-runtime.sh32
12 files changed, 341 insertions, 34 deletions
diff --git a/src/libstore/local.mk b/src/libstore/local.mk
index 6bd73965d..078a63c83 100644
--- a/src/libstore/local.mk
+++ b/src/libstore/local.mk
@@ -7,6 +7,8 @@ libstore_DIR := $(d)
libstore_SOURCES := $(wildcard $(d)/*.cc $(d)/builtins/*.cc $(d)/build/*.cc)
ifdef HOST_LINUX
libstore_SOURCES += $(d)/platform/linux.cc
+else ifdef HOST_DARWIN
+libstore_SOURCES += $(d)/platform/darwin.cc
else
libstore_SOURCES += $(d)/platform/fallback.cc
endif
diff --git a/src/libstore/meson.build b/src/libstore/meson.build
index 94471dc29..5fde92dd0 100644
--- a/src/libstore/meson.build
+++ b/src/libstore/meson.build
@@ -162,6 +162,9 @@ libstore_headers = files(
if host_machine.system() == 'linux'
libstore_sources += files('platform/linux.cc')
libstore_headers += files('platform/linux.hh')
+elif host_machine.system() == 'darwin'
+ libstore_sources += files('platform/darwin.cc')
+ libstore_headers += files('platform/darwin.hh')
else
libstore_sources += files('platform/fallback.cc')
libstore_headers += files('platform/fallback.hh')
diff --git a/src/libstore/platform.cc b/src/libstore/platform.cc
index 9c389ef55..acdedab99 100644
--- a/src/libstore/platform.cc
+++ b/src/libstore/platform.cc
@@ -2,6 +2,8 @@
#if __linux__
#include "platform/linux.hh"
+#elif __APPLE__
+#include "platform/darwin.hh"
#else
#include "platform/fallback.hh"
#endif
@@ -11,6 +13,8 @@ std::shared_ptr<LocalStore> LocalStore::makeLocalStore(const Params & params)
{
#if __linux__
return std::shared_ptr<LocalStore>(new LinuxLocalStore(params));
+#elif __APPLE__
+ return std::shared_ptr<LocalStore>(new DarwinLocalStore(params));
#else
return std::shared_ptr<LocalStore>(new FallbackLocalStore(params));
#endif
diff --git a/src/libstore/platform/darwin.cc b/src/libstore/platform/darwin.cc
new file mode 100644
index 000000000..bbb81784c
--- /dev/null
+++ b/src/libstore/platform/darwin.cc
@@ -0,0 +1,223 @@
+#include "gc-store.hh"
+#include "signals.hh"
+#include "platform/darwin.hh"
+#include "regex.hh"
+
+#include <sys/proc_info.h>
+#include <sys/sysctl.h>
+#include <libproc.h>
+
+#include <regex>
+
+namespace nix {
+
+/**
+ * Collect potential runtime GC roots on Darwin by asking the kernel about
+ * every process through libproc and sysctl.
+ *
+ * For each PID the following are added to `unchecked`, keyed by path and
+ * tagged with a `{libproc/<pid>/...}` description of where it was found:
+ *  - the process working directory and root directory,
+ *  - the path behind every vnode file descriptor,
+ *  - the path behind every vnode-backed memory region,
+ *  - every match of the store path regex in the bytes that follow the
+ *    executable name and arguments in the KERN_PROCARGS2 buffer,
+ *  - per-thread working directories, when PROC_FLAG_THCWD is set.
+ *
+ * A process that fails with ENOENT, ESRCH, EINVAL, EACCES, EPERM or EIO is
+ * skipped; any other SysError is rethrown to the caller.
+ */
+void DarwinLocalStore::findPlatformRoots(UncheckedRoots & unchecked)
+{
+    // Seriously, what are you doing XNU?
+    // There's 3 flavors of PROC_PIDREGIONPATHINFO:
+    // * PROC_PIDREGIONPATHINFO includes all regions
+    // * PROC_PIDREGIONPATHINFO2 includes regions backed by a vnode
+    // * PROC_PIDREGIONPATHINFO3 includes regions backed by a vnode on a
+    //   specified filesystem
+    // Only PROC_PIDREGIONPATHINFO is documented. Unfortunately, using it would
+    // make finding gcroots take about 100x as long and tests would fail from
+    // timeout. According to the Frida source code, PROC_PIDREGIONPATHINFO2 has
+    // been available since XNU 2782.1.97 in OS X 10.10
+    //
+    // 22 means PROC_PIDREGIONPATHINFO2
+    static constexpr int procPidRegionPathInfo2 = 22;
+
+    auto storePathRegex = regex::storePathRegex(storeDir);
+
+    std::vector<int> pids;
+    int pidBufSize = 1;
+
+    // The first pass runs with an empty buffer, so this loop relies on
+    // proc_listpids reporting a size in that case; the second pass then uses
+    // a buffer of that size plus some slack.
+    while (static_cast<size_t>(pidBufSize) > pids.size() * sizeof(int)) {
+        // Reserve some extra size so we don't fail too much
+        pids.resize((pidBufSize + pidBufSize / 8) / sizeof(int));
+        pidBufSize = proc_listpids(PROC_ALL_PIDS, 0, pids.data(), pids.size() * sizeof(int));
+
+        if (pidBufSize <= 0) {
+            throw SysError("Listing PIDs");
+        }
+    }
+
+    // Drop the slack entries that were not filled in.
+    pids.resize(pidBufSize / sizeof(int));
+
+    for (auto pid : pids) {
+        // Let the user abort the scan between processes, as the Linux
+        // implementation does for every /proc entry.
+        checkInterrupt();
+
+        // It doesn't make sense to ask about the kernel
+        if (pid == 0) {
+            continue;
+        }
+
+        try {
+            // Process cwd/root directory
+            struct proc_vnodepathinfo vnodeInfo;
+            if (proc_pidinfo(pid, PROC_PIDVNODEPATHINFO, 0, &vnodeInfo, sizeof(vnodeInfo)) <= 0) {
+                throw SysError("Getting pid %1% working directory", pid);
+            }
+
+            unchecked[std::string(vnodeInfo.pvi_cdir.vip_path)].emplace(
+                fmt("{libproc/%d/cwd}", pid)
+            );
+            unchecked[std::string(vnodeInfo.pvi_rdir.vip_path)].emplace(
+                fmt("{libproc/%d/rootdir}", pid)
+            );
+
+            // File descriptors (same sizing scheme as the PID list above)
+            std::vector<struct proc_fdinfo> fds;
+            int fdBufSize = 1;
+            while (static_cast<size_t>(fdBufSize) > fds.size() * sizeof(struct proc_fdinfo)) {
+                // Reserve some extra size so we don't fail too much
+                fds.resize((fdBufSize + fdBufSize / 8) / sizeof(struct proc_fdinfo));
+                fdBufSize = proc_pidinfo(
+                    pid, PROC_PIDLISTFDS, 0, fds.data(), fds.size() * sizeof(struct proc_fdinfo)
+                );
+
+                if (fdBufSize <= 0) {
+                    throw SysError("Listing pid %1% file descriptors", pid);
+                }
+            }
+            fds.resize(fdBufSize / sizeof(struct proc_fdinfo));
+
+            for (const auto & fd : fds) {
+                // By definition, only a vnode is on the filesystem
+                if (fd.proc_fdtype != PROX_FDTYPE_VNODE) {
+                    continue;
+                }
+
+                struct vnode_fdinfowithpath fdInfo;
+                if (proc_pidfdinfo(
+                        pid, fd.proc_fd, PROC_PIDFDVNODEPATHINFO, &fdInfo, sizeof(fdInfo)
+                    )
+                    <= 0)
+                {
+                    // They probably just closed this fd, no need to cancel looking at
+                    // ranges and arguments
+                    if (errno == EBADF) {
+                        continue;
+                    }
+                    throw SysError("Getting pid %1% fd %2% path", pid, fd.proc_fd);
+                }
+
+                unchecked[std::string(fdInfo.pvip.vip_path)].emplace(
+                    fmt("{libproc/%d/fd/%d}", pid, fd.proc_fd)
+                );
+            }
+
+            // Regions (e.g. mmapped files, executables, shared libraries)
+            uint64_t nextAddr = 0;
+            while (true) {
+                struct proc_regionwithpathinfo regionInfo;
+                if (proc_pidinfo(
+                        pid, procPidRegionPathInfo2, nextAddr, &regionInfo, sizeof(regionInfo)
+                    )
+                    <= 0)
+                {
+                    // PROC_PIDREGIONPATHINFO signals we're done with an error,
+                    // so we're expected to hit this once per process
+                    if (errno == ESRCH || errno == EINVAL) {
+                        break;
+                    }
+                    throw SysError("Getting pid %1% region path", pid);
+                }
+
+                unchecked[std::string(regionInfo.prp_vip.vip_path)].emplace(
+                    fmt("{libproc/%d/region}", pid)
+                );
+
+                // Continue the walk right after the region that was just reported.
+                nextAddr = regionInfo.prp_prinfo.pri_address + regionInfo.prp_prinfo.pri_size;
+            }
+
+            // Arguments and environment variables
+            // We can't read environment variables of binaries with entitlements unless
+            // nix has the `com.apple.private.read-environment-variables` entitlement or
+            // SIP is off. We can read arguments for all applications though.
+
+            // Yes, it's a sysctl, the proc_info and sysctl APIs are mostly similar,
+            // but both have exclusive capabilities
+            int sysctlName[3] = {CTL_KERN, KERN_PROCARGS2, pid};
+            size_t argsSize = 0;
+            // First call only asks for the required buffer size.
+            if (sysctl(sysctlName, 3, nullptr, &argsSize, nullptr, 0) < 0) {
+                throw SysError("Reading pid %1% arguments", pid);
+            }
+
+            std::vector<char> args(argsSize);
+            if (sysctl(sysctlName, 3, args.data(), &argsSize, nullptr, 0) < 0) {
+                throw SysError("Reading pid %1% arguments", pid);
+            }
+
+            // The second call may have written fewer bytes than were reserved.
+            if (argsSize < args.size()) {
+                args.resize(argsSize);
+            }
+
+            // We have these perfectly nice arguments, but have to ignore them because
+            // otherwise we'd see arguments to nix-store commands and
+            // `nix-store --delete /nix/store/whatever` would always fail
+            // First 4 bytes are an int of argc.
+            if (args.size() < sizeof(int)) {
+                continue;
+            }
+            auto argc = reinterpret_cast<int *>(args.data())[0];
+
+            auto argsIter = args.begin();
+            std::advance(argsIter, sizeof(int));
+            // Executable then argc args, each separated by some number of null bytes.
+            // Each round skips one string and the null padding that follows it.
+            for (int i = 0; argsIter != args.end() && i < argc + 1; i++) {
+                argsIter = std::find(argsIter, args.end(), '\0');
+                argsIter = std::find_if(argsIter, args.end(), [](char ch) { return ch != '\0'; });
+            }
+
+            // Whatever is left after the arguments is scanned for store paths.
+            if (argsIter != args.end()) {
+                auto env_end = std::sregex_iterator{};
+                for (auto i = std::sregex_iterator{argsIter, args.end(), storePathRegex};
+                     i != env_end;
+                     ++i)
+                {
+                    unchecked[i->str()].emplace(fmt("{libproc/%d/environ}", pid));
+                }
+            }
+
+            // Per-thread working directories
+            struct proc_taskallinfo taskAllInfo;
+            if (proc_pidinfo(pid, PROC_PIDTASKALLINFO, 0, &taskAllInfo, sizeof(taskAllInfo)) <= 0) {
+                throw SysError("Reading pid %1% tasks", pid);
+            }
+
+            // If the process doesn't have the per-thread cwd flag then we already have the
+            // process-wide cwd from PROC_PIDVNODEPATHINFO
+            if (taskAllInfo.pbsd.pbi_flags & PROC_FLAG_THCWD) {
+                std::vector<uint64_t> tids(taskAllInfo.ptinfo.pti_threadnum);
+                int tidBufSize = proc_pidinfo(
+                    pid, PROC_PIDLISTTHREADS, 0, tids.data(), tids.size() * sizeof(uint64_t)
+                );
+                if (tidBufSize <= 0) {
+                    throw SysError("Listing pid %1% threads", pid);
+                }
+                // Only look at the entries that were actually filled in, like the
+                // PID and file descriptor lists above; threads may have exited since
+                // pti_threadnum was read.
+                tids.resize(tidBufSize / sizeof(uint64_t));
+
+                for (auto tid : tids) {
+                    struct proc_threadwithpathinfo threadPathInfo;
+                    if (proc_pidinfo(
+                            pid,
+                            PROC_PIDTHREADPATHINFO,
+                            tid,
+                            &threadPathInfo,
+                            sizeof(threadPathInfo)
+                        )
+                        <= 0)
+                    {
+                        throw SysError("Reading pid %1% thread %2% cwd", pid, tid);
+                    }
+
+                    unchecked[std::string(threadPathInfo.pvip.vip_path)].emplace(
+                        fmt("{libproc/%d/thread/%d/cwd}", pid, tid)
+                    );
+                }
+            }
+        } catch (SysError & e) {
+            // ENOENT/ESRCH: Process no longer exists (proc_info)
+            // EINVAL: Process no longer exists (sysctl)
+            // EACCES/EPERM: We don't have permission to read this field (proc_info)
+            // EIO: Kernel failed to read from target process memory during
+            //      KERN_PROCARGS2 (sysctl)
+            //
+            // NOTE(review): this inspects the global errno rather than a value carried
+            // by the exception; confirm nothing between the failing call and this
+            // handler can overwrite it.
+            if (errno == ENOENT || errno == ESRCH || errno == EINVAL || errno == EACCES
+                || errno == EPERM || errno == EIO)
+            {
+                continue;
+            }
+            throw;
+        }
+    }
+}
+}
diff --git a/src/libstore/platform/darwin.hh b/src/libstore/platform/darwin.hh
new file mode 100644
index 000000000..b7170aa05
--- /dev/null
+++ b/src/libstore/platform/darwin.hh
@@ -0,0 +1,35 @@
+#pragma once
+///@file
+
+#include "gc-store.hh"
+#include "local-store.hh"
+
+namespace nix {
+
+/**
+ * Darwin-specific implementation of LocalStore
+ *
+ * LocalStore::makeLocalStore() constructs this class when __APPLE__ is
+ * defined. The only member it overrides is findPlatformRoots().
+ */
+class DarwinLocalStore : public LocalStore
+{
+public:
+ /**
+ * Construct the store from `params`, passing them to each base class
+ * named in the initializer list.
+ */
+ DarwinLocalStore(const Params & params)
+ : StoreConfig(params)
+ , LocalFSStoreConfig(params)
+ , LocalStoreConfig(params)
+ , Store(params)
+ , LocalFSStore(params)
+ , LocalStore(params)
+ {
+ }
+ /**
+ * Constructor taking a scheme and path. It delegates to the `params`
+ * constructor and then always throws UnimplementedError; `scheme` and
+ * `path` are not used.
+ */
+ DarwinLocalStore(const std::string scheme, std::string path, const Params & params)
+ : DarwinLocalStore(params)
+ {
+ throw UnimplementedError("DarwinLocalStore");
+ }
+
+private:
+
+ /**
+ * Darwin implementation of runtime root discovery, defined in
+ * platform/darwin.cc. Found paths are added to `unchecked`.
+ */
+ void findPlatformRoots(UncheckedRoots & unchecked) override;
+};
+
+}
diff --git a/src/libstore/platform/linux.cc b/src/libstore/platform/linux.cc
index 9be3e47da..a34608894 100644
--- a/src/libstore/platform/linux.cc
+++ b/src/libstore/platform/linux.cc
@@ -1,6 +1,7 @@
#include "gc-store.hh"
#include "signals.hh"
#include "platform/linux.hh"
+#include "regex.hh"
#include <regex>
@@ -26,12 +27,6 @@ static void readProcLink(const std::string & file, UncheckedRoots & roots)
}
}
-static std::string quoteRegexChars(const std::string & raw)
-{
- static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])");
- return std::regex_replace(raw, specialRegex, R"(\$&)");
-}
-
static void readFileRoots(const char * path, UncheckedRoots & roots)
{
try {
@@ -50,8 +45,7 @@ void LinuxLocalStore::findPlatformRoots(UncheckedRoots & unchecked)
struct dirent * ent;
auto digitsRegex = std::regex(R"(^\d+$)");
auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
- auto storePathRegex =
- std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
+ auto storePathRegex = regex::storePathRegex(storeDir);
while (errno = 0, ent = readdir(procDir.get())) {
checkInterrupt();
if (std::regex_match(ent->d_name, digitsRegex)) {
diff --git a/src/libutil/meson.build b/src/libutil/meson.build
index 11bf97ee7..069798a6f 100644
--- a/src/libutil/meson.build
+++ b/src/libutil/meson.build
@@ -22,6 +22,7 @@ libutil_sources = files(
'position.cc',
'print-elided.cc',
'references.cc',
+ 'regex.cc',
'serialise.cc',
'shlex.cc',
'signals.cc',
@@ -77,6 +78,7 @@ libutil_headers = files(
'ref.hh',
'references.hh',
'regex-combinators.hh',
+ 'regex.hh',
'repair-flag.hh',
'serialise.hh',
'shlex.hh',
diff --git a/src/libutil/regex.cc b/src/libutil/regex.cc
new file mode 100644
index 000000000..a9e6c6bee
--- /dev/null
+++ b/src/libutil/regex.cc
@@ -0,0 +1,16 @@
+#include <string>
+#include <regex>
+
+namespace nix::regex {
+
+/**
+ * Escape `raw` so that it matches itself literally inside a std::regex
+ * pattern: each of . ^ $ \ * + ? ( ) [ ] { } | gets a backslash in front.
+ */
+std::string quoteRegexChars(const std::string & raw)
+{
+    // Compiled once; matches any single character listed above.
+    static const std::regex metaChars(R"([.^$\\*+?()\[\]{}|])");
+    // `$&` in the format string stands for the matched character.
+    return std::regex_replace(raw, metaChars, R"(\$&)");
+}
+
+/**
+ * Build a regex matching `storeDir` (taken literally), a `/`, one or more
+ * characters from [0-9a-z], and then any number of characters from
+ * [0-9a-zA-Z+-._?=].
+ */
+std::regex storePathRegex(const std::string & storeDir)
+{
+    std::string pattern = quoteRegexChars(storeDir);
+    pattern += R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)";
+    return std::regex(pattern);
+}
+
+}
diff --git a/src/libutil/regex.hh b/src/libutil/regex.hh
new file mode 100644
index 000000000..744a7d54a
--- /dev/null
+++ b/src/libutil/regex.hh
@@ -0,0 +1,11 @@
+#pragma once
+///@file
+
+#include <string>
+#include <regex>
+
+namespace nix::regex {
+/**
+ * Returns a copy of `raw` in which each of the characters
+ * . ^ $ \ * + ? ( ) [ ] { } | is preceded by a backslash, so the result can
+ * be embedded in a regex pattern and match `raw` literally.
+ */
+std::string quoteRegexChars(const std::string & raw);
+
+/**
+ * Returns a regex that matches `storeDir` (quoted with quoteRegexChars),
+ * followed by `/`, one or more characters from [0-9a-z], and then any number
+ * of characters from [0-9a-zA-Z+-._?=].
+ */
+std::regex storePathRegex(const std::string & storeDir);
+}
diff --git a/tests/functional/common/vars-and-functions.sh.in b/tests/functional/common/vars-and-functions.sh.in
index b054bf834..3d2e44024 100644
--- a/tests/functional/common/vars-and-functions.sh.in
+++ b/tests/functional/common/vars-and-functions.sh.in
@@ -24,7 +24,6 @@ if [[ -n $NIX_STORE ]]; then
export _NIX_TEST_NO_SANDBOX=1
fi
export _NIX_IN_TEST=$TEST_ROOT/shared
-export _NIX_TEST_NO_LSOF=1
export NIX_REMOTE=${NIX_REMOTE_-}
unset NIX_PATH
export TEST_HOME=$TEST_ROOT/test-home
diff --git a/tests/functional/gc-runtime.nix b/tests/functional/gc-runtime.nix
index ee5980bdf..4303e0880 100644
--- a/tests/functional/gc-runtime.nix
+++ b/tests/functional/gc-runtime.nix
@@ -1,17 +1,29 @@
with import ./config.nix;
-mkDerivation {
- name = "gc-runtime";
- builder =
- # Test inline source file definitions.
- builtins.toFile "builder.sh" ''
- mkdir $out
+{
+ environ = mkDerivation {
+ name = "gc-runtime-environ";
+ buildCommand = "mkdir $out; echo environ > $out/environ";
+ };
- cat > $out/program <<EOF
- #! ${shell}
- sleep 10000
- EOF
+ open = mkDerivation {
+ name = "gc-runtime-open";
+ buildCommand = "mkdir $out; echo open > $out/open";
+ };
- chmod +x $out/program
- '';
+ program = mkDerivation {
+ name = "gc-runtime-program";
+ builder =
+ # Test inline source file definitions.
+ builtins.toFile "builder.sh" ''
+ mkdir $out
+
+ cat > $out/program <<EOF
+ #! ${shell}
+ sleep 10000 < \$1
+ EOF
+
+ chmod +x $out/program
+ '';
+ };
}
diff --git a/tests/functional/gc-runtime.sh b/tests/functional/gc-runtime.sh
index dc1826a55..6e17acfc0 100644
--- a/tests/functional/gc-runtime.sh
+++ b/tests/functional/gc-runtime.sh
@@ -1,38 +1,44 @@
source common.sh
-case $system in
- *linux*)
- ;;
- *)
- skipTest "Not running Linux";
-esac
-
set -m # enable job control, needed for kill
profiles="$NIX_STATE_DIR"/profiles
rm -rf $profiles
-nix-env -p $profiles/test -f ./gc-runtime.nix -i gc-runtime
+nix-env -p $profiles/test -f ./gc-runtime.nix -i gc-runtime-{program,environ,open}
-outPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime)
-echo $outPath
+programPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-program)
+environPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-environ)
+openPath=$(nix-env -p $profiles/test -q --no-name --out-path gc-runtime-open)
+echo $programPath $environPath $openPath
echo "backgrounding program..."
-$profiles/test/program &
+export environPath
+$profiles/test/program $openPath/open &
sleep 2 # hack - wait for the program to get started
child=$!
echo PID=$child
-nix-env -p $profiles/test -e gc-runtime
+nix-env -p $profiles/test -e gc-runtime-{program,environ,open}
nix-env -p $profiles/test --delete-generations old
nix-store --gc
kill -- -$child
-if ! test -e $outPath; then
+if ! test -e $programPath; then
echo "running program was garbage collected!"
exit 1
fi
+if ! test -e $environPath; then
+ echo "file in environment variable was garbage collected!"
+ exit 1
+fi
+
+if ! test -e $openPath; then
+ echo "opened file was garbage collected!"
+ exit 1
+fi
+
exit 0